string-direction 1.1.0 → 1.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 4012023bbe9a099ec7e4bc0c0c366e79df36b194
4
- data.tar.gz: 8ad5696dafb160dff2310054f77e1504ee7a4cfe
3
+ metadata.gz: c2d34e48984500c5942983fea9d97942cb09ab49
4
+ data.tar.gz: 751f2b9960885a5ec1e3905c171cf3716f5e0813
5
5
  SHA512:
6
- metadata.gz: efc912ae089c7ea5b7002797f935543619c18bc7c8ea12db133ff9cff49e0c8f40af69213710c1df03db4e4ca2b6cd8f15eb0741449070f1dd93162ccdc2c976
7
- data.tar.gz: 228e529c260071ea4d8b21e89556a91b7780fab5d803a673807d636cf29e22fb1412e80a42a9b5c7e916a8f81001f70d0c0bd6d261ba1415cc1c837713fffc54
6
+ metadata.gz: f6e7699515186abbd69f7ee9f99d8c3456b54edf8e7667fa26afda7f7e87d8fb7a5513fc9be95325f28d449fc1c99c06bc3bc0853d2cce368669a66e3b7c6f99
7
+ data.tar.gz: eebfacf8dc846064bd23e18dcd9ae49306bff5d8e3ca340697b05ff3288d691598109313e883b4062c59ed8dee7bce09882e3a4b155ade834e1a8c1b9982275d
data/README.md CHANGED
@@ -39,7 +39,7 @@ detector = StringDirection::Detector.new(:foo, :bar)
39
39
 
40
40
  In the above example, classes `StringDirection::FooStrategy` and `StringDirection::BarStrategy` have to be in the load path.
41
41
 
42
- Two strategies are natively integrated: `marks` and `characters`. They are used, in that order, as default strategies if no arguments are given to the detector.
42
+ Three strategies are natively integrated: `marks`, `characters` and `dominant`. `marks` and `characters` are used, in that order, as default strategies if no arguments are given to the detector.
43
43
 
44
44
  ### marks
45
45
 
@@ -102,6 +102,21 @@ Keep in mind than only [scripts recognized by Ruby regular expressions](http://w
102
102
 
103
103
  The `characters` strategy can analyze not only strings but any object that responds to `to_s`.
104
104
 
105
+ ### dominant
106
+
107
+ With the `dominant` strategy, a string can be left-to-right or right-to-left, but never bidi. It returns one or the other depending on which direction has more characters; if both are equally represented, it returns `nil`.
108
+
109
+ ```ruby
110
+ detector = StringDirection::Detector.new(:dominant)
111
+
112
+ detector.direction('e العربية') #=> 'rtl'
113
+ detector.direction('english ة') #=> 'ltr'
114
+ ```
115
+
116
+ As with the `characters` strategy, you can change which scripts are considered right-to-left.
117
+
118
+ The `dominant` strategy can analyze not only strings but any object that responds to `to_s`.
119
+
105
120
  ### Custom Strategies
106
121
 
107
122
  You can define your custom strategies. To do so, you just have to define a class inside `StringDirection` module with a name ending with `Strategy`. This class has to respond to an instance method `run` which takes the string as argument. You can inherit from `StringDirection::Strategy` to have convenient methods `ltr`, `rtl` and `bidi` which return expected result. If the strategy doesn't know the direction, it must return `nil`.
@@ -4,6 +4,7 @@ require 'string-direction/detector'
4
4
  require 'string-direction/strategy'
5
5
  require 'string-direction/strategies/marks_strategy'
6
6
  require 'string-direction/strategies/characters_strategy'
7
+ require 'string-direction/strategies/dominant_strategy'
7
8
  require 'string-direction/string_methods'
8
9
 
9
10
  # Constants & configuration common in the whole library
@@ -2,7 +2,7 @@ module StringDirection
2
2
  # Strategy to detect direction from the scripts to which string characters belong
3
3
  class CharactersStrategy < Strategy
4
4
  # Ignored characters: those in the Unicode general categories mark, punctuation, symbol, separator and other
5
- CHAR_IGNORE_REGEX = /[\p{M}\p{P}\p{S}\p{Z}\p{C}]/.freeze
5
+ IGNORED_CHARS = '\p{M}\p{P}\p{S}\p{Z}\p{C}'.freeze
6
6
 
7
7
  # Inspect which scripts the characters belong to and infer the string direction from them. Right-to-left scripts are those in {Configuration#rtl_scripts}
8
8
  #
@@ -21,16 +21,24 @@ module StringDirection
21
21
 
22
22
  private
23
23
 
24
+ def rtl_regex
25
+ @rtl_regex ||= /[#{rtl_script_character_classes}]/
26
+ end
27
+
28
+ def ltr_regex
29
+ @ltr_regex ||= /[^#{rtl_script_character_classes}#{IGNORED_CHARS}]/
30
+ end
31
+
24
32
  def rtl_characters?(string)
25
- string.match(/[#{join_rtl_for_regex}]/)
33
+ string.match(rtl_regex)
26
34
  end
27
35
 
28
36
  def ltr_characters?(string)
29
- string.gsub(CHAR_IGNORE_REGEX, '').match(/[^#{join_rtl_for_regex}]/)
37
+ string.match(ltr_regex)
30
38
  end
31
39
 
32
- def join_rtl_for_regex
33
- rtl_scripts.map { |script| '\p{' + script + '}' }.join
40
+ def rtl_script_character_classes
41
+ @rtl_script_character_classes ||= rtl_scripts.map { |script| "\\p{#{script}}" }.join
34
42
  end
35
43
 
36
44
  def rtl_scripts
@@ -0,0 +1,28 @@
1
+ module StringDirection
2
+ # Strategy to decide between ltr and rtl depending on which kind of character is dominant
3
+ class DominantStrategy < CharactersStrategy
4
+ # Get the number of ltr and rtl characters in the supplied string and infer
5
+ # direction from the most common type. For this strategy the direction can
6
+ # be ltr or rtl, but never bidi. In case of a tie it returns nil.
7
+ #
8
+ # @param string [String] The string to inspect
9
+ # @return [String, nil]
10
+ def run(string)
11
+ string = string.to_s
12
+ ltr_count = chars_count(string, ltr_regex)
13
+ rtl_count = chars_count(string, rtl_regex)
14
+ diff = ltr_count - rtl_count
15
+ return ltr if diff > 0
16
+ return rtl if diff < 0
17
+ nil
18
+ end
19
+
20
+ private
21
+
22
+ def chars_count(string, regex)
23
+ count = 0
24
+ string.scan(regex) { count += 1 }
25
+ count
26
+ end
27
+ end
28
+ end
@@ -1,4 +1,4 @@
1
1
  module StringDirection
2
2
  # Current library version
3
- VERSION = '1.1.0'
3
+ VERSION = '1.2.0'
4
4
  end
@@ -0,0 +1,115 @@
1
+ require 'spec_helper'
2
+
3
+ describe StringDirection::DominantStrategy do
4
+ describe '#run' do
5
+ let(:english) { 'English' }
6
+ let(:arabic) { 'العربية' }
7
+
8
+ subject { described_class.new.run(string) }
9
+
10
+ context 'when both left-to-right and right-to-left characters are present in equal numbers' do
11
+ let(:string) { arabic + english }
12
+
13
+ it "returns nil" do
14
+ expect(subject).to eq nil
15
+ end
16
+ end
17
+
18
+ context 'when both left-to-right and right-to-left characters are present, with more ltr' do
19
+ let(:string) { english + arabic + english }
20
+
21
+ it "returns 'ltr'" do
22
+ expect(subject).to eq 'ltr'
23
+ end
24
+ end
25
+
26
+ context 'when right-to-left character are present but none of left-to-right' do
27
+ let(:string) { arabic }
28
+
29
+ it "returns 'rtl'" do
30
+ expect(subject).to eq 'rtl'
31
+ end
32
+ end
33
+
34
+ context 'when left-to-right character are present but none of right-to-left' do
35
+ let(:string) { english }
36
+
37
+ it "returns 'ltr'" do
38
+ expect(subject).to eq 'ltr'
39
+ end
40
+ end
41
+
42
+ context 'when neither left-to-right nor right-to-left characters are present' do
43
+ let(:string) { ' ' }
44
+
45
+ it 'returns nil' do
46
+ expect(subject).to be_nil
47
+ end
48
+ end
49
+
50
+ context 'when default right-to-left scripts are changed' do
51
+ let(:new_rtl_script) { 'Latin' }
52
+ let(:old_rtl_script) { 'Arabic' }
53
+
54
+ context 'when there are characters from an added right-to-left script' do
55
+ let(:string) { english }
56
+
57
+ it 'treats them as right-to-left chracters' do
58
+ StringDirection.configure do |config|
59
+ config.rtl_scripts << new_rtl_script
60
+ end
61
+
62
+ expect(subject).to eq 'rtl'
63
+ end
64
+ end
65
+
66
+ context 'when there are characters from a deleted right-to-left script ' do
67
+ let(:string) { arabic }
68
+
69
+ it 'treats them as left-to-right characters' do
70
+ StringDirection.configure do |config|
71
+ config.rtl_scripts.delete(old_rtl_script)
72
+ end
73
+
74
+ expect(subject).to eq 'ltr'
75
+ end
76
+ end
77
+
78
+ after :each do
79
+ StringDirection.reset_configuration
80
+ end
81
+ end
82
+
83
+ context 'when special characters are present' do
84
+ let(:string) do
85
+ mark = "\u0903"
86
+ punctuation = '_'
87
+ symbol = '€'
88
+ separator = ' '
89
+ other = "\u0005"
90
+
91
+ arabic + mark + punctuation + symbol + separator + other
92
+ end
93
+
94
+ it 'ignores them' do
95
+ expect(subject).to eq 'rtl'
96
+ end
97
+ end
98
+
99
+ context 'when an object responding to #to_s is given' do
100
+ let(:string) do
101
+ class StringDirection::TestObject
102
+ def to_s
103
+ 'English'
104
+ end
105
+ end
106
+
107
+ StringDirection::TestObject.new
108
+ end
109
+
110
+ it 'takes as string the result of #to_s method' do
111
+ expect(subject).to eq('ltr')
112
+ end
113
+ end
114
+ end
115
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: string-direction
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.0
4
+ version: 1.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Marc Busqué
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-09-14 00:00:00.000000000 Z
11
+ date: 2016-06-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: yard
@@ -112,6 +112,7 @@ files:
112
112
  - lib/string-direction/configuration.rb
113
113
  - lib/string-direction/detector.rb
114
114
  - lib/string-direction/strategies/characters_strategy.rb
115
+ - lib/string-direction/strategies/dominant_strategy.rb
115
116
  - lib/string-direction/strategies/marks_strategy.rb
116
117
  - lib/string-direction/strategy.rb
117
118
  - lib/string-direction/string_methods.rb
@@ -120,6 +121,7 @@ files:
120
121
  - spec/string-direction/configuration_spec.rb
121
122
  - spec/string-direction/detector_spec.rb
122
123
  - spec/string-direction/strategies/characters_strategy_spec.rb
124
+ - spec/string-direction/strategies/dominant_strategy_spec.rb
123
125
  - spec/string-direction/strategies/marks_strategy_spec.rb
124
126
  - spec/string-direction/strategy_spec.rb
125
127
  - spec/string-direction/string_methods_spec.rb
@@ -145,7 +147,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
145
147
  version: '0'
146
148
  requirements: []
147
149
  rubyforge_project:
148
- rubygems_version: 2.4.5
150
+ rubygems_version: 2.6.3
149
151
  signing_key:
150
152
  specification_version: 4
151
153
  summary: Automatic detection of text direction (ltr, rtl or bidi) for strings