string-direction 1.1.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 4012023bbe9a099ec7e4bc0c0c366e79df36b194
4
- data.tar.gz: 8ad5696dafb160dff2310054f77e1504ee7a4cfe
3
+ metadata.gz: c2d34e48984500c5942983fea9d97942cb09ab49
4
+ data.tar.gz: 751f2b9960885a5ec1e3905c171cf3716f5e0813
5
5
  SHA512:
6
- metadata.gz: efc912ae089c7ea5b7002797f935543619c18bc7c8ea12db133ff9cff49e0c8f40af69213710c1df03db4e4ca2b6cd8f15eb0741449070f1dd93162ccdc2c976
7
- data.tar.gz: 228e529c260071ea4d8b21e89556a91b7780fab5d803a673807d636cf29e22fb1412e80a42a9b5c7e916a8f81001f70d0c0bd6d261ba1415cc1c837713fffc54
6
+ metadata.gz: f6e7699515186abbd69f7ee9f99d8c3456b54edf8e7667fa26afda7f7e87d8fb7a5513fc9be95325f28d449fc1c99c06bc3bc0853d2cce368669a66e3b7c6f99
7
+ data.tar.gz: eebfacf8dc846064bd23e18dcd9ae49306bff5d8e3ca340697b05ff3288d691598109313e883b4062c59ed8dee7bce09882e3a4b155ade834e1a8c1b9982275d
data/README.md CHANGED
@@ -39,7 +39,7 @@ detector = StringDirection::Detector.new(:foo, :bar)
39
39
 
40
40
  In the above example, classes `StringDirection::FooStrategy` and `StringDirection::BarStrategy` have to be in the load path.
41
41
 
42
- Two strategies are natively integrated: `marks` and `characters`. They are used, in that order, as default strategies if no arguments are given to the detector.
42
+ Three strategies are natively integrated: `marks`, `characters` and `dominant`. `marks` and `characters` are used, in that order, as default strategies if no arguments are given to the detector.
43
43
 
44
44
  ### marks
45
45
 
@@ -102,6 +102,21 @@ Keep in mind than only [scripts recognized by Ruby regular expressions](http://w
102
102
 
103
103
  `characters` strategy can not only analyze a string but everything responding to `to_s`.
104
104
 
105
+ ### dominant
106
+
107
+ With the `dominant` strategy, a string can be left-to-right or right-to-left, but never bidi. It returns whichever direction has more characters; if both directions have the same number of characters, it returns `nil`.
108
+
109
+ ```ruby
110
+ detector = StringDirection::Detector.new(:dominant)
111
+
112
+ detector.direction('e العربية') #=> 'rtl'
113
+ detector.direction('english ة') #=> 'ltr'
114
+ ```
115
+
116
+ As with the `characters` strategy, you can change which scripts are considered right-to-left.
117
+
118
+ The `dominant` strategy can analyze not only strings but any object responding to `to_s`.
119
+
105
120
  ### Custom Strategies
106
121
 
107
122
  You can define your custom strategies. To do so, you just have to define a class inside `StringDirection` module with a name ending with `Strategy`. This class has to respond to an instance method `run` which takes the string as argument. You can inherit from `StringDirection::Strategy` to have convenient methods `ltr`, `rtl` and `bidi` which return expected result. If the strategy doesn't know the direction, it must return `nil`.
@@ -4,6 +4,7 @@ require 'string-direction/detector'
4
4
  require 'string-direction/strategy'
5
5
  require 'string-direction/strategies/marks_strategy'
6
6
  require 'string-direction/strategies/characters_strategy'
7
+ require 'string-direction/strategies/dominant_strategy'
7
8
  require 'string-direction/string_methods'
8
9
 
9
10
  # Constants & configuration common in the whole library
@@ -2,7 +2,7 @@ module StringDirection
2
2
  # Strategy to detect direction from the scripts to which string characters belong
3
3
  class CharactersStrategy < Strategy
4
4
  # Ignored characters: unicode marks, punctuations, symbols, separator and other general categories
5
- CHAR_IGNORE_REGEX = /[\p{M}\p{P}\p{S}\p{Z}\p{C}]/.freeze
5
+ IGNORED_CHARS = '\p{M}\p{P}\p{S}\p{Z}\p{C}'.freeze
6
6
 
7
7
  # Inspect to which scripts the characters belong and infer the string direction from them. Right-to-left scripts are those in {Configuration#rtl_scripts}
8
8
  #
@@ -21,16 +21,24 @@ module StringDirection
21
21
 
22
22
  private
23
23
 
24
+ def rtl_regex
25
+ @rtl_regex ||= /[#{rtl_script_character_classes}]/
26
+ end
27
+
28
+ def ltr_regex
29
+ @ltr_regex ||= /[^#{rtl_script_character_classes}#{IGNORED_CHARS}]/
30
+ end
31
+
24
32
  def rtl_characters?(string)
25
- string.match(/[#{join_rtl_for_regex}]/)
33
+ string.match(rtl_regex)
26
34
  end
27
35
 
28
36
  def ltr_characters?(string)
29
- string.gsub(CHAR_IGNORE_REGEX, '').match(/[^#{join_rtl_for_regex}]/)
37
+ string.match(ltr_regex)
30
38
  end
31
39
 
32
- def join_rtl_for_regex
33
- rtl_scripts.map { |script| '\p{' + script + '}' }.join
40
+ def rtl_script_character_classes
41
+ @rtl_script_character_classes ||= rtl_scripts.map { |script| "\\p{#{script}}" }.join
34
42
  end
35
43
 
36
44
  def rtl_scripts
@@ -0,0 +1,28 @@
1
+ module StringDirection
2
+ # Strategy to decide between ltr and rtl direction depending on which kind of character is dominant in the string
3
+ class DominantStrategy < CharactersStrategy
4
+ # Get the number of ltr and rtl characters in the supplied string and infer
5
+ # direction from the most common type. For this strategy the direction can
6
+ # be ltr or rtl, but never bidi. In case of a tie it returns nil.
7
+ #
8
+ # @param string [String] The string to inspect
9
+ # @return [String, nil]
10
+ def run(string)
11
+ string = string.to_s
12
+ ltr_count = chars_count(string, ltr_regex)
13
+ rtl_count = chars_count(string, rtl_regex)
14
+ diff = ltr_count - rtl_count
15
+ return ltr if diff > 0
16
+ return rtl if diff < 0
17
+ nil
18
+ end
19
+
20
+ private
21
+
22
+ def chars_count(string, regex)
23
+ count = 0
24
+ string.scan(regex) { count += 1 }
25
+ count
26
+ end
27
+ end
28
+ end
@@ -1,4 +1,4 @@
1
1
  module StringDirection
2
2
  # Current library version
3
- VERSION = '1.1.0'
3
+ VERSION = '1.2.0'
4
4
  end
@@ -0,0 +1,115 @@
1
+ require 'spec_helper'
2
+
3
+ describe StringDirection::DominantStrategy do
4
+ describe '#run' do
5
+ let(:english) { 'English' }
6
+ let(:arabic) { 'العربية' }
7
+
8
+ subject { described_class.new.run(string) }
9
+
10
+ context 'when both left-to-right and right-to-left characters are present in equal numbers' do
11
+ let(:string) { arabic + english }
12
+
13
+ it "returns nil" do
14
+ expect(subject).to eq nil
15
+ end
16
+ end
17
+
18
+ context 'when both left-to-right and right-to-left characters are present, with more ltr' do
19
+ let(:string) { english + arabic + english }
20
+
21
+ it "returns 'ltr'" do
22
+ expect(subject).to eq 'ltr'
23
+ end
24
+ end
25
+
26
+ context 'when right-to-left character are present but none of left-to-right' do
27
+ let(:string) { arabic }
28
+
29
+ it "returns 'rtl'" do
30
+ expect(subject).to eq 'rtl'
31
+ end
32
+ end
33
+
34
+ context 'when left-to-right character are present but none of right-to-left' do
35
+ let(:string) { english }
36
+
37
+ it "returns 'ltr'" do
38
+ expect(subject).to eq 'ltr'
39
+ end
40
+ end
41
+
42
+ context 'when neither left-to-right nor right-to-left characters are present' do
43
+ let(:string) { ' ' }
44
+
45
+ it 'returns nil' do
46
+ expect(subject).to be_nil
47
+ end
48
+ end
49
+
50
+ context 'when default right-to-left scripts are changed' do
51
+ let(:new_rtl_script) { 'Latin' }
52
+ let(:old_rtl_script) { 'Arabic' }
53
+
54
+ context 'when there are characters from an added right-to-left script' do
55
+ let(:string) { english }
56
+
57
+ it 'treats them as right-to-left chracters' do
58
+ StringDirection.configure do |config|
59
+ config.rtl_scripts << new_rtl_script
60
+ end
61
+
62
+ expect(subject).to eq 'rtl'
63
+ end
64
+ end
65
+
66
+ context 'when there are characters from a deleted right-to-left script ' do
67
+ let(:string) { arabic }
68
+
69
+ it 'treats them as left-to-right characters' do
70
+ StringDirection.configure do |config|
71
+ config.rtl_scripts.delete(old_rtl_script)
72
+ end
73
+
74
+ expect(subject).to eq 'ltr'
75
+ end
76
+ end
77
+
78
+ after :each do
79
+ StringDirection.reset_configuration
80
+ end
81
+ end
82
+
83
+ context 'when special characters are present' do
84
+ let(:string) do
85
+ mark = "\u0903"
86
+ punctuation = '_'
87
+ symbol = '€'
88
+ separator = ' '
89
+ other = "\u0005"
90
+
91
+ arabic + mark + punctuation + symbol + separator + other
92
+ end
93
+
94
+ it 'ignores them' do
95
+ expect(subject).to eq 'rtl'
96
+ end
97
+ end
98
+
99
+ context 'when an object responding to #to_s is given' do
100
+ let(:string) do
101
+ class StringDirection::TestObject
102
+ def to_s
103
+ 'English'
104
+ end
105
+ end
106
+
107
+ StringDirection::TestObject.new
108
+ end
109
+
110
+ it 'takes as string the result of #to_s method' do
111
+ expect(subject).to eq('ltr')
112
+ end
113
+ end
114
+ end
115
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: string-direction
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.0
4
+ version: 1.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Marc Busqué
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-09-14 00:00:00.000000000 Z
11
+ date: 2016-06-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: yard
@@ -112,6 +112,7 @@ files:
112
112
  - lib/string-direction/configuration.rb
113
113
  - lib/string-direction/detector.rb
114
114
  - lib/string-direction/strategies/characters_strategy.rb
115
+ - lib/string-direction/strategies/dominant_strategy.rb
115
116
  - lib/string-direction/strategies/marks_strategy.rb
116
117
  - lib/string-direction/strategy.rb
117
118
  - lib/string-direction/string_methods.rb
@@ -120,6 +121,7 @@ files:
120
121
  - spec/string-direction/configuration_spec.rb
121
122
  - spec/string-direction/detector_spec.rb
122
123
  - spec/string-direction/strategies/characters_strategy_spec.rb
124
+ - spec/string-direction/strategies/dominant_strategy_spec.rb
123
125
  - spec/string-direction/strategies/marks_strategy_spec.rb
124
126
  - spec/string-direction/strategy_spec.rb
125
127
  - spec/string-direction/string_methods_spec.rb
@@ -145,7 +147,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
145
147
  version: '0'
146
148
  requirements: []
147
149
  rubyforge_project:
148
- rubygems_version: 2.4.5
150
+ rubygems_version: 2.6.3
149
151
  signing_key:
150
152
  specification_version: 4
151
153
  summary: Automatic detection of text direction (ltr, rtl or bidi) for strings