string-direction 1.1.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +16 -1
- data/lib/string-direction.rb +1 -0
- data/lib/string-direction/strategies/characters_strategy.rb +13 -5
- data/lib/string-direction/strategies/dominant_strategy.rb +28 -0
- data/lib/string-direction/version.rb +1 -1
- data/spec/string-direction/strategies/dominant_strategy_spec.rb +115 -0
- metadata +5 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c2d34e48984500c5942983fea9d97942cb09ab49
|
4
|
+
data.tar.gz: 751f2b9960885a5ec1e3905c171cf3716f5e0813
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f6e7699515186abbd69f7ee9f99d8c3456b54edf8e7667fa26afda7f7e87d8fb7a5513fc9be95325f28d449fc1c99c06bc3bc0853d2cce368669a66e3b7c6f99
|
7
|
+
data.tar.gz: eebfacf8dc846064bd23e18dcd9ae49306bff5d8e3ca340697b05ff3288d691598109313e883b4062c59ed8dee7bce09882e3a4b155ade834e1a8c1b9982275d
|
data/README.md
CHANGED
@@ -39,7 +39,7 @@ detector = StringDirection::Detector.new(:foo, :bar)
|
|
39
39
|
|
40
40
|
In the above example, classes `StringDirection::FooStrategy` and `StringDirection::BarStrategy` have to be in the load path.
|
41
41
|
|
42
|
-
|
42
|
+
Three strategies are natively integrated: `marks`, `characters` and `dominant`. `marks` and `characters` are used as the default strategies if no arguments are given to the detector.
|
43
43
|
|
44
44
|
### marks
|
45
45
|
|
@@ -102,6 +102,21 @@ Keep in mind than only [scripts recognized by Ruby regular expressions](http://w
|
|
102
102
|
|
103
103
|
The `characters` strategy can analyze not only strings but any object responding to `to_s`.
|
104
104
|
|
105
|
+
### dominant
|
106
|
+
|
107
|
+
With the `dominant` strategy, a string can be left-to-right or right-to-left, but never bidi. It returns whichever direction has more characters in the string; if both directions have the same number of characters, it returns `nil`.
|
108
|
+
|
109
|
+
```ruby
|
110
|
+
detector = StringDirection::Detector.new(:dominant)
|
111
|
+
|
112
|
+
detector.direction('e العربية') #=> 'rtl'
|
113
|
+
detector.direction('english ة') #=> 'ltr'
|
114
|
+
```
|
115
|
+
|
116
|
+
As with the `characters` strategy, you can change which scripts are considered right-to-left.
|
117
|
+
|
118
|
+
The `dominant` strategy can analyze not only strings but any object responding to `to_s`.
|
119
|
+
|
105
120
|
### Custom Strategies
|
106
121
|
|
107
122
|
You can define your custom strategies. To do so, you just have to define a class inside `StringDirection` module with a name ending with `Strategy`. This class has to respond to an instance method `run` which takes the string as argument. You can inherit from `StringDirection::Strategy` to have convenient methods `ltr`, `rtl` and `bidi` which return expected result. If the strategy doesn't know the direction, it must return `nil`.
|
data/lib/string-direction.rb
CHANGED
@@ -4,6 +4,7 @@ require 'string-direction/detector'
|
|
4
4
|
require 'string-direction/strategy'
|
5
5
|
require 'string-direction/strategies/marks_strategy'
|
6
6
|
require 'string-direction/strategies/characters_strategy'
|
7
|
+
require 'string-direction/strategies/dominant_strategy'
|
7
8
|
require 'string-direction/string_methods'
|
8
9
|
|
9
10
|
# Constants & configuration common in the whole library
|
@@ -2,7 +2,7 @@ module StringDirection
|
|
2
2
|
# Strategy to detect direction from the scripts to which string characters belong
|
3
3
|
class CharactersStrategy < Strategy
|
4
4
|
# Ignored characters: unicode marks, punctuations, symbols, separator and other general categories
|
5
|
-
|
5
|
+
IGNORED_CHARS = '\p{M}\p{P}\p{S}\p{Z}\p{C}'.freeze
|
6
6
|
|
7
7
|
# Inspect which scripts the characters belong to and infer the string direction from them. Right-to-left scripts are those in {Configuration#rtl_scripts}
|
8
8
|
#
|
@@ -21,16 +21,24 @@ module StringDirection
|
|
21
21
|
|
22
22
|
private
|
23
23
|
|
24
|
+
def rtl_regex
|
25
|
+
@rtl_regex ||= /[#{rtl_script_character_classes}]/
|
26
|
+
end
|
27
|
+
|
28
|
+
def ltr_regex
|
29
|
+
@ltr_regex ||= /[^#{rtl_script_character_classes}#{IGNORED_CHARS}]/
|
30
|
+
end
|
31
|
+
|
24
32
|
def rtl_characters?(string)
|
25
|
-
string.match(
|
33
|
+
string.match(rtl_regex)
|
26
34
|
end
|
27
35
|
|
28
36
|
def ltr_characters?(string)
|
29
|
-
string.
|
37
|
+
string.match(ltr_regex)
|
30
38
|
end
|
31
39
|
|
32
|
-
def
|
33
|
-
rtl_scripts.map { |script|
|
40
|
+
def rtl_script_character_classes
|
41
|
+
@rtl_script_character_classes ||= rtl_scripts.map { |script| "\\p{#{script}}" }.join
|
34
42
|
end
|
35
43
|
|
36
44
|
def rtl_scripts
|
@@ -0,0 +1,28 @@
|
|
1
|
+
module StringDirection
|
2
|
+
# Strategy to decide the direction, either ltr or rtl, depending on which type of character is dominant
|
3
|
+
class DominantStrategy < CharactersStrategy
|
4
|
+
# Get the number of ltr and rtl characters in the supplied string and infer
|
5
|
+
# direction from the most common type. For this strategy the direction can
|
6
|
+
# be ltr or rtl, but never bidi. In case of a tie it returns nil.
|
7
|
+
#
|
8
|
+
# @param string [String] The string to inspect
|
9
|
+
# @return [String, nil]
|
10
|
+
def run(string)
|
11
|
+
string = string.to_s
|
12
|
+
ltr_count = chars_count(string, ltr_regex)
|
13
|
+
rtl_count = chars_count(string, rtl_regex)
|
14
|
+
diff = ltr_count - rtl_count
|
15
|
+
return ltr if diff > 0
|
16
|
+
return rtl if diff < 0
|
17
|
+
nil
|
18
|
+
end
|
19
|
+
|
20
|
+
private
|
21
|
+
|
22
|
+
def chars_count(string, regex)
|
23
|
+
count = 0
|
24
|
+
string.scan(regex) { count += 1 }
|
25
|
+
count
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
@@ -0,0 +1,115 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe StringDirection::DominantStrategy do
|
4
|
+
describe '#run' do
|
5
|
+
let(:english) { 'English' }
|
6
|
+
let(:arabic) { 'العربية' }
|
7
|
+
|
8
|
+
subject { described_class.new.run(string) }
|
9
|
+
|
10
|
+
context 'when both left-to-right and right-to-left characters are present in equal numbers' do
|
11
|
+
let(:string) { arabic + english }
|
12
|
+
|
13
|
+
it "returns nil" do
|
14
|
+
expect(subject).to eq nil
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
context 'when both left-to-right and right-to-left characters are present, with more ltr' do
|
19
|
+
let(:string) { english + arabic + english }
|
20
|
+
|
21
|
+
it "returns 'ltr'" do
|
22
|
+
expect(subject).to eq 'ltr'
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
context 'when right-to-left character are present but none of left-to-right' do
|
27
|
+
let(:string) { arabic }
|
28
|
+
|
29
|
+
it "returns 'rtl'" do
|
30
|
+
expect(subject).to eq 'rtl'
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
context 'when left-to-right character are present but none of right-to-left' do
|
35
|
+
let(:string) { english }
|
36
|
+
|
37
|
+
it "returns 'ltr'" do
|
38
|
+
expect(subject).to eq 'ltr'
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
context 'when neither left-to-right nor right-to-left characters are present' do
|
43
|
+
let(:string) { ' ' }
|
44
|
+
|
45
|
+
it 'returns nil' do
|
46
|
+
expect(subject).to be_nil
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
context 'when default right-to-left scripts are changed' do
|
51
|
+
let(:new_rtl_script) { 'Latin' }
|
52
|
+
let(:old_rtl_script) { 'Arabic' }
|
53
|
+
|
54
|
+
context 'when there are characters from an added right-to-left script' do
|
55
|
+
let(:string) { english }
|
56
|
+
|
57
|
+
it 'treats them as right-to-left chracters' do
|
58
|
+
StringDirection.configure do |config|
|
59
|
+
config.rtl_scripts << new_rtl_script
|
60
|
+
end
|
61
|
+
|
62
|
+
expect(subject).to eq 'rtl'
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
context 'when there are characters from a deleted right-to-left script ' do
|
67
|
+
let(:string) { arabic }
|
68
|
+
|
69
|
+
it 'treats them as left-to-right characters' do
|
70
|
+
StringDirection.configure do |config|
|
71
|
+
config.rtl_scripts.delete(old_rtl_script)
|
72
|
+
end
|
73
|
+
|
74
|
+
expect(subject).to eq 'ltr'
|
75
|
+
end
|
76
|
+
end
|
77
|
+
|
78
|
+
after :each do
|
79
|
+
StringDirection.reset_configuration
|
80
|
+
end
|
81
|
+
end
|
82
|
+
|
83
|
+
context 'when special characters are present' do
|
84
|
+
let(:string) do
|
85
|
+
mark = "\u0903"
|
86
|
+
punctuation = '_'
|
87
|
+
symbol = '€'
|
88
|
+
separator = ' '
|
89
|
+
other = "\u0005"
|
90
|
+
|
91
|
+
arabic + mark + punctuation + symbol + separator + other
|
92
|
+
end
|
93
|
+
|
94
|
+
it 'ignores them' do
|
95
|
+
expect(subject).to eq 'rtl'
|
96
|
+
end
|
97
|
+
end
|
98
|
+
|
99
|
+
context 'when an object responding to #to_s is given' do
|
100
|
+
let(:string) do
|
101
|
+
class StringDirection::TestObject
|
102
|
+
def to_s
|
103
|
+
'English'
|
104
|
+
end
|
105
|
+
end
|
106
|
+
|
107
|
+
StringDirection::TestObject.new
|
108
|
+
end
|
109
|
+
|
110
|
+
it 'takes as string the result of #to_s method' do
|
111
|
+
expect(subject).to eq('ltr')
|
112
|
+
end
|
113
|
+
end
|
114
|
+
end
|
115
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: string-direction
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Marc Busqué
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2016-06-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: yard
|
@@ -112,6 +112,7 @@ files:
|
|
112
112
|
- lib/string-direction/configuration.rb
|
113
113
|
- lib/string-direction/detector.rb
|
114
114
|
- lib/string-direction/strategies/characters_strategy.rb
|
115
|
+
- lib/string-direction/strategies/dominant_strategy.rb
|
115
116
|
- lib/string-direction/strategies/marks_strategy.rb
|
116
117
|
- lib/string-direction/strategy.rb
|
117
118
|
- lib/string-direction/string_methods.rb
|
@@ -120,6 +121,7 @@ files:
|
|
120
121
|
- spec/string-direction/configuration_spec.rb
|
121
122
|
- spec/string-direction/detector_spec.rb
|
122
123
|
- spec/string-direction/strategies/characters_strategy_spec.rb
|
124
|
+
- spec/string-direction/strategies/dominant_strategy_spec.rb
|
123
125
|
- spec/string-direction/strategies/marks_strategy_spec.rb
|
124
126
|
- spec/string-direction/strategy_spec.rb
|
125
127
|
- spec/string-direction/string_methods_spec.rb
|
@@ -145,7 +147,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
145
147
|
version: '0'
|
146
148
|
requirements: []
|
147
149
|
rubyforge_project:
|
148
|
-
rubygems_version: 2.
|
150
|
+
rubygems_version: 2.6.3
|
149
151
|
signing_key:
|
150
152
|
specification_version: 4
|
151
153
|
summary: Automatic detection of text direction (ltr, rtl or bidi) for strings
|