string-direction 1.1.0 → 1.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +16 -1
- data/lib/string-direction.rb +1 -0
- data/lib/string-direction/strategies/characters_strategy.rb +13 -5
- data/lib/string-direction/strategies/dominant_strategy.rb +28 -0
- data/lib/string-direction/version.rb +1 -1
- data/spec/string-direction/strategies/dominant_strategy_spec.rb +115 -0
- metadata +5 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c2d34e48984500c5942983fea9d97942cb09ab49
|
4
|
+
data.tar.gz: 751f2b9960885a5ec1e3905c171cf3716f5e0813
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f6e7699515186abbd69f7ee9f99d8c3456b54edf8e7667fa26afda7f7e87d8fb7a5513fc9be95325f28d449fc1c99c06bc3bc0853d2cce368669a66e3b7c6f99
|
7
|
+
data.tar.gz: eebfacf8dc846064bd23e18dcd9ae49306bff5d8e3ca340697b05ff3288d691598109313e883b4062c59ed8dee7bce09882e3a4b155ade834e1a8c1b9982275d
|
data/README.md
CHANGED
@@ -39,7 +39,7 @@ detector = StringDirection::Detector.new(:foo, :bar)
|
|
39
39
|
|
40
40
|
In the above example, classes `StringDirection::FooStrategy` and `StringDirection::BarStrategy` have to be in the load path.
|
41
41
|
|
42
|
-
|
42
|
+
Three strategies are natively integrated: `marks`, `characters` and `dominant`. `marks` and `characters` are used as the default strategies if no arguments are given to the detector.
|
43
43
|
|
44
44
|
### marks
|
45
45
|
|
@@ -102,6 +102,21 @@ Keep in mind than only [scripts recognized by Ruby regular expressions](http://w
|
|
102
102
|
|
103
103
|
The `characters` strategy can analyze not only strings but any object that responds to `to_s`.
|
104
104
|
|
105
|
+
### dominant
|
106
|
+
|
107
|
+
With the `dominant` strategy, a string can be left-to-right or right-to-left, but never bidi. It returns one or the other depending on which direction has more characters; if both directions have the same number of characters, it returns `nil`.
|
108
|
+
|
109
|
+
```ruby
|
110
|
+
detector = StringDirection::Detector.new(:dominant)
|
111
|
+
|
112
|
+
detector.direction('e العربية') #=> 'rtl'
|
113
|
+
detector.direction('english ة') #=> 'ltr'
|
114
|
+
```
|
115
|
+
|
116
|
+
As with the `characters` strategy, you can change which scripts are considered right-to-left.
|
117
|
+
|
118
|
+
The `dominant` strategy can analyze not only strings but any object that responds to `to_s`.
|
119
|
+
|
105
120
|
### Custom Strategies
|
106
121
|
|
107
122
|
You can define your custom strategies. To do so, you just have to define a class inside `StringDirection` module with a name ending with `Strategy`. This class has to respond to an instance method `run` which takes the string as argument. You can inherit from `StringDirection::Strategy` to have convenient methods `ltr`, `rtl` and `bidi` which return expected result. If the strategy doesn't know the direction, it must return `nil`.
|
data/lib/string-direction.rb
CHANGED
@@ -4,6 +4,7 @@ require 'string-direction/detector'
|
|
4
4
|
require 'string-direction/strategy'
|
5
5
|
require 'string-direction/strategies/marks_strategy'
|
6
6
|
require 'string-direction/strategies/characters_strategy'
|
7
|
+
require 'string-direction/strategies/dominant_strategy'
|
7
8
|
require 'string-direction/string_methods'
|
8
9
|
|
9
10
|
# Constants & configuration common in the whole library
|
@@ -2,7 +2,7 @@ module StringDirection
|
|
2
2
|
# Strategy to detect direction from the scripts to which string characters belong
|
3
3
|
class CharactersStrategy < Strategy
|
4
4
|
# Ignored characters: unicode marks, punctuations, symbols, separator and other general categories
|
5
|
-
|
5
|
+
IGNORED_CHARS = '\p{M}\p{P}\p{S}\p{Z}\p{C}'.freeze
|
6
6
|
|
7
7
|
# Inspect which scripts the characters belong to and infer the string direction from them. Right-to-left scripts are those in {Configuration#rtl_scripts}
|
8
8
|
#
|
@@ -21,16 +21,24 @@ module StringDirection
|
|
21
21
|
|
22
22
|
private
|
23
23
|
|
24
|
+
def rtl_regex
|
25
|
+
@rtl_regex ||= /[#{rtl_script_character_classes}]/
|
26
|
+
end
|
27
|
+
|
28
|
+
def ltr_regex
|
29
|
+
@ltr_regex ||= /[^#{rtl_script_character_classes}#{IGNORED_CHARS}]/
|
30
|
+
end
|
31
|
+
|
24
32
|
def rtl_characters?(string)
|
25
|
-
string.match(
|
33
|
+
string.match(rtl_regex)
|
26
34
|
end
|
27
35
|
|
28
36
|
def ltr_characters?(string)
|
29
|
-
string.
|
37
|
+
string.match(ltr_regex)
|
30
38
|
end
|
31
39
|
|
32
|
-
def
|
33
|
-
rtl_scripts.map { |script|
|
40
|
+
def rtl_script_character_classes
|
41
|
+
@rtl_script_character_classes ||= rtl_scripts.map { |script| "\\p{#{script}}" }.join
|
34
42
|
end
|
35
43
|
|
36
44
|
def rtl_scripts
|
@@ -0,0 +1,28 @@
|
|
1
|
+
module StringDirection
|
2
|
+
# Strategy to decide between ltr and rtl depending on which kind of character is dominant in the string
|
3
|
+
class DominantStrategy < CharactersStrategy
|
4
|
+
# Get the number of ltr and rtl characters in the supplied string and infer
|
5
|
+
# direction from the most common type. For this strategy the direction can
|
6
|
+
# be ltr or rtl, but never bidi. In case of a tie it returns nil.
|
7
|
+
#
|
8
|
+
# @param string [String, #to_s] The string to inspect
|
9
|
+
# @return [String, nil]
|
10
|
+
def run(string)
|
11
|
+
string = string.to_s
|
12
|
+
ltr_count = chars_count(string, ltr_regex)
|
13
|
+
rtl_count = chars_count(string, rtl_regex)
|
14
|
+
diff = ltr_count - rtl_count
|
15
|
+
return ltr if diff > 0
|
16
|
+
return rtl if diff < 0
|
17
|
+
nil
|
18
|
+
end
|
19
|
+
|
20
|
+
private
|
21
|
+
|
22
|
+
def chars_count(string, regex)
|
23
|
+
count = 0
|
24
|
+
string.scan(regex) { count += 1 }
|
25
|
+
count
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
@@ -0,0 +1,115 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe StringDirection::DominantStrategy do
|
4
|
+
describe '#run' do
|
5
|
+
let(:english) { 'English' }
|
6
|
+
let(:arabic) { 'العربية' }
|
7
|
+
|
8
|
+
subject { described_class.new.run(string) }
|
9
|
+
|
10
|
+
context 'when both left-to-right and right-to-left characters are present in equal numbers' do
|
11
|
+
let(:string) { arabic + english }
|
12
|
+
|
13
|
+
it "returns nil" do
|
14
|
+
expect(subject).to eq nil
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
context 'when both left-to-right and right-to-left characters are present, with more ltr' do
|
19
|
+
let(:string) { english + arabic + english }
|
20
|
+
|
21
|
+
it "returns 'ltr'" do
|
22
|
+
expect(subject).to eq 'ltr'
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
context 'when right-to-left character are present but none of left-to-right' do
|
27
|
+
let(:string) { arabic }
|
28
|
+
|
29
|
+
it "returns 'rtl'" do
|
30
|
+
expect(subject).to eq 'rtl'
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
context 'when left-to-right character are present but none of right-to-left' do
|
35
|
+
let(:string) { english }
|
36
|
+
|
37
|
+
it "returns 'ltr'" do
|
38
|
+
expect(subject).to eq 'ltr'
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
context 'when neither left-to-right nor right-to-left characters are present' do
|
43
|
+
let(:string) { ' ' }
|
44
|
+
|
45
|
+
it 'returns nil' do
|
46
|
+
expect(subject).to be_nil
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
context 'when default right-to-left scripts are changed' do
|
51
|
+
let(:new_rtl_script) { 'Latin' }
|
52
|
+
let(:old_rtl_script) { 'Arabic' }
|
53
|
+
|
54
|
+
context 'when there are characters from an added right-to-left script' do
|
55
|
+
let(:string) { english }
|
56
|
+
|
57
|
+
it 'treats them as right-to-left chracters' do
|
58
|
+
StringDirection.configure do |config|
|
59
|
+
config.rtl_scripts << new_rtl_script
|
60
|
+
end
|
61
|
+
|
62
|
+
expect(subject).to eq 'rtl'
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
context 'when there are characters from a deleted right-to-left script ' do
|
67
|
+
let(:string) { arabic }
|
68
|
+
|
69
|
+
it 'treats them as left-to-right characters' do
|
70
|
+
StringDirection.configure do |config|
|
71
|
+
config.rtl_scripts.delete(old_rtl_script)
|
72
|
+
end
|
73
|
+
|
74
|
+
expect(subject).to eq 'ltr'
|
75
|
+
end
|
76
|
+
end
|
77
|
+
|
78
|
+
after :each do
|
79
|
+
StringDirection.reset_configuration
|
80
|
+
end
|
81
|
+
end
|
82
|
+
|
83
|
+
context 'when special characters are present' do
|
84
|
+
let(:string) do
|
85
|
+
mark = "\u0903"
|
86
|
+
punctuation = '_'
|
87
|
+
symbol = '€'
|
88
|
+
separator = ' '
|
89
|
+
other = "\u0005"
|
90
|
+
|
91
|
+
arabic + mark + punctuation + symbol + separator + other
|
92
|
+
end
|
93
|
+
|
94
|
+
it 'ignores them' do
|
95
|
+
expect(subject).to eq 'rtl'
|
96
|
+
end
|
97
|
+
end
|
98
|
+
|
99
|
+
context 'when an object responding to #to_s is given' do
|
100
|
+
let(:string) do
|
101
|
+
class StringDirection::TestObject
|
102
|
+
def to_s
|
103
|
+
'English'
|
104
|
+
end
|
105
|
+
end
|
106
|
+
|
107
|
+
StringDirection::TestObject.new
|
108
|
+
end
|
109
|
+
|
110
|
+
it 'takes as string the result of #to_s method' do
|
111
|
+
expect(subject).to eq('ltr')
|
112
|
+
end
|
113
|
+
end
|
114
|
+
end
|
115
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: string-direction
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Marc Busqué
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2016-06-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: yard
|
@@ -112,6 +112,7 @@ files:
|
|
112
112
|
- lib/string-direction/configuration.rb
|
113
113
|
- lib/string-direction/detector.rb
|
114
114
|
- lib/string-direction/strategies/characters_strategy.rb
|
115
|
+
- lib/string-direction/strategies/dominant_strategy.rb
|
115
116
|
- lib/string-direction/strategies/marks_strategy.rb
|
116
117
|
- lib/string-direction/strategy.rb
|
117
118
|
- lib/string-direction/string_methods.rb
|
@@ -120,6 +121,7 @@ files:
|
|
120
121
|
- spec/string-direction/configuration_spec.rb
|
121
122
|
- spec/string-direction/detector_spec.rb
|
122
123
|
- spec/string-direction/strategies/characters_strategy_spec.rb
|
124
|
+
- spec/string-direction/strategies/dominant_strategy_spec.rb
|
123
125
|
- spec/string-direction/strategies/marks_strategy_spec.rb
|
124
126
|
- spec/string-direction/strategy_spec.rb
|
125
127
|
- spec/string-direction/string_methods_spec.rb
|
@@ -145,7 +147,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
145
147
|
version: '0'
|
146
148
|
requirements: []
|
147
149
|
rubyforge_project:
|
148
|
-
rubygems_version: 2.
|
150
|
+
rubygems_version: 2.6.3
|
149
151
|
signing_key:
|
150
152
|
specification_version: 4
|
151
153
|
summary: Automatic detection of text direction (ltr, rtl or bidi) for strings
|