twitter_cldr 1.6.0 → 1.6.1
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +1 -0
- data/History.txt +5 -0
- data/js/lib/twitter_cldr_js.rb +2 -0
- data/lib/twitter_cldr/collation/collator.rb +8 -3
- data/lib/twitter_cldr/collation/sort_key_builder.rb +118 -34
- data/lib/twitter_cldr/collation/trie_builder.rb +5 -1
- data/lib/twitter_cldr/resources/import/tailoring.rb +14 -5
- data/lib/twitter_cldr/version.rb +1 -1
- data/resources/collation/tailoring/af.yml +1 -0
- data/resources/collation/tailoring/ar.yml +1 -0
- data/resources/collation/tailoring/ca.yml +1 -0
- data/resources/collation/tailoring/cs.yml +1 -0
- data/resources/collation/tailoring/da.yml +2 -0
- data/resources/collation/tailoring/de.yml +1 -0
- data/resources/collation/tailoring/el.yml +1 -0
- data/resources/collation/tailoring/en.yml +1 -0
- data/resources/collation/tailoring/es.yml +1 -0
- data/resources/collation/tailoring/eu.yml +1 -0
- data/resources/collation/tailoring/fa.yml +1 -0
- data/resources/collation/tailoring/fi.yml +1 -0
- data/resources/collation/tailoring/fil.yml +1 -0
- data/resources/collation/tailoring/fr.yml +1 -0
- data/resources/collation/tailoring/he.yml +1 -0
- data/resources/collation/tailoring/hi.yml +1 -0
- data/resources/collation/tailoring/hu.yml +1 -0
- data/resources/collation/tailoring/id.yml +1 -0
- data/resources/collation/tailoring/it.yml +1 -0
- data/resources/collation/tailoring/ja.yml +1 -0
- data/resources/collation/tailoring/ko.yml +1 -0
- data/resources/collation/tailoring/ms.yml +1 -0
- data/resources/collation/tailoring/nb.yml +1 -0
- data/resources/collation/tailoring/nl.yml +1 -0
- data/resources/collation/tailoring/pl.yml +1 -0
- data/resources/collation/tailoring/pt.yml +1 -0
- data/resources/collation/tailoring/ru.yml +1 -0
- data/resources/collation/tailoring/sv.yml +1 -0
- data/resources/collation/tailoring/th.yml +1 -0
- data/resources/collation/tailoring/tr.yml +1 -0
- data/resources/collation/tailoring/uk.yml +1 -0
- data/resources/collation/tailoring/ur.yml +1 -0
- data/resources/collation/tailoring/zh-Hant.yml +1 -0
- data/resources/collation/tailoring/zh.yml +1 -0
- data/spec/collation/collator_spec.rb +118 -16
- data/spec/collation/sort_key_builder_spec.rb +79 -25
- data/spec/collation/tailoring_spec.rb +0 -76
- data/spec/collation/tailoring_tests/da.txt +181 -181
- data/spec/collation/trie_builder_spec.rb +26 -12
- metadata +3 -3
@@ -56,19 +56,25 @@ describe Collator do
|
|
56
56
|
end
|
57
57
|
|
58
58
|
describe '#initialize' do
|
59
|
-
before
|
60
|
-
|
61
|
-
|
62
|
-
it 'initializes default collator if locale is not specified' do
|
63
|
-
Collator.new.locale.should be_nil
|
59
|
+
before :each do
|
60
|
+
stub(TrieBuilder).load_trie { trie }
|
61
|
+
any_instance_of(Collator) { |c| stub(c).load_trie { trie } }
|
64
62
|
end
|
65
63
|
|
66
|
-
|
67
|
-
|
64
|
+
context 'without locale' do
|
65
|
+
it 'initializes default collator' do
|
66
|
+
Collator.new.locale.should be_nil
|
67
|
+
end
|
68
68
|
end
|
69
69
|
|
70
|
-
|
71
|
-
|
70
|
+
context 'with locale' do
|
71
|
+
it 'initialized tailored collator with provided locale' do
|
72
|
+
Collator.new(:ru).locale.should == :ru
|
73
|
+
end
|
74
|
+
|
75
|
+
it 'converts locale' do
|
76
|
+
Collator.new(:no).locale.should == :nb
|
77
|
+
end
|
72
78
|
end
|
73
79
|
end
|
74
80
|
|
@@ -101,16 +107,36 @@ describe Collator do
|
|
101
107
|
let(:sort_key) { [39, 41, 43, 1, 7, 1, 7] }
|
102
108
|
|
103
109
|
before(:each) { stub(TrieBuilder).load_trie { trie } }
|
104
|
-
before(:each) { mock(TwitterCldr::Collation::SortKeyBuilder).build(collation_elements) { sort_key } }
|
105
110
|
|
106
|
-
|
107
|
-
mock(
|
108
|
-
|
111
|
+
describe 'calculating sort key' do
|
112
|
+
before(:each) { mock(TwitterCldr::Collation::SortKeyBuilder).build(collation_elements, nil) { sort_key } }
|
113
|
+
|
114
|
+
it 'calculates sort key for a string' do
|
115
|
+
mock(collator).get_collation_elements(string) { collation_elements }
|
116
|
+
collator.get_sort_key(string).should == sort_key
|
117
|
+
end
|
118
|
+
|
119
|
+
it 'calculates sort key for an array of code points (represented as hex strings)' do
|
120
|
+
mock(collator).get_collation_elements(code_points_hex) { collation_elements }
|
121
|
+
collator.get_sort_key(code_points_hex).should == sort_key
|
122
|
+
end
|
109
123
|
end
|
110
124
|
|
111
|
-
|
112
|
-
|
113
|
-
|
125
|
+
describe 'uses tailoring options' do
|
126
|
+
let(:case_first) { :upper }
|
127
|
+
let(:locale) { :uk }
|
128
|
+
|
129
|
+
it 'passes case-first sort option to sort key builder' do
|
130
|
+
mock(TwitterCldr::Collation::TrieBuilder).load_tailored_trie(locale, trie) { Trie.new }
|
131
|
+
mock(TwitterCldr::Collation::TrieBuilder).tailoring_data(locale) { { :collator_options => { :case_first => case_first } } }
|
132
|
+
|
133
|
+
collator = Collator.new(locale)
|
134
|
+
|
135
|
+
mock(collator).get_collation_elements(code_points_hex) { collation_elements }
|
136
|
+
mock(TwitterCldr::Collation::SortKeyBuilder).build(collation_elements, case_first) { sort_key }
|
137
|
+
|
138
|
+
collator.get_sort_key(code_points_hex).should == sort_key
|
139
|
+
end
|
114
140
|
end
|
115
141
|
end
|
116
142
|
|
@@ -165,6 +191,82 @@ describe Collator do
|
|
165
191
|
end
|
166
192
|
end
|
167
193
|
|
194
|
+
describe 'tailoring support' do
|
195
|
+
before(:each) do
|
196
|
+
stub(Collator).default_fce_trie { TrieBuilder.parse_trie(fractional_uca_short_stub) }
|
197
|
+
stub(TwitterCldr::Normalization::NFD).normalize_code_points { |code_points| code_points }
|
198
|
+
stub(TwitterCldr).get_resource(:collation, :tailoring, locale) { YAML.load(tailoring_resource_stub) }
|
199
|
+
end
|
200
|
+
|
201
|
+
let(:locale) { :some_locale }
|
202
|
+
let(:default_collator) { Collator.new }
|
203
|
+
let(:tailored_collator) { Collator.new(locale) }
|
204
|
+
|
205
|
+
describe 'tailoring rules support' do
|
206
|
+
it 'tailored collation elements are used' do
|
207
|
+
default_collator.get_collation_elements(%w[0490]).should == [[0x5C1A, 5, 0x93], [0, 0xDBB9, 9]]
|
208
|
+
tailored_collator.get_collation_elements(%w[0490]).should == [[0x5C1B, 5, 0x86]]
|
209
|
+
|
210
|
+
default_collator.get_collation_elements(%w[0491]).should == [[0x5C1A, 5, 9], [0, 0xDBB9, 9]]
|
211
|
+
tailored_collator.get_collation_elements(%w[0491]).should == [[0x5C1B, 5, 5]]
|
212
|
+
end
|
213
|
+
|
214
|
+
it 'original contractions for tailored elements are applied' do
|
215
|
+
default_collator.get_collation_elements(%w[0491 0306]).should == [[0x5C, 0xDB, 9]]
|
216
|
+
tailored_collator.get_collation_elements(%w[0491 0306]).should == [[0x5C, 0xDB, 9]]
|
217
|
+
end
|
218
|
+
end
|
219
|
+
|
220
|
+
describe 'contractions suppressing support' do
|
221
|
+
it 'suppressed contractions are ignored' do
|
222
|
+
default_collator.get_collation_elements(%w[041A 0301]).should == [[0x5CCC, 5, 0x8F]]
|
223
|
+
tailored_collator.get_collation_elements(%w[041A 0301]).should == [[0x5C6C, 5, 0x8F], [0, 0x8D, 5]]
|
224
|
+
end
|
225
|
+
|
226
|
+
it 'non-suppressed contractions are used' do
|
227
|
+
default_collator.get_collation_elements(%w[0415 0306]).should == [[0x5C36, 5, 0x8F]]
|
228
|
+
tailored_collator.get_collation_elements(%w[0415 0306]).should == [[0x5C36, 5, 0x8F]]
|
229
|
+
end
|
230
|
+
end
|
231
|
+
|
232
|
+
let(:fractional_uca_short_stub) do
|
233
|
+
<<END
|
234
|
+
# collation elements from default FCE table
|
235
|
+
0301; [, 8D, 05]
|
236
|
+
0306; [, 91, 05]
|
237
|
+
041A; [5C 6C, 05, 8F] # К
|
238
|
+
0413; [5C 1A, 05, 8F] # Г
|
239
|
+
0415; [5C 34, 05, 8F] # Е
|
240
|
+
|
241
|
+
# tailored (in UK locale) with "Г < ґ <<< Ґ"
|
242
|
+
0491; [5C 1A, 05, 09][, DB B9, 09] # ґ
|
243
|
+
0490; [5C 1A, 05, 93][, DB B9, 09] # Ґ
|
244
|
+
|
245
|
+
# contraction for a tailored collation element
|
246
|
+
0491 0306; [5C, DB, 09] # ґ̆
|
247
|
+
|
248
|
+
# contractions suppressed in tailoring (for RU locale)
|
249
|
+
041A 0301; [5C CC, 05, 8F] # Ќ
|
250
|
+
0413 0301; [5C 30, 05, 8F] # Ѓ
|
251
|
+
|
252
|
+
# contractions non-suppressed in tailoring
|
253
|
+
0415 0306; [5C 36, 05, 8F] # Ӗ
|
254
|
+
END
|
255
|
+
end
|
256
|
+
|
257
|
+
let(:tailoring_resource_stub) do
|
258
|
+
<<END
|
259
|
+
---
|
260
|
+
:tailored_table: ! '0491; [5C1B, 5, 5]
|
261
|
+
|
262
|
+
0490; [5C1B, 5, 86]'
|
263
|
+
:suppressed_contractions: ГК
|
264
|
+
...
|
265
|
+
END
|
266
|
+
end
|
267
|
+
|
268
|
+
end
|
269
|
+
|
168
270
|
def mock_sort_key(collator, string, sort_key)
|
169
271
|
mock(collator).get_sort_key(string) { sort_key }
|
170
272
|
end
|
@@ -17,7 +17,7 @@ describe SortKeyBuilder do
|
|
17
17
|
it 'returns a sort key for a given array of collation elements' do
|
18
18
|
sort_key = SortKeyBuilder.new(collation_elements)
|
19
19
|
|
20
|
-
mock(SortKeyBuilder).new(collation_elements) { sort_key }
|
20
|
+
mock(SortKeyBuilder).new(collation_elements, nil) { sort_key }
|
21
21
|
mock(sort_key).bytes_array { sort_key_bytes }
|
22
22
|
|
23
23
|
SortKeyBuilder.build(collation_elements).should == sort_key_bytes
|
@@ -28,6 +28,16 @@ describe SortKeyBuilder do
|
|
28
28
|
it 'assigns collation elements array' do
|
29
29
|
SortKeyBuilder.new(collation_elements).collation_elements.should == collation_elements
|
30
30
|
end
|
31
|
+
|
32
|
+
it 'accepts case-first option as the second argument' do
|
33
|
+
SortKeyBuilder::VALID_CASE_FIRST_OPTIONS.each do |case_first|
|
34
|
+
lambda { SortKeyBuilder.new([], case_first) }.should_not raise_error
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
it 'raises an ArgumentError for invalid case-first option' do
|
39
|
+
lambda { SortKeyBuilder.new([], :wat) }.should raise_error(ArgumentError)
|
40
|
+
end
|
31
41
|
end
|
32
42
|
|
33
43
|
describe '#bytes_array' do
|
@@ -40,40 +50,84 @@ describe SortKeyBuilder do
|
|
40
50
|
sort_key.bytes_array.object_id == sort_key.bytes_array.object_id
|
41
51
|
end
|
42
52
|
|
43
|
-
|
44
|
-
|
45
|
-
|
53
|
+
describe 'primary weights' do
|
54
|
+
it 'compresses primary weights' do
|
55
|
+
SortKeyBuilder.new([[0x7A72, 0, 0], [0x7A73, 0, 0], [0x7A75, 0, 0], [0x908, 0, 0], [0x7A73, 0, 0]]).bytes_array.should ==
|
56
|
+
[0x7A, 0x72, 0x73, 0x75, 0x3, 0x9, 0x08, 0x7A, 0x73, 1, 1]
|
46
57
|
|
47
|
-
|
48
|
-
|
49
|
-
|
58
|
+
SortKeyBuilder.new([[0x7A72, 0, 0], [0x7A73, 0, 0], [0x7A75, 0, 0], [0x9508, 0, 0], [0x7A73, 0, 0]]).bytes_array.should ==
|
59
|
+
[0x7A, 0x72, 0x73, 0x75, 0xFF, 0x95, 0x08, 0x7A, 0x73, 1, 1]
|
60
|
+
end
|
50
61
|
|
51
|
-
|
52
|
-
|
53
|
-
|
62
|
+
it 'works when there is an ignorable primary weight in the middle' do
|
63
|
+
SortKeyBuilder.new([[0x1312, 0, 0], [0, 0, 0], [0x1415, 0, 0]]).bytes_array.should == [0x13, 0x12, 0x14, 0x15, 1, 1]
|
64
|
+
end
|
54
65
|
|
55
|
-
|
56
|
-
|
57
|
-
|
66
|
+
it 'do not compress single byte primary weights' do
|
67
|
+
SortKeyBuilder.new([[0x13, 0, 0], [0x13, 0, 0]]).bytes_array.should == [0x13, 0x13, 1, 1]
|
68
|
+
end
|
58
69
|
|
59
|
-
|
60
|
-
|
61
|
-
|
70
|
+
it 'resets primary lead bytes counter after a single byte weight' do
|
71
|
+
SortKeyBuilder.new([[0x1415, 0, 0], [0x13, 0, 0], [0x13, 0, 0], [0x1412, 0, 0]]).bytes_array.should == [0x14, 0x15, 0x13, 0x13, 0x14, 0x12, 1, 1]
|
72
|
+
end
|
62
73
|
|
63
|
-
|
64
|
-
|
74
|
+
it 'compresses only compressible primary weights' do
|
75
|
+
SortKeyBuilder.new([[0x812, 0, 0], [0x811, 0, 0]]).bytes_array.should == [0x8, 0x12, 0x8, 0x11, 1, 1]
|
76
|
+
end
|
65
77
|
end
|
66
78
|
|
67
|
-
|
68
|
-
|
69
|
-
|
79
|
+
describe 'secondary weights' do
|
80
|
+
it 'compresses secondary weights' do
|
81
|
+
SortKeyBuilder.new([[0, 5, 0], [0, 5, 0], [0, 141, 0], [0, 5, 0], [0, 5, 0]]).bytes_array.should == [1, 133, 141, 6, 1]
|
82
|
+
end
|
70
83
|
|
71
|
-
|
72
|
-
|
84
|
+
it 'compresses secondary weights into multiple bytes if necessary' do
|
85
|
+
SortKeyBuilder.new([[0, 5, 0]] * 100).bytes_array.should == [1, 69, 40, 1]
|
86
|
+
end
|
73
87
|
end
|
74
88
|
|
75
|
-
|
76
|
-
|
89
|
+
describe 'tertiary weights' do
|
90
|
+
context 'when case_first is not set' do
|
91
|
+
it 'removes case bits and adds top addition to bytes that are greater than common' do
|
92
|
+
SortKeyBuilder.new([[0, 0, 9], [0, 0, 73], [0, 0, 137], [0, 0, 201]]).bytes_array.should == [1, 1, 137, 137, 137, 137]
|
93
|
+
end
|
94
|
+
|
95
|
+
it 'compresses tertiary weights' do
|
96
|
+
SortKeyBuilder.new([[0, 0, 5], [0, 0, 5], [0, 0, 39], [0, 0, 5], [0, 0, 5]]).bytes_array.should == [1, 1, 0x84, 0xA7, 6]
|
97
|
+
end
|
98
|
+
|
99
|
+
it 'compresses tertiary weights into multiple bytes if necessary' do
|
100
|
+
SortKeyBuilder.new([[0, 0, 5]] * 100).bytes_array.should == [1, 1, 0x30, 0x30, 0x12]
|
101
|
+
end
|
102
|
+
end
|
103
|
+
|
104
|
+
context 'when case_first is :upper' do
|
105
|
+
it 'inverts case bits and subtract bottom addition from bytes that are smaller than common' do
|
106
|
+
SortKeyBuilder.new([[0, 0, 9], [0, 0, 80], [0, 0, 143]], :upper).bytes_array.should == [1, 1, 201, 80, 15]
|
107
|
+
end
|
108
|
+
|
109
|
+
it 'compresses tertiary weights' do
|
110
|
+
SortKeyBuilder.new([[0, 0, 5], [0, 0, 5], [0, 0, 39], [0, 0, 5], [0, 0, 5]], :upper).bytes_array.should == [1, 1, 0xC4, 0xE7, 0xC3]
|
111
|
+
end
|
112
|
+
|
113
|
+
it 'compresses tertiary weights into multiple bytes if necessary' do
|
114
|
+
SortKeyBuilder.new([[0, 0, 5]] * 100, :upper).bytes_array.should == [1, 1, 0x9C, 0x9C, 0xB3]
|
115
|
+
end
|
116
|
+
end
|
117
|
+
|
118
|
+
context 'when case_first is :lower' do
|
119
|
+
it 'leaves case bits and adds top addition to bytes that are greater than common' do
|
120
|
+
SortKeyBuilder.new([[0, 0, 9], [0, 0, 80], [0, 0, 143]], :lower).bytes_array.should == [1, 1, 73, 144, 207]
|
121
|
+
end
|
122
|
+
|
123
|
+
it 'compresses tertiary weights' do
|
124
|
+
SortKeyBuilder.new([[0, 0, 5], [0, 0, 5], [0, 0, 39], [0, 0, 5], [0, 0, 5]], :lower).bytes_array.should == [1, 1, 0x44, 0x67, 6]
|
125
|
+
end
|
126
|
+
|
127
|
+
it 'compresses tertiary weights into multiple bytes if necessary' do
|
128
|
+
SortKeyBuilder.new([[0, 0, 5]] * 100, :lower).bytes_array.should == [1, 1, 0x1A, 0x1A, 0x1A, 0x1A, 0x14]
|
129
|
+
end
|
130
|
+
end
|
77
131
|
end
|
78
132
|
end
|
79
133
|
|
@@ -9,82 +9,6 @@ include TwitterCldr::Collation
|
|
9
9
|
|
10
10
|
describe 'Unicode collation tailoring' do
|
11
11
|
|
12
|
-
describe 'tailoring support' do
|
13
|
-
before(:each) do
|
14
|
-
stub(Collator).default_fce_trie { TrieBuilder.parse_trie(fractional_uca_short_stub) }
|
15
|
-
stub(TwitterCldr::Normalization::NFD).normalize_code_points { |code_points| code_points }
|
16
|
-
stub(TwitterCldr).get_resource(:collation, :tailoring, locale) { YAML.load(tailoring_resource_stub) }
|
17
|
-
end
|
18
|
-
|
19
|
-
let(:locale) { :some_locale }
|
20
|
-
let(:default_collator) { Collator.new }
|
21
|
-
let(:tailored_collator) { Collator.new(locale) }
|
22
|
-
|
23
|
-
describe 'tailoring rules support' do
|
24
|
-
it 'tailored collation elements are used' do
|
25
|
-
default_collator.get_collation_elements(%w[0490]).should == [[0x5C1A, 5, 0x93], [0, 0xDBB9, 9]]
|
26
|
-
tailored_collator.get_collation_elements(%w[0490]).should == [[0x5C1B, 5, 0x86]]
|
27
|
-
|
28
|
-
default_collator.get_collation_elements(%w[0491]).should == [[0x5C1A, 5, 9], [0, 0xDBB9, 9]]
|
29
|
-
tailored_collator.get_collation_elements(%w[0491]).should == [[0x5C1B, 5, 5]]
|
30
|
-
end
|
31
|
-
|
32
|
-
it 'original contractions for tailored elements are applied' do
|
33
|
-
default_collator.get_collation_elements(%w[0491 0306]).should == [[0x5C, 0xDB, 9]]
|
34
|
-
tailored_collator.get_collation_elements(%w[0491 0306]).should == [[0x5C, 0xDB, 9]]
|
35
|
-
end
|
36
|
-
end
|
37
|
-
|
38
|
-
describe 'contractions suppressing support' do
|
39
|
-
it 'suppressed contractions are ignored' do
|
40
|
-
default_collator.get_collation_elements(%w[041A 0301]).should == [[0x5CCC, 5, 0x8F]]
|
41
|
-
tailored_collator.get_collation_elements(%w[041A 0301]).should == [[0x5C6C, 5, 0x8F], [0, 0x8D, 5]]
|
42
|
-
end
|
43
|
-
|
44
|
-
it 'non-suppressed contractions are used' do
|
45
|
-
default_collator.get_collation_elements(%w[0415 0306]).should == [[0x5C36, 5, 0x8F]]
|
46
|
-
tailored_collator.get_collation_elements(%w[0415 0306]).should == [[0x5C36, 5, 0x8F]]
|
47
|
-
end
|
48
|
-
end
|
49
|
-
|
50
|
-
let(:fractional_uca_short_stub) do
|
51
|
-
<<END
|
52
|
-
# collation elements from default FCE table
|
53
|
-
0301; [, 8D, 05]
|
54
|
-
0306; [, 91, 05]
|
55
|
-
041A; [5C 6C, 05, 8F] # К
|
56
|
-
0413; [5C 1A, 05, 8F] # Г
|
57
|
-
0415; [5C 34, 05, 8F] # Е
|
58
|
-
|
59
|
-
# tailored (in UK locale) with "Г < ґ <<< Ґ"
|
60
|
-
0491; [5C 1A, 05, 09][, DB B9, 09] # ґ
|
61
|
-
0490; [5C 1A, 05, 93][, DB B9, 09] # Ґ
|
62
|
-
|
63
|
-
# contraction for a tailored collation element
|
64
|
-
0491 0306; [5C, DB, 09] # ґ̆
|
65
|
-
|
66
|
-
# contractions suppressed in tailoring (for RU locale)
|
67
|
-
041A 0301; [5C CC, 05, 8F] # Ќ
|
68
|
-
0413 0301; [5C 30, 05, 8F] # Ѓ
|
69
|
-
|
70
|
-
# contractions non-suppressed in tailoring
|
71
|
-
0415 0306; [5C 36, 05, 8F] # Ӗ
|
72
|
-
END
|
73
|
-
end
|
74
|
-
|
75
|
-
let(:tailoring_resource_stub) do
|
76
|
-
<<END
|
77
|
-
---
|
78
|
-
:tailored_table: ! '0491; [5C1B, 5, 5]
|
79
|
-
|
80
|
-
0490; [5C1B, 5, 86]'
|
81
|
-
:suppressed_contractions: ГК
|
82
|
-
...
|
83
|
-
END
|
84
|
-
end
|
85
|
-
|
86
|
-
end
|
87
|
-
|
88
12
|
# Test data is taken from http://unicode.org/cldr/trac/browser/tags/release-2-0-1/test/
|
89
13
|
# Test files format: # - comments, // - pending tests.
|
90
14
|
#
|