twitter_cldr 1.6.0 → 1.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +1 -0
- data/History.txt +5 -0
- data/js/lib/twitter_cldr_js.rb +2 -0
- data/lib/twitter_cldr/collation/collator.rb +8 -3
- data/lib/twitter_cldr/collation/sort_key_builder.rb +118 -34
- data/lib/twitter_cldr/collation/trie_builder.rb +5 -1
- data/lib/twitter_cldr/resources/import/tailoring.rb +14 -5
- data/lib/twitter_cldr/version.rb +1 -1
- data/resources/collation/tailoring/af.yml +1 -0
- data/resources/collation/tailoring/ar.yml +1 -0
- data/resources/collation/tailoring/ca.yml +1 -0
- data/resources/collation/tailoring/cs.yml +1 -0
- data/resources/collation/tailoring/da.yml +2 -0
- data/resources/collation/tailoring/de.yml +1 -0
- data/resources/collation/tailoring/el.yml +1 -0
- data/resources/collation/tailoring/en.yml +1 -0
- data/resources/collation/tailoring/es.yml +1 -0
- data/resources/collation/tailoring/eu.yml +1 -0
- data/resources/collation/tailoring/fa.yml +1 -0
- data/resources/collation/tailoring/fi.yml +1 -0
- data/resources/collation/tailoring/fil.yml +1 -0
- data/resources/collation/tailoring/fr.yml +1 -0
- data/resources/collation/tailoring/he.yml +1 -0
- data/resources/collation/tailoring/hi.yml +1 -0
- data/resources/collation/tailoring/hu.yml +1 -0
- data/resources/collation/tailoring/id.yml +1 -0
- data/resources/collation/tailoring/it.yml +1 -0
- data/resources/collation/tailoring/ja.yml +1 -0
- data/resources/collation/tailoring/ko.yml +1 -0
- data/resources/collation/tailoring/ms.yml +1 -0
- data/resources/collation/tailoring/nb.yml +1 -0
- data/resources/collation/tailoring/nl.yml +1 -0
- data/resources/collation/tailoring/pl.yml +1 -0
- data/resources/collation/tailoring/pt.yml +1 -0
- data/resources/collation/tailoring/ru.yml +1 -0
- data/resources/collation/tailoring/sv.yml +1 -0
- data/resources/collation/tailoring/th.yml +1 -0
- data/resources/collation/tailoring/tr.yml +1 -0
- data/resources/collation/tailoring/uk.yml +1 -0
- data/resources/collation/tailoring/ur.yml +1 -0
- data/resources/collation/tailoring/zh-Hant.yml +1 -0
- data/resources/collation/tailoring/zh.yml +1 -0
- data/spec/collation/collator_spec.rb +118 -16
- data/spec/collation/sort_key_builder_spec.rb +79 -25
- data/spec/collation/tailoring_spec.rb +0 -76
- data/spec/collation/tailoring_tests/da.txt +181 -181
- data/spec/collation/trie_builder_spec.rb +26 -12
- metadata +3 -3
@@ -56,19 +56,25 @@ describe Collator do
|
|
56
56
|
end
|
57
57
|
|
58
58
|
describe '#initialize' do
|
59
|
-
before
|
60
|
-
|
61
|
-
|
62
|
-
it 'initializes default collator if locale is not specified' do
|
63
|
-
Collator.new.locale.should be_nil
|
59
|
+
before :each do
|
60
|
+
stub(TrieBuilder).load_trie { trie }
|
61
|
+
any_instance_of(Collator) { |c| stub(c).load_trie { trie } }
|
64
62
|
end
|
65
63
|
|
66
|
-
|
67
|
-
|
64
|
+
context 'without locale' do
|
65
|
+
it 'initializes default collator' do
|
66
|
+
Collator.new.locale.should be_nil
|
67
|
+
end
|
68
68
|
end
|
69
69
|
|
70
|
-
|
71
|
-
|
70
|
+
context 'with locale' do
|
71
|
+
it 'initialized tailored collator with provided locale' do
|
72
|
+
Collator.new(:ru).locale.should == :ru
|
73
|
+
end
|
74
|
+
|
75
|
+
it 'converts locale' do
|
76
|
+
Collator.new(:no).locale.should == :nb
|
77
|
+
end
|
72
78
|
end
|
73
79
|
end
|
74
80
|
|
@@ -101,16 +107,36 @@ describe Collator do
|
|
101
107
|
let(:sort_key) { [39, 41, 43, 1, 7, 1, 7] }
|
102
108
|
|
103
109
|
before(:each) { stub(TrieBuilder).load_trie { trie } }
|
104
|
-
before(:each) { mock(TwitterCldr::Collation::SortKeyBuilder).build(collation_elements) { sort_key } }
|
105
110
|
|
106
|
-
|
107
|
-
mock(
|
108
|
-
|
111
|
+
describe 'calculating sort key' do
|
112
|
+
before(:each) { mock(TwitterCldr::Collation::SortKeyBuilder).build(collation_elements, nil) { sort_key } }
|
113
|
+
|
114
|
+
it 'calculates sort key for a string' do
|
115
|
+
mock(collator).get_collation_elements(string) { collation_elements }
|
116
|
+
collator.get_sort_key(string).should == sort_key
|
117
|
+
end
|
118
|
+
|
119
|
+
it 'calculates sort key for an array of code points (represented as hex strings)' do
|
120
|
+
mock(collator).get_collation_elements(code_points_hex) { collation_elements }
|
121
|
+
collator.get_sort_key(code_points_hex).should == sort_key
|
122
|
+
end
|
109
123
|
end
|
110
124
|
|
111
|
-
|
112
|
-
|
113
|
-
|
125
|
+
describe 'uses tailoring options' do
|
126
|
+
let(:case_first) { :upper }
|
127
|
+
let(:locale) { :uk }
|
128
|
+
|
129
|
+
it 'passes case-first sort option to sort key builder' do
|
130
|
+
mock(TwitterCldr::Collation::TrieBuilder).load_tailored_trie(locale, trie) { Trie.new }
|
131
|
+
mock(TwitterCldr::Collation::TrieBuilder).tailoring_data(locale) { { :collator_options => { :case_first => case_first } } }
|
132
|
+
|
133
|
+
collator = Collator.new(locale)
|
134
|
+
|
135
|
+
mock(collator).get_collation_elements(code_points_hex) { collation_elements }
|
136
|
+
mock(TwitterCldr::Collation::SortKeyBuilder).build(collation_elements, case_first) { sort_key }
|
137
|
+
|
138
|
+
collator.get_sort_key(code_points_hex).should == sort_key
|
139
|
+
end
|
114
140
|
end
|
115
141
|
end
|
116
142
|
|
@@ -165,6 +191,82 @@ describe Collator do
|
|
165
191
|
end
|
166
192
|
end
|
167
193
|
|
194
|
+
describe 'tailoring support' do
|
195
|
+
before(:each) do
|
196
|
+
stub(Collator).default_fce_trie { TrieBuilder.parse_trie(fractional_uca_short_stub) }
|
197
|
+
stub(TwitterCldr::Normalization::NFD).normalize_code_points { |code_points| code_points }
|
198
|
+
stub(TwitterCldr).get_resource(:collation, :tailoring, locale) { YAML.load(tailoring_resource_stub) }
|
199
|
+
end
|
200
|
+
|
201
|
+
let(:locale) { :some_locale }
|
202
|
+
let(:default_collator) { Collator.new }
|
203
|
+
let(:tailored_collator) { Collator.new(locale) }
|
204
|
+
|
205
|
+
describe 'tailoring rules support' do
|
206
|
+
it 'tailored collation elements are used' do
|
207
|
+
default_collator.get_collation_elements(%w[0490]).should == [[0x5C1A, 5, 0x93], [0, 0xDBB9, 9]]
|
208
|
+
tailored_collator.get_collation_elements(%w[0490]).should == [[0x5C1B, 5, 0x86]]
|
209
|
+
|
210
|
+
default_collator.get_collation_elements(%w[0491]).should == [[0x5C1A, 5, 9], [0, 0xDBB9, 9]]
|
211
|
+
tailored_collator.get_collation_elements(%w[0491]).should == [[0x5C1B, 5, 5]]
|
212
|
+
end
|
213
|
+
|
214
|
+
it 'original contractions for tailored elements are applied' do
|
215
|
+
default_collator.get_collation_elements(%w[0491 0306]).should == [[0x5C, 0xDB, 9]]
|
216
|
+
tailored_collator.get_collation_elements(%w[0491 0306]).should == [[0x5C, 0xDB, 9]]
|
217
|
+
end
|
218
|
+
end
|
219
|
+
|
220
|
+
describe 'contractions suppressing support' do
|
221
|
+
it 'suppressed contractions are ignored' do
|
222
|
+
default_collator.get_collation_elements(%w[041A 0301]).should == [[0x5CCC, 5, 0x8F]]
|
223
|
+
tailored_collator.get_collation_elements(%w[041A 0301]).should == [[0x5C6C, 5, 0x8F], [0, 0x8D, 5]]
|
224
|
+
end
|
225
|
+
|
226
|
+
it 'non-suppressed contractions are used' do
|
227
|
+
default_collator.get_collation_elements(%w[0415 0306]).should == [[0x5C36, 5, 0x8F]]
|
228
|
+
tailored_collator.get_collation_elements(%w[0415 0306]).should == [[0x5C36, 5, 0x8F]]
|
229
|
+
end
|
230
|
+
end
|
231
|
+
|
232
|
+
let(:fractional_uca_short_stub) do
|
233
|
+
<<END
|
234
|
+
# collation elements from default FCE table
|
235
|
+
0301; [, 8D, 05]
|
236
|
+
0306; [, 91, 05]
|
237
|
+
041A; [5C 6C, 05, 8F] # К
|
238
|
+
0413; [5C 1A, 05, 8F] # Г
|
239
|
+
0415; [5C 34, 05, 8F] # Е
|
240
|
+
|
241
|
+
# tailored (in UK locale) with "Г < ґ <<< Ґ"
|
242
|
+
0491; [5C 1A, 05, 09][, DB B9, 09] # ґ
|
243
|
+
0490; [5C 1A, 05, 93][, DB B9, 09] # Ґ
|
244
|
+
|
245
|
+
# contraction for a tailored collation element
|
246
|
+
0491 0306; [5C, DB, 09] # ґ̆
|
247
|
+
|
248
|
+
# contractions suppressed in tailoring (for RU locale)
|
249
|
+
041A 0301; [5C CC, 05, 8F] # Ќ
|
250
|
+
0413 0301; [5C 30, 05, 8F] # Ѓ
|
251
|
+
|
252
|
+
# contractions non-suppressed in tailoring
|
253
|
+
0415 0306; [5C 36, 05, 8F] # Ӗ
|
254
|
+
END
|
255
|
+
end
|
256
|
+
|
257
|
+
let(:tailoring_resource_stub) do
|
258
|
+
<<END
|
259
|
+
---
|
260
|
+
:tailored_table: ! '0491; [5C1B, 5, 5]
|
261
|
+
|
262
|
+
0490; [5C1B, 5, 86]'
|
263
|
+
:suppressed_contractions: ГК
|
264
|
+
...
|
265
|
+
END
|
266
|
+
end
|
267
|
+
|
268
|
+
end
|
269
|
+
|
168
270
|
def mock_sort_key(collator, string, sort_key)
|
169
271
|
mock(collator).get_sort_key(string) { sort_key }
|
170
272
|
end
|
@@ -17,7 +17,7 @@ describe SortKeyBuilder do
|
|
17
17
|
it 'returns a sort key for a given array of collation elements' do
|
18
18
|
sort_key = SortKeyBuilder.new(collation_elements)
|
19
19
|
|
20
|
-
mock(SortKeyBuilder).new(collation_elements) { sort_key }
|
20
|
+
mock(SortKeyBuilder).new(collation_elements, nil) { sort_key }
|
21
21
|
mock(sort_key).bytes_array { sort_key_bytes }
|
22
22
|
|
23
23
|
SortKeyBuilder.build(collation_elements).should == sort_key_bytes
|
@@ -28,6 +28,16 @@ describe SortKeyBuilder do
|
|
28
28
|
it 'assigns collation elements array' do
|
29
29
|
SortKeyBuilder.new(collation_elements).collation_elements.should == collation_elements
|
30
30
|
end
|
31
|
+
|
32
|
+
it 'accepts case-first option as the second argument' do
|
33
|
+
SortKeyBuilder::VALID_CASE_FIRST_OPTIONS.each do |case_first|
|
34
|
+
lambda { SortKeyBuilder.new([], case_first) }.should_not raise_error
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
it 'raises an ArgumentError for invalid case-first option' do
|
39
|
+
lambda { SortKeyBuilder.new([], :wat) }.should raise_error(ArgumentError)
|
40
|
+
end
|
31
41
|
end
|
32
42
|
|
33
43
|
describe '#bytes_array' do
|
@@ -40,40 +50,84 @@ describe SortKeyBuilder do
|
|
40
50
|
sort_key.bytes_array.object_id == sort_key.bytes_array.object_id
|
41
51
|
end
|
42
52
|
|
43
|
-
|
44
|
-
|
45
|
-
|
53
|
+
describe 'primary weights' do
|
54
|
+
it 'compresses primary weights' do
|
55
|
+
SortKeyBuilder.new([[0x7A72, 0, 0], [0x7A73, 0, 0], [0x7A75, 0, 0], [0x908, 0, 0], [0x7A73, 0, 0]]).bytes_array.should ==
|
56
|
+
[0x7A, 0x72, 0x73, 0x75, 0x3, 0x9, 0x08, 0x7A, 0x73, 1, 1]
|
46
57
|
|
47
|
-
|
48
|
-
|
49
|
-
|
58
|
+
SortKeyBuilder.new([[0x7A72, 0, 0], [0x7A73, 0, 0], [0x7A75, 0, 0], [0x9508, 0, 0], [0x7A73, 0, 0]]).bytes_array.should ==
|
59
|
+
[0x7A, 0x72, 0x73, 0x75, 0xFF, 0x95, 0x08, 0x7A, 0x73, 1, 1]
|
60
|
+
end
|
50
61
|
|
51
|
-
|
52
|
-
|
53
|
-
|
62
|
+
it 'works when there is an ignorable primary weight in the middle' do
|
63
|
+
SortKeyBuilder.new([[0x1312, 0, 0], [0, 0, 0], [0x1415, 0, 0]]).bytes_array.should == [0x13, 0x12, 0x14, 0x15, 1, 1]
|
64
|
+
end
|
54
65
|
|
55
|
-
|
56
|
-
|
57
|
-
|
66
|
+
it 'do not compress single byte primary weights' do
|
67
|
+
SortKeyBuilder.new([[0x13, 0, 0], [0x13, 0, 0]]).bytes_array.should == [0x13, 0x13, 1, 1]
|
68
|
+
end
|
58
69
|
|
59
|
-
|
60
|
-
|
61
|
-
|
70
|
+
it 'resets primary lead bytes counter after a single byte weight' do
|
71
|
+
SortKeyBuilder.new([[0x1415, 0, 0], [0x13, 0, 0], [0x13, 0, 0], [0x1412, 0, 0]]).bytes_array.should == [0x14, 0x15, 0x13, 0x13, 0x14, 0x12, 1, 1]
|
72
|
+
end
|
62
73
|
|
63
|
-
|
64
|
-
|
74
|
+
it 'compresses only compressible primary weights' do
|
75
|
+
SortKeyBuilder.new([[0x812, 0, 0], [0x811, 0, 0]]).bytes_array.should == [0x8, 0x12, 0x8, 0x11, 1, 1]
|
76
|
+
end
|
65
77
|
end
|
66
78
|
|
67
|
-
|
68
|
-
|
69
|
-
|
79
|
+
describe 'secondary weights' do
|
80
|
+
it 'compresses secondary weights' do
|
81
|
+
SortKeyBuilder.new([[0, 5, 0], [0, 5, 0], [0, 141, 0], [0, 5, 0], [0, 5, 0]]).bytes_array.should == [1, 133, 141, 6, 1]
|
82
|
+
end
|
70
83
|
|
71
|
-
|
72
|
-
|
84
|
+
it 'compresses secondary weights into multiple bytes if necessary' do
|
85
|
+
SortKeyBuilder.new([[0, 5, 0]] * 100).bytes_array.should == [1, 69, 40, 1]
|
86
|
+
end
|
73
87
|
end
|
74
88
|
|
75
|
-
|
76
|
-
|
89
|
+
describe 'tertiary weights' do
|
90
|
+
context 'when case_first is not set' do
|
91
|
+
it 'removes case bits and adds top addition to bytes that are greater than common' do
|
92
|
+
SortKeyBuilder.new([[0, 0, 9], [0, 0, 73], [0, 0, 137], [0, 0, 201]]).bytes_array.should == [1, 1, 137, 137, 137, 137]
|
93
|
+
end
|
94
|
+
|
95
|
+
it 'compresses tertiary weights' do
|
96
|
+
SortKeyBuilder.new([[0, 0, 5], [0, 0, 5], [0, 0, 39], [0, 0, 5], [0, 0, 5]]).bytes_array.should == [1, 1, 0x84, 0xA7, 6]
|
97
|
+
end
|
98
|
+
|
99
|
+
it 'compresses tertiary weights into multiple bytes if necessary' do
|
100
|
+
SortKeyBuilder.new([[0, 0, 5]] * 100).bytes_array.should == [1, 1, 0x30, 0x30, 0x12]
|
101
|
+
end
|
102
|
+
end
|
103
|
+
|
104
|
+
context 'when case_first is :upper' do
|
105
|
+
it 'inverts case bits and subtract bottom addition from bytes that are smaller than common' do
|
106
|
+
SortKeyBuilder.new([[0, 0, 9], [0, 0, 80], [0, 0, 143]], :upper).bytes_array.should == [1, 1, 201, 80, 15]
|
107
|
+
end
|
108
|
+
|
109
|
+
it 'compresses tertiary weights' do
|
110
|
+
SortKeyBuilder.new([[0, 0, 5], [0, 0, 5], [0, 0, 39], [0, 0, 5], [0, 0, 5]], :upper).bytes_array.should == [1, 1, 0xC4, 0xE7, 0xC3]
|
111
|
+
end
|
112
|
+
|
113
|
+
it 'compresses tertiary weights into multiple bytes if necessary' do
|
114
|
+
SortKeyBuilder.new([[0, 0, 5]] * 100, :upper).bytes_array.should == [1, 1, 0x9C, 0x9C, 0xB3]
|
115
|
+
end
|
116
|
+
end
|
117
|
+
|
118
|
+
context 'when case_first is :lower' do
|
119
|
+
it 'leaves case bits and adds top addition to bytes that are greater than common' do
|
120
|
+
SortKeyBuilder.new([[0, 0, 9], [0, 0, 80], [0, 0, 143]], :lower).bytes_array.should == [1, 1, 73, 144, 207]
|
121
|
+
end
|
122
|
+
|
123
|
+
it 'compresses tertiary weights' do
|
124
|
+
SortKeyBuilder.new([[0, 0, 5], [0, 0, 5], [0, 0, 39], [0, 0, 5], [0, 0, 5]], :lower).bytes_array.should == [1, 1, 0x44, 0x67, 6]
|
125
|
+
end
|
126
|
+
|
127
|
+
it 'compresses tertiary weights into multiple bytes if necessary' do
|
128
|
+
SortKeyBuilder.new([[0, 0, 5]] * 100, :lower).bytes_array.should == [1, 1, 0x1A, 0x1A, 0x1A, 0x1A, 0x14]
|
129
|
+
end
|
130
|
+
end
|
77
131
|
end
|
78
132
|
end
|
79
133
|
|
@@ -9,82 +9,6 @@ include TwitterCldr::Collation
|
|
9
9
|
|
10
10
|
describe 'Unicode collation tailoring' do
|
11
11
|
|
12
|
-
describe 'tailoring support' do
|
13
|
-
before(:each) do
|
14
|
-
stub(Collator).default_fce_trie { TrieBuilder.parse_trie(fractional_uca_short_stub) }
|
15
|
-
stub(TwitterCldr::Normalization::NFD).normalize_code_points { |code_points| code_points }
|
16
|
-
stub(TwitterCldr).get_resource(:collation, :tailoring, locale) { YAML.load(tailoring_resource_stub) }
|
17
|
-
end
|
18
|
-
|
19
|
-
let(:locale) { :some_locale }
|
20
|
-
let(:default_collator) { Collator.new }
|
21
|
-
let(:tailored_collator) { Collator.new(locale) }
|
22
|
-
|
23
|
-
describe 'tailoring rules support' do
|
24
|
-
it 'tailored collation elements are used' do
|
25
|
-
default_collator.get_collation_elements(%w[0490]).should == [[0x5C1A, 5, 0x93], [0, 0xDBB9, 9]]
|
26
|
-
tailored_collator.get_collation_elements(%w[0490]).should == [[0x5C1B, 5, 0x86]]
|
27
|
-
|
28
|
-
default_collator.get_collation_elements(%w[0491]).should == [[0x5C1A, 5, 9], [0, 0xDBB9, 9]]
|
29
|
-
tailored_collator.get_collation_elements(%w[0491]).should == [[0x5C1B, 5, 5]]
|
30
|
-
end
|
31
|
-
|
32
|
-
it 'original contractions for tailored elements are applied' do
|
33
|
-
default_collator.get_collation_elements(%w[0491 0306]).should == [[0x5C, 0xDB, 9]]
|
34
|
-
tailored_collator.get_collation_elements(%w[0491 0306]).should == [[0x5C, 0xDB, 9]]
|
35
|
-
end
|
36
|
-
end
|
37
|
-
|
38
|
-
describe 'contractions suppressing support' do
|
39
|
-
it 'suppressed contractions are ignored' do
|
40
|
-
default_collator.get_collation_elements(%w[041A 0301]).should == [[0x5CCC, 5, 0x8F]]
|
41
|
-
tailored_collator.get_collation_elements(%w[041A 0301]).should == [[0x5C6C, 5, 0x8F], [0, 0x8D, 5]]
|
42
|
-
end
|
43
|
-
|
44
|
-
it 'non-suppressed contractions are used' do
|
45
|
-
default_collator.get_collation_elements(%w[0415 0306]).should == [[0x5C36, 5, 0x8F]]
|
46
|
-
tailored_collator.get_collation_elements(%w[0415 0306]).should == [[0x5C36, 5, 0x8F]]
|
47
|
-
end
|
48
|
-
end
|
49
|
-
|
50
|
-
let(:fractional_uca_short_stub) do
|
51
|
-
<<END
|
52
|
-
# collation elements from default FCE table
|
53
|
-
0301; [, 8D, 05]
|
54
|
-
0306; [, 91, 05]
|
55
|
-
041A; [5C 6C, 05, 8F] # К
|
56
|
-
0413; [5C 1A, 05, 8F] # Г
|
57
|
-
0415; [5C 34, 05, 8F] # Е
|
58
|
-
|
59
|
-
# tailored (in UK locale) with "Г < ґ <<< Ґ"
|
60
|
-
0491; [5C 1A, 05, 09][, DB B9, 09] # ґ
|
61
|
-
0490; [5C 1A, 05, 93][, DB B9, 09] # Ґ
|
62
|
-
|
63
|
-
# contraction for a tailored collation element
|
64
|
-
0491 0306; [5C, DB, 09] # ґ̆
|
65
|
-
|
66
|
-
# contractions suppressed in tailoring (for RU locale)
|
67
|
-
041A 0301; [5C CC, 05, 8F] # Ќ
|
68
|
-
0413 0301; [5C 30, 05, 8F] # Ѓ
|
69
|
-
|
70
|
-
# contractions non-suppressed in tailoring
|
71
|
-
0415 0306; [5C 36, 05, 8F] # Ӗ
|
72
|
-
END
|
73
|
-
end
|
74
|
-
|
75
|
-
let(:tailoring_resource_stub) do
|
76
|
-
<<END
|
77
|
-
---
|
78
|
-
:tailored_table: ! '0491; [5C1B, 5, 5]
|
79
|
-
|
80
|
-
0490; [5C1B, 5, 86]'
|
81
|
-
:suppressed_contractions: ГК
|
82
|
-
...
|
83
|
-
END
|
84
|
-
end
|
85
|
-
|
86
|
-
end
|
87
|
-
|
88
12
|
# Test data is taken from http://unicode.org/cldr/trac/browser/tags/release-2-0-1/test/
|
89
13
|
# Test files format: # - comments, // - pending tests.
|
90
14
|
#
|