twitter_cldr 1.6.0 → 1.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48):
  1. data/Gemfile +1 -0
  2. data/History.txt +5 -0
  3. data/js/lib/twitter_cldr_js.rb +2 -0
  4. data/lib/twitter_cldr/collation/collator.rb +8 -3
  5. data/lib/twitter_cldr/collation/sort_key_builder.rb +118 -34
  6. data/lib/twitter_cldr/collation/trie_builder.rb +5 -1
  7. data/lib/twitter_cldr/resources/import/tailoring.rb +14 -5
  8. data/lib/twitter_cldr/version.rb +1 -1
  9. data/resources/collation/tailoring/af.yml +1 -0
  10. data/resources/collation/tailoring/ar.yml +1 -0
  11. data/resources/collation/tailoring/ca.yml +1 -0
  12. data/resources/collation/tailoring/cs.yml +1 -0
  13. data/resources/collation/tailoring/da.yml +2 -0
  14. data/resources/collation/tailoring/de.yml +1 -0
  15. data/resources/collation/tailoring/el.yml +1 -0
  16. data/resources/collation/tailoring/en.yml +1 -0
  17. data/resources/collation/tailoring/es.yml +1 -0
  18. data/resources/collation/tailoring/eu.yml +1 -0
  19. data/resources/collation/tailoring/fa.yml +1 -0
  20. data/resources/collation/tailoring/fi.yml +1 -0
  21. data/resources/collation/tailoring/fil.yml +1 -0
  22. data/resources/collation/tailoring/fr.yml +1 -0
  23. data/resources/collation/tailoring/he.yml +1 -0
  24. data/resources/collation/tailoring/hi.yml +1 -0
  25. data/resources/collation/tailoring/hu.yml +1 -0
  26. data/resources/collation/tailoring/id.yml +1 -0
  27. data/resources/collation/tailoring/it.yml +1 -0
  28. data/resources/collation/tailoring/ja.yml +1 -0
  29. data/resources/collation/tailoring/ko.yml +1 -0
  30. data/resources/collation/tailoring/ms.yml +1 -0
  31. data/resources/collation/tailoring/nb.yml +1 -0
  32. data/resources/collation/tailoring/nl.yml +1 -0
  33. data/resources/collation/tailoring/pl.yml +1 -0
  34. data/resources/collation/tailoring/pt.yml +1 -0
  35. data/resources/collation/tailoring/ru.yml +1 -0
  36. data/resources/collation/tailoring/sv.yml +1 -0
  37. data/resources/collation/tailoring/th.yml +1 -0
  38. data/resources/collation/tailoring/tr.yml +1 -0
  39. data/resources/collation/tailoring/uk.yml +1 -0
  40. data/resources/collation/tailoring/ur.yml +1 -0
  41. data/resources/collation/tailoring/zh-Hant.yml +1 -0
  42. data/resources/collation/tailoring/zh.yml +1 -0
  43. data/spec/collation/collator_spec.rb +118 -16
  44. data/spec/collation/sort_key_builder_spec.rb +79 -25
  45. data/spec/collation/tailoring_spec.rb +0 -76
  46. data/spec/collation/tailoring_tests/da.txt +181 -181
  47. data/spec/collation/trie_builder_spec.rb +26 -12
  48. metadata +3 -3
@@ -56,19 +56,25 @@ describe Collator do
56
56
  end
57
57
 
58
58
  describe '#initialize' do
59
- before(:each) { stub(TrieBuilder).load_trie { trie } }
60
- before(:each) { any_instance_of(Collator) { |c| stub(c).load_trie { trie } } }
61
-
62
- it 'initializes default collator if locale is not specified' do
63
- Collator.new.locale.should be_nil
59
+ before :each do
60
+ stub(TrieBuilder).load_trie { trie }
61
+ any_instance_of(Collator) { |c| stub(c).load_trie { trie } }
64
62
  end
65
63
 
66
- it 'initialized tailored collator if locale is provided' do
67
- Collator.new(:ru).locale.should == :ru
64
+ context 'without locale' do
65
+ it 'initializes default collator' do
66
+ Collator.new.locale.should be_nil
67
+ end
68
68
  end
69
69
 
70
- it 'converts locale' do
71
- Collator.new(:no).locale.should == :nb
70
+ context 'with locale' do
71
+ it 'initialized tailored collator with provided locale' do
72
+ Collator.new(:ru).locale.should == :ru
73
+ end
74
+
75
+ it 'converts locale' do
76
+ Collator.new(:no).locale.should == :nb
77
+ end
72
78
  end
73
79
  end
74
80
 
@@ -101,16 +107,36 @@ describe Collator do
101
107
  let(:sort_key) { [39, 41, 43, 1, 7, 1, 7] }
102
108
 
103
109
  before(:each) { stub(TrieBuilder).load_trie { trie } }
104
- before(:each) { mock(TwitterCldr::Collation::SortKeyBuilder).build(collation_elements) { sort_key } }
105
110
 
106
- it 'calculates sort key for a string' do
107
- mock(collator).get_collation_elements(string) { collation_elements }
108
- collator.get_sort_key(string).should == sort_key
111
+ describe 'calculating sort key' do
112
+ before(:each) { mock(TwitterCldr::Collation::SortKeyBuilder).build(collation_elements, nil) { sort_key } }
113
+
114
+ it 'calculates sort key for a string' do
115
+ mock(collator).get_collation_elements(string) { collation_elements }
116
+ collator.get_sort_key(string).should == sort_key
117
+ end
118
+
119
+ it 'calculates sort key for an array of code points (represented as hex strings)' do
120
+ mock(collator).get_collation_elements(code_points_hex) { collation_elements }
121
+ collator.get_sort_key(code_points_hex).should == sort_key
122
+ end
109
123
  end
110
124
 
111
- it 'calculates sort key for an array of code points (represented as hex strings)' do
112
- mock(collator).get_collation_elements(code_points_hex) { collation_elements }
113
- collator.get_sort_key(code_points_hex).should == sort_key
125
+ describe 'uses tailoring options' do
126
+ let(:case_first) { :upper }
127
+ let(:locale) { :uk }
128
+
129
+ it 'passes case-first sort option to sort key builder' do
130
+ mock(TwitterCldr::Collation::TrieBuilder).load_tailored_trie(locale, trie) { Trie.new }
131
+ mock(TwitterCldr::Collation::TrieBuilder).tailoring_data(locale) { { :collator_options => { :case_first => case_first } } }
132
+
133
+ collator = Collator.new(locale)
134
+
135
+ mock(collator).get_collation_elements(code_points_hex) { collation_elements }
136
+ mock(TwitterCldr::Collation::SortKeyBuilder).build(collation_elements, case_first) { sort_key }
137
+
138
+ collator.get_sort_key(code_points_hex).should == sort_key
139
+ end
114
140
  end
115
141
  end
116
142
 
@@ -165,6 +191,82 @@ describe Collator do
165
191
  end
166
192
  end
167
193
 
194
+ describe 'tailoring support' do
195
+ before(:each) do
196
+ stub(Collator).default_fce_trie { TrieBuilder.parse_trie(fractional_uca_short_stub) }
197
+ stub(TwitterCldr::Normalization::NFD).normalize_code_points { |code_points| code_points }
198
+ stub(TwitterCldr).get_resource(:collation, :tailoring, locale) { YAML.load(tailoring_resource_stub) }
199
+ end
200
+
201
+ let(:locale) { :some_locale }
202
+ let(:default_collator) { Collator.new }
203
+ let(:tailored_collator) { Collator.new(locale) }
204
+
205
+ describe 'tailoring rules support' do
206
+ it 'tailored collation elements are used' do
207
+ default_collator.get_collation_elements(%w[0490]).should == [[0x5C1A, 5, 0x93], [0, 0xDBB9, 9]]
208
+ tailored_collator.get_collation_elements(%w[0490]).should == [[0x5C1B, 5, 0x86]]
209
+
210
+ default_collator.get_collation_elements(%w[0491]).should == [[0x5C1A, 5, 9], [0, 0xDBB9, 9]]
211
+ tailored_collator.get_collation_elements(%w[0491]).should == [[0x5C1B, 5, 5]]
212
+ end
213
+
214
+ it 'original contractions for tailored elements are applied' do
215
+ default_collator.get_collation_elements(%w[0491 0306]).should == [[0x5C, 0xDB, 9]]
216
+ tailored_collator.get_collation_elements(%w[0491 0306]).should == [[0x5C, 0xDB, 9]]
217
+ end
218
+ end
219
+
220
+ describe 'contractions suppressing support' do
221
+ it 'suppressed contractions are ignored' do
222
+ default_collator.get_collation_elements(%w[041A 0301]).should == [[0x5CCC, 5, 0x8F]]
223
+ tailored_collator.get_collation_elements(%w[041A 0301]).should == [[0x5C6C, 5, 0x8F], [0, 0x8D, 5]]
224
+ end
225
+
226
+ it 'non-suppressed contractions are used' do
227
+ default_collator.get_collation_elements(%w[0415 0306]).should == [[0x5C36, 5, 0x8F]]
228
+ tailored_collator.get_collation_elements(%w[0415 0306]).should == [[0x5C36, 5, 0x8F]]
229
+ end
230
+ end
231
+
232
+ let(:fractional_uca_short_stub) do
233
+ <<END
234
+ # collation elements from default FCE table
235
+ 0301; [, 8D, 05]
236
+ 0306; [, 91, 05]
237
+ 041A; [5C 6C, 05, 8F] # К
238
+ 0413; [5C 1A, 05, 8F] # Г
239
+ 0415; [5C 34, 05, 8F] # Е
240
+
241
+ # tailored (in UK locale) with "Г < ґ <<< Ґ"
242
+ 0491; [5C 1A, 05, 09][, DB B9, 09] # ґ
243
+ 0490; [5C 1A, 05, 93][, DB B9, 09] # Ґ
244
+
245
+ # contraction for a tailored collation element
246
+ 0491 0306; [5C, DB, 09] # ґ̆
247
+
248
+ # contractions suppressed in tailoring (for RU locale)
249
+ 041A 0301; [5C CC, 05, 8F] # Ќ
250
+ 0413 0301; [5C 30, 05, 8F] # Ѓ
251
+
252
+ # contractions non-suppressed in tailoring
253
+ 0415 0306; [5C 36, 05, 8F] # Ӗ
254
+ END
255
+ end
256
+
257
+ let(:tailoring_resource_stub) do
258
+ <<END
259
+ ---
260
+ :tailored_table: ! '0491; [5C1B, 5, 5]
261
+
262
+ 0490; [5C1B, 5, 86]'
263
+ :suppressed_contractions: ГК
264
+ ...
265
+ END
266
+ end
267
+
268
+ end
269
+
168
270
  def mock_sort_key(collator, string, sort_key)
169
271
  mock(collator).get_sort_key(string) { sort_key }
170
272
  end
@@ -17,7 +17,7 @@ describe SortKeyBuilder do
17
17
  it 'returns a sort key for a given array of collation elements' do
18
18
  sort_key = SortKeyBuilder.new(collation_elements)
19
19
 
20
- mock(SortKeyBuilder).new(collation_elements) { sort_key }
20
+ mock(SortKeyBuilder).new(collation_elements, nil) { sort_key }
21
21
  mock(sort_key).bytes_array { sort_key_bytes }
22
22
 
23
23
  SortKeyBuilder.build(collation_elements).should == sort_key_bytes
@@ -28,6 +28,16 @@ describe SortKeyBuilder do
28
28
  it 'assigns collation elements array' do
29
29
  SortKeyBuilder.new(collation_elements).collation_elements.should == collation_elements
30
30
  end
31
+
32
+ it 'accepts case-first option as the second argument' do
33
+ SortKeyBuilder::VALID_CASE_FIRST_OPTIONS.each do |case_first|
34
+ lambda { SortKeyBuilder.new([], case_first) }.should_not raise_error
35
+ end
36
+ end
37
+
38
+ it 'raises an ArgumentError for invalid case-first option' do
39
+ lambda { SortKeyBuilder.new([], :wat) }.should raise_error(ArgumentError)
40
+ end
31
41
  end
32
42
 
33
43
  describe '#bytes_array' do
@@ -40,40 +50,84 @@ describe SortKeyBuilder do
40
50
  sort_key.bytes_array.object_id == sort_key.bytes_array.object_id
41
51
  end
42
52
 
43
- it 'compresses primary weights' do
44
- SortKeyBuilder.new([[0x7A72, 0, 0], [0x7A73, 0, 0], [0x7A75, 0, 0], [0x908, 0, 0], [0x7A73, 0, 0]]).bytes_array.should ==
45
- [0x7A, 0x72, 0x73, 0x75, 0x3, 0x9, 0x08, 0x7A, 0x73, 1, 1]
53
+ describe 'primary weights' do
54
+ it 'compresses primary weights' do
55
+ SortKeyBuilder.new([[0x7A72, 0, 0], [0x7A73, 0, 0], [0x7A75, 0, 0], [0x908, 0, 0], [0x7A73, 0, 0]]).bytes_array.should ==
56
+ [0x7A, 0x72, 0x73, 0x75, 0x3, 0x9, 0x08, 0x7A, 0x73, 1, 1]
46
57
 
47
- SortKeyBuilder.new([[0x7A72, 0, 0], [0x7A73, 0, 0], [0x7A75, 0, 0], [0x9508, 0, 0], [0x7A73, 0, 0]]).bytes_array.should ==
48
- [0x7A, 0x72, 0x73, 0x75, 0xFF, 0x95, 0x08, 0x7A, 0x73, 1, 1]
49
- end
58
+ SortKeyBuilder.new([[0x7A72, 0, 0], [0x7A73, 0, 0], [0x7A75, 0, 0], [0x9508, 0, 0], [0x7A73, 0, 0]]).bytes_array.should ==
59
+ [0x7A, 0x72, 0x73, 0x75, 0xFF, 0x95, 0x08, 0x7A, 0x73, 1, 1]
60
+ end
50
61
 
51
- it 'works when there is an ignorable primary weight in the middle' do
52
- SortKeyBuilder.new([[0x1312, 0, 0], [0, 0, 0], [0x1415, 0, 0]]).bytes_array.should == [0x13, 0x12, 0x14, 0x15, 1, 1]
53
- end
62
+ it 'works when there is an ignorable primary weight in the middle' do
63
+ SortKeyBuilder.new([[0x1312, 0, 0], [0, 0, 0], [0x1415, 0, 0]]).bytes_array.should == [0x13, 0x12, 0x14, 0x15, 1, 1]
64
+ end
54
65
 
55
- it 'do not compress single byte primary weights' do
56
- SortKeyBuilder.new([[0x13, 0, 0], [0x13, 0, 0]]).bytes_array.should == [0x13, 0x13, 1, 1]
57
- end
66
+ it 'do not compress single byte primary weights' do
67
+ SortKeyBuilder.new([[0x13, 0, 0], [0x13, 0, 0]]).bytes_array.should == [0x13, 0x13, 1, 1]
68
+ end
58
69
 
59
- it 'resets primary lead bytes counter after a single byte weight' do
60
- SortKeyBuilder.new([[0x1415, 0, 0], [0x13, 0, 0], [0x13, 0, 0], [0x1412, 0, 0]]).bytes_array.should == [0x14, 0x15, 0x13, 0x13, 0x14, 0x12, 1, 1]
61
- end
70
+ it 'resets primary lead bytes counter after a single byte weight' do
71
+ SortKeyBuilder.new([[0x1415, 0, 0], [0x13, 0, 0], [0x13, 0, 0], [0x1412, 0, 0]]).bytes_array.should == [0x14, 0x15, 0x13, 0x13, 0x14, 0x12, 1, 1]
72
+ end
62
73
 
63
- it 'compresses only allowed primary weights' do
64
- SortKeyBuilder.new([[0x812, 0, 0], [0x811, 0, 0]]).bytes_array.should == [0x8, 0x12, 0x8, 0x11, 1, 1]
74
+ it 'compresses only compressible primary weights' do
75
+ SortKeyBuilder.new([[0x812, 0, 0], [0x811, 0, 0]]).bytes_array.should == [0x8, 0x12, 0x8, 0x11, 1, 1]
76
+ end
65
77
  end
66
78
 
67
- it 'compresses secondary weights' do
68
- SortKeyBuilder.new([[0, 5, 0], [0, 5, 0], [0, 141, 0], [0, 5, 0], [0, 5, 0]]).bytes_array.should == [1, 133, 141, 6, 1]
69
- end
79
+ describe 'secondary weights' do
80
+ it 'compresses secondary weights' do
81
+ SortKeyBuilder.new([[0, 5, 0], [0, 5, 0], [0, 141, 0], [0, 5, 0], [0, 5, 0]]).bytes_array.should == [1, 133, 141, 6, 1]
82
+ end
70
83
 
71
- it 'compresses tertiary weights' do
72
- SortKeyBuilder.new([[0, 0, 5], [0, 0, 5], [0, 0, 39], [0, 0, 5], [0, 0, 5]]).bytes_array.should == [1, 1, 132, 167, 6]
84
+ it 'compresses secondary weights into multiple bytes if necessary' do
85
+ SortKeyBuilder.new([[0, 5, 0]] * 100).bytes_array.should == [1, 69, 40, 1]
86
+ end
73
87
  end
74
88
 
75
- it 'compresses secondary and tertiary weights into multiple bytes if necessary' do
76
- SortKeyBuilder.new([[0, 5, 5]] * 100).bytes_array.should == [1, 69, 40, 1, 48, 48, 18]
89
+ describe 'tertiary weights' do
90
+ context 'when case_first is not set' do
91
+ it 'removes case bits and adds top addition to bytes that are greater than common' do
92
+ SortKeyBuilder.new([[0, 0, 9], [0, 0, 73], [0, 0, 137], [0, 0, 201]]).bytes_array.should == [1, 1, 137, 137, 137, 137]
93
+ end
94
+
95
+ it 'compresses tertiary weights' do
96
+ SortKeyBuilder.new([[0, 0, 5], [0, 0, 5], [0, 0, 39], [0, 0, 5], [0, 0, 5]]).bytes_array.should == [1, 1, 0x84, 0xA7, 6]
97
+ end
98
+
99
+ it 'compresses tertiary weights into multiple bytes if necessary' do
100
+ SortKeyBuilder.new([[0, 0, 5]] * 100).bytes_array.should == [1, 1, 0x30, 0x30, 0x12]
101
+ end
102
+ end
103
+
104
+ context 'when case_first is :upper' do
105
+ it 'inverts case bits and subtract bottom addition from bytes that are smaller than common' do
106
+ SortKeyBuilder.new([[0, 0, 9], [0, 0, 80], [0, 0, 143]], :upper).bytes_array.should == [1, 1, 201, 80, 15]
107
+ end
108
+
109
+ it 'compresses tertiary weights' do
110
+ SortKeyBuilder.new([[0, 0, 5], [0, 0, 5], [0, 0, 39], [0, 0, 5], [0, 0, 5]], :upper).bytes_array.should == [1, 1, 0xC4, 0xE7, 0xC3]
111
+ end
112
+
113
+ it 'compresses tertiary weights into multiple bytes if necessary' do
114
+ SortKeyBuilder.new([[0, 0, 5]] * 100, :upper).bytes_array.should == [1, 1, 0x9C, 0x9C, 0xB3]
115
+ end
116
+ end
117
+
118
+ context 'when case_first is :lower' do
119
+ it 'leaves case bits and adds top addition to bytes that are greater than common' do
120
+ SortKeyBuilder.new([[0, 0, 9], [0, 0, 80], [0, 0, 143]], :lower).bytes_array.should == [1, 1, 73, 144, 207]
121
+ end
122
+
123
+ it 'compresses tertiary weights' do
124
+ SortKeyBuilder.new([[0, 0, 5], [0, 0, 5], [0, 0, 39], [0, 0, 5], [0, 0, 5]], :lower).bytes_array.should == [1, 1, 0x44, 0x67, 6]
125
+ end
126
+
127
+ it 'compresses tertiary weights into multiple bytes if necessary' do
128
+ SortKeyBuilder.new([[0, 0, 5]] * 100, :lower).bytes_array.should == [1, 1, 0x1A, 0x1A, 0x1A, 0x1A, 0x14]
129
+ end
130
+ end
77
131
  end
78
132
  end
79
133
 
@@ -9,82 +9,6 @@ include TwitterCldr::Collation
9
9
 
10
10
  describe 'Unicode collation tailoring' do
11
11
 
12
- describe 'tailoring support' do
13
- before(:each) do
14
- stub(Collator).default_fce_trie { TrieBuilder.parse_trie(fractional_uca_short_stub) }
15
- stub(TwitterCldr::Normalization::NFD).normalize_code_points { |code_points| code_points }
16
- stub(TwitterCldr).get_resource(:collation, :tailoring, locale) { YAML.load(tailoring_resource_stub) }
17
- end
18
-
19
- let(:locale) { :some_locale }
20
- let(:default_collator) { Collator.new }
21
- let(:tailored_collator) { Collator.new(locale) }
22
-
23
- describe 'tailoring rules support' do
24
- it 'tailored collation elements are used' do
25
- default_collator.get_collation_elements(%w[0490]).should == [[0x5C1A, 5, 0x93], [0, 0xDBB9, 9]]
26
- tailored_collator.get_collation_elements(%w[0490]).should == [[0x5C1B, 5, 0x86]]
27
-
28
- default_collator.get_collation_elements(%w[0491]).should == [[0x5C1A, 5, 9], [0, 0xDBB9, 9]]
29
- tailored_collator.get_collation_elements(%w[0491]).should == [[0x5C1B, 5, 5]]
30
- end
31
-
32
- it 'original contractions for tailored elements are applied' do
33
- default_collator.get_collation_elements(%w[0491 0306]).should == [[0x5C, 0xDB, 9]]
34
- tailored_collator.get_collation_elements(%w[0491 0306]).should == [[0x5C, 0xDB, 9]]
35
- end
36
- end
37
-
38
- describe 'contractions suppressing support' do
39
- it 'suppressed contractions are ignored' do
40
- default_collator.get_collation_elements(%w[041A 0301]).should == [[0x5CCC, 5, 0x8F]]
41
- tailored_collator.get_collation_elements(%w[041A 0301]).should == [[0x5C6C, 5, 0x8F], [0, 0x8D, 5]]
42
- end
43
-
44
- it 'non-suppressed contractions are used' do
45
- default_collator.get_collation_elements(%w[0415 0306]).should == [[0x5C36, 5, 0x8F]]
46
- tailored_collator.get_collation_elements(%w[0415 0306]).should == [[0x5C36, 5, 0x8F]]
47
- end
48
- end
49
-
50
- let(:fractional_uca_short_stub) do
51
- <<END
52
- # collation elements from default FCE table
53
- 0301; [, 8D, 05]
54
- 0306; [, 91, 05]
55
- 041A; [5C 6C, 05, 8F] # К
56
- 0413; [5C 1A, 05, 8F] # Г
57
- 0415; [5C 34, 05, 8F] # Е
58
-
59
- # tailored (in UK locale) with "Г < ґ <<< Ґ"
60
- 0491; [5C 1A, 05, 09][, DB B9, 09] # ґ
61
- 0490; [5C 1A, 05, 93][, DB B9, 09] # Ґ
62
-
63
- # contraction for a tailored collation element
64
- 0491 0306; [5C, DB, 09] # ґ̆
65
-
66
- # contractions suppressed in tailoring (for RU locale)
67
- 041A 0301; [5C CC, 05, 8F] # Ќ
68
- 0413 0301; [5C 30, 05, 8F] # Ѓ
69
-
70
- # contractions non-suppressed in tailoring
71
- 0415 0306; [5C 36, 05, 8F] # Ӗ
72
- END
73
- end
74
-
75
- let(:tailoring_resource_stub) do
76
- <<END
77
- ---
78
- :tailored_table: ! '0491; [5C1B, 5, 5]
79
-
80
- 0490; [5C1B, 5, 86]'
81
- :suppressed_contractions: ГК
82
- ...
83
- END
84
- end
85
-
86
- end
87
-
88
12
  # Test data is taken from http://unicode.org/cldr/trac/browser/tags/release-2-0-1/test/
89
13
  # Test files format: # - comments, // - pending tests.
90
14
  #