twitter_cldr 1.6.0 → 1.6.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (48) hide show
  1. data/Gemfile +1 -0
  2. data/History.txt +5 -0
  3. data/js/lib/twitter_cldr_js.rb +2 -0
  4. data/lib/twitter_cldr/collation/collator.rb +8 -3
  5. data/lib/twitter_cldr/collation/sort_key_builder.rb +118 -34
  6. data/lib/twitter_cldr/collation/trie_builder.rb +5 -1
  7. data/lib/twitter_cldr/resources/import/tailoring.rb +14 -5
  8. data/lib/twitter_cldr/version.rb +1 -1
  9. data/resources/collation/tailoring/af.yml +1 -0
  10. data/resources/collation/tailoring/ar.yml +1 -0
  11. data/resources/collation/tailoring/ca.yml +1 -0
  12. data/resources/collation/tailoring/cs.yml +1 -0
  13. data/resources/collation/tailoring/da.yml +2 -0
  14. data/resources/collation/tailoring/de.yml +1 -0
  15. data/resources/collation/tailoring/el.yml +1 -0
  16. data/resources/collation/tailoring/en.yml +1 -0
  17. data/resources/collation/tailoring/es.yml +1 -0
  18. data/resources/collation/tailoring/eu.yml +1 -0
  19. data/resources/collation/tailoring/fa.yml +1 -0
  20. data/resources/collation/tailoring/fi.yml +1 -0
  21. data/resources/collation/tailoring/fil.yml +1 -0
  22. data/resources/collation/tailoring/fr.yml +1 -0
  23. data/resources/collation/tailoring/he.yml +1 -0
  24. data/resources/collation/tailoring/hi.yml +1 -0
  25. data/resources/collation/tailoring/hu.yml +1 -0
  26. data/resources/collation/tailoring/id.yml +1 -0
  27. data/resources/collation/tailoring/it.yml +1 -0
  28. data/resources/collation/tailoring/ja.yml +1 -0
  29. data/resources/collation/tailoring/ko.yml +1 -0
  30. data/resources/collation/tailoring/ms.yml +1 -0
  31. data/resources/collation/tailoring/nb.yml +1 -0
  32. data/resources/collation/tailoring/nl.yml +1 -0
  33. data/resources/collation/tailoring/pl.yml +1 -0
  34. data/resources/collation/tailoring/pt.yml +1 -0
  35. data/resources/collation/tailoring/ru.yml +1 -0
  36. data/resources/collation/tailoring/sv.yml +1 -0
  37. data/resources/collation/tailoring/th.yml +1 -0
  38. data/resources/collation/tailoring/tr.yml +1 -0
  39. data/resources/collation/tailoring/uk.yml +1 -0
  40. data/resources/collation/tailoring/ur.yml +1 -0
  41. data/resources/collation/tailoring/zh-Hant.yml +1 -0
  42. data/resources/collation/tailoring/zh.yml +1 -0
  43. data/spec/collation/collator_spec.rb +118 -16
  44. data/spec/collation/sort_key_builder_spec.rb +79 -25
  45. data/spec/collation/tailoring_spec.rb +0 -76
  46. data/spec/collation/tailoring_tests/da.txt +181 -181
  47. data/spec/collation/trie_builder_spec.rb +26 -12
  48. metadata +3 -3
@@ -56,19 +56,25 @@ describe Collator do
56
56
  end
57
57
 
58
58
  describe '#initialize' do
59
- before(:each) { stub(TrieBuilder).load_trie { trie } }
60
- before(:each) { any_instance_of(Collator) { |c| stub(c).load_trie { trie } } }
61
-
62
- it 'initializes default collator if locale is not specified' do
63
- Collator.new.locale.should be_nil
59
+ before :each do
60
+ stub(TrieBuilder).load_trie { trie }
61
+ any_instance_of(Collator) { |c| stub(c).load_trie { trie } }
64
62
  end
65
63
 
66
- it 'initialized tailored collator if locale is provided' do
67
- Collator.new(:ru).locale.should == :ru
64
+ context 'without locale' do
65
+ it 'initializes default collator' do
66
+ Collator.new.locale.should be_nil
67
+ end
68
68
  end
69
69
 
70
- it 'converts locale' do
71
- Collator.new(:no).locale.should == :nb
70
+ context 'with locale' do
71
+ it 'initialized tailored collator with provided locale' do
72
+ Collator.new(:ru).locale.should == :ru
73
+ end
74
+
75
+ it 'converts locale' do
76
+ Collator.new(:no).locale.should == :nb
77
+ end
72
78
  end
73
79
  end
74
80
 
@@ -101,16 +107,36 @@ describe Collator do
101
107
  let(:sort_key) { [39, 41, 43, 1, 7, 1, 7] }
102
108
 
103
109
  before(:each) { stub(TrieBuilder).load_trie { trie } }
104
- before(:each) { mock(TwitterCldr::Collation::SortKeyBuilder).build(collation_elements) { sort_key } }
105
110
 
106
- it 'calculates sort key for a string' do
107
- mock(collator).get_collation_elements(string) { collation_elements }
108
- collator.get_sort_key(string).should == sort_key
111
+ describe 'calculating sort key' do
112
+ before(:each) { mock(TwitterCldr::Collation::SortKeyBuilder).build(collation_elements, nil) { sort_key } }
113
+
114
+ it 'calculates sort key for a string' do
115
+ mock(collator).get_collation_elements(string) { collation_elements }
116
+ collator.get_sort_key(string).should == sort_key
117
+ end
118
+
119
+ it 'calculates sort key for an array of code points (represented as hex strings)' do
120
+ mock(collator).get_collation_elements(code_points_hex) { collation_elements }
121
+ collator.get_sort_key(code_points_hex).should == sort_key
122
+ end
109
123
  end
110
124
 
111
- it 'calculates sort key for an array of code points (represented as hex strings)' do
112
- mock(collator).get_collation_elements(code_points_hex) { collation_elements }
113
- collator.get_sort_key(code_points_hex).should == sort_key
125
+ describe 'uses tailoring options' do
126
+ let(:case_first) { :upper }
127
+ let(:locale) { :uk }
128
+
129
+ it 'passes case-first sort option to sort key builder' do
130
+ mock(TwitterCldr::Collation::TrieBuilder).load_tailored_trie(locale, trie) { Trie.new }
131
+ mock(TwitterCldr::Collation::TrieBuilder).tailoring_data(locale) { { :collator_options => { :case_first => case_first } } }
132
+
133
+ collator = Collator.new(locale)
134
+
135
+ mock(collator).get_collation_elements(code_points_hex) { collation_elements }
136
+ mock(TwitterCldr::Collation::SortKeyBuilder).build(collation_elements, case_first) { sort_key }
137
+
138
+ collator.get_sort_key(code_points_hex).should == sort_key
139
+ end
114
140
  end
115
141
  end
116
142
 
@@ -165,6 +191,82 @@ describe Collator do
165
191
  end
166
192
  end
167
193
 
194
+ describe 'tailoring support' do
195
+ before(:each) do
196
+ stub(Collator).default_fce_trie { TrieBuilder.parse_trie(fractional_uca_short_stub) }
197
+ stub(TwitterCldr::Normalization::NFD).normalize_code_points { |code_points| code_points }
198
+ stub(TwitterCldr).get_resource(:collation, :tailoring, locale) { YAML.load(tailoring_resource_stub) }
199
+ end
200
+
201
+ let(:locale) { :some_locale }
202
+ let(:default_collator) { Collator.new }
203
+ let(:tailored_collator) { Collator.new(locale) }
204
+
205
+ describe 'tailoring rules support' do
206
+ it 'tailored collation elements are used' do
207
+ default_collator.get_collation_elements(%w[0490]).should == [[0x5C1A, 5, 0x93], [0, 0xDBB9, 9]]
208
+ tailored_collator.get_collation_elements(%w[0490]).should == [[0x5C1B, 5, 0x86]]
209
+
210
+ default_collator.get_collation_elements(%w[0491]).should == [[0x5C1A, 5, 9], [0, 0xDBB9, 9]]
211
+ tailored_collator.get_collation_elements(%w[0491]).should == [[0x5C1B, 5, 5]]
212
+ end
213
+
214
+ it 'original contractions for tailored elements are applied' do
215
+ default_collator.get_collation_elements(%w[0491 0306]).should == [[0x5C, 0xDB, 9]]
216
+ tailored_collator.get_collation_elements(%w[0491 0306]).should == [[0x5C, 0xDB, 9]]
217
+ end
218
+ end
219
+
220
+ describe 'contractions suppressing support' do
221
+ it 'suppressed contractions are ignored' do
222
+ default_collator.get_collation_elements(%w[041A 0301]).should == [[0x5CCC, 5, 0x8F]]
223
+ tailored_collator.get_collation_elements(%w[041A 0301]).should == [[0x5C6C, 5, 0x8F], [0, 0x8D, 5]]
224
+ end
225
+
226
+ it 'non-suppressed contractions are used' do
227
+ default_collator.get_collation_elements(%w[0415 0306]).should == [[0x5C36, 5, 0x8F]]
228
+ tailored_collator.get_collation_elements(%w[0415 0306]).should == [[0x5C36, 5, 0x8F]]
229
+ end
230
+ end
231
+
232
+ let(:fractional_uca_short_stub) do
233
+ <<END
234
+ # collation elements from default FCE table
235
+ 0301; [, 8D, 05]
236
+ 0306; [, 91, 05]
237
+ 041A; [5C 6C, 05, 8F] # К
238
+ 0413; [5C 1A, 05, 8F] # Г
239
+ 0415; [5C 34, 05, 8F] # Е
240
+
241
+ # tailored (in UK locale) with "Г < ґ <<< Ґ"
242
+ 0491; [5C 1A, 05, 09][, DB B9, 09] # ґ
243
+ 0490; [5C 1A, 05, 93][, DB B9, 09] # Ґ
244
+
245
+ # contraction for a tailored collation element
246
+ 0491 0306; [5C, DB, 09] # ґ̆
247
+
248
+ # contractions suppressed in tailoring (for RU locale)
249
+ 041A 0301; [5C CC, 05, 8F] # Ќ
250
+ 0413 0301; [5C 30, 05, 8F] # Ѓ
251
+
252
+ # contractions non-suppressed in tailoring
253
+ 0415 0306; [5C 36, 05, 8F] # Ӗ
254
+ END
255
+ end
256
+
257
+ let(:tailoring_resource_stub) do
258
+ <<END
259
+ ---
260
+ :tailored_table: ! '0491; [5C1B, 5, 5]
261
+
262
+ 0490; [5C1B, 5, 86]'
263
+ :suppressed_contractions: ГК
264
+ ...
265
+ END
266
+ end
267
+
268
+ end
269
+
168
270
  def mock_sort_key(collator, string, sort_key)
169
271
  mock(collator).get_sort_key(string) { sort_key }
170
272
  end
@@ -17,7 +17,7 @@ describe SortKeyBuilder do
17
17
  it 'returns a sort key for a given array of collation elements' do
18
18
  sort_key = SortKeyBuilder.new(collation_elements)
19
19
 
20
- mock(SortKeyBuilder).new(collation_elements) { sort_key }
20
+ mock(SortKeyBuilder).new(collation_elements, nil) { sort_key }
21
21
  mock(sort_key).bytes_array { sort_key_bytes }
22
22
 
23
23
  SortKeyBuilder.build(collation_elements).should == sort_key_bytes
@@ -28,6 +28,16 @@ describe SortKeyBuilder do
28
28
  it 'assigns collation elements array' do
29
29
  SortKeyBuilder.new(collation_elements).collation_elements.should == collation_elements
30
30
  end
31
+
32
+ it 'accepts case-first option as the second argument' do
33
+ SortKeyBuilder::VALID_CASE_FIRST_OPTIONS.each do |case_first|
34
+ lambda { SortKeyBuilder.new([], case_first) }.should_not raise_error
35
+ end
36
+ end
37
+
38
+ it 'raises an ArgumentError for invalid case-first option' do
39
+ lambda { SortKeyBuilder.new([], :wat) }.should raise_error(ArgumentError)
40
+ end
31
41
  end
32
42
 
33
43
  describe '#bytes_array' do
@@ -40,40 +50,84 @@ describe SortKeyBuilder do
40
50
  sort_key.bytes_array.object_id == sort_key.bytes_array.object_id
41
51
  end
42
52
 
43
- it 'compresses primary weights' do
44
- SortKeyBuilder.new([[0x7A72, 0, 0], [0x7A73, 0, 0], [0x7A75, 0, 0], [0x908, 0, 0], [0x7A73, 0, 0]]).bytes_array.should ==
45
- [0x7A, 0x72, 0x73, 0x75, 0x3, 0x9, 0x08, 0x7A, 0x73, 1, 1]
53
+ describe 'primary weights' do
54
+ it 'compresses primary weights' do
55
+ SortKeyBuilder.new([[0x7A72, 0, 0], [0x7A73, 0, 0], [0x7A75, 0, 0], [0x908, 0, 0], [0x7A73, 0, 0]]).bytes_array.should ==
56
+ [0x7A, 0x72, 0x73, 0x75, 0x3, 0x9, 0x08, 0x7A, 0x73, 1, 1]
46
57
 
47
- SortKeyBuilder.new([[0x7A72, 0, 0], [0x7A73, 0, 0], [0x7A75, 0, 0], [0x9508, 0, 0], [0x7A73, 0, 0]]).bytes_array.should ==
48
- [0x7A, 0x72, 0x73, 0x75, 0xFF, 0x95, 0x08, 0x7A, 0x73, 1, 1]
49
- end
58
+ SortKeyBuilder.new([[0x7A72, 0, 0], [0x7A73, 0, 0], [0x7A75, 0, 0], [0x9508, 0, 0], [0x7A73, 0, 0]]).bytes_array.should ==
59
+ [0x7A, 0x72, 0x73, 0x75, 0xFF, 0x95, 0x08, 0x7A, 0x73, 1, 1]
60
+ end
50
61
 
51
- it 'works when there is an ignorable primary weight in the middle' do
52
- SortKeyBuilder.new([[0x1312, 0, 0], [0, 0, 0], [0x1415, 0, 0]]).bytes_array.should == [0x13, 0x12, 0x14, 0x15, 1, 1]
53
- end
62
+ it 'works when there is an ignorable primary weight in the middle' do
63
+ SortKeyBuilder.new([[0x1312, 0, 0], [0, 0, 0], [0x1415, 0, 0]]).bytes_array.should == [0x13, 0x12, 0x14, 0x15, 1, 1]
64
+ end
54
65
 
55
- it 'do not compress single byte primary weights' do
56
- SortKeyBuilder.new([[0x13, 0, 0], [0x13, 0, 0]]).bytes_array.should == [0x13, 0x13, 1, 1]
57
- end
66
+ it 'do not compress single byte primary weights' do
67
+ SortKeyBuilder.new([[0x13, 0, 0], [0x13, 0, 0]]).bytes_array.should == [0x13, 0x13, 1, 1]
68
+ end
58
69
 
59
- it 'resets primary lead bytes counter after a single byte weight' do
60
- SortKeyBuilder.new([[0x1415, 0, 0], [0x13, 0, 0], [0x13, 0, 0], [0x1412, 0, 0]]).bytes_array.should == [0x14, 0x15, 0x13, 0x13, 0x14, 0x12, 1, 1]
61
- end
70
+ it 'resets primary lead bytes counter after a single byte weight' do
71
+ SortKeyBuilder.new([[0x1415, 0, 0], [0x13, 0, 0], [0x13, 0, 0], [0x1412, 0, 0]]).bytes_array.should == [0x14, 0x15, 0x13, 0x13, 0x14, 0x12, 1, 1]
72
+ end
62
73
 
63
- it 'compresses only allowed primary weights' do
64
- SortKeyBuilder.new([[0x812, 0, 0], [0x811, 0, 0]]).bytes_array.should == [0x8, 0x12, 0x8, 0x11, 1, 1]
74
+ it 'compresses only compressible primary weights' do
75
+ SortKeyBuilder.new([[0x812, 0, 0], [0x811, 0, 0]]).bytes_array.should == [0x8, 0x12, 0x8, 0x11, 1, 1]
76
+ end
65
77
  end
66
78
 
67
- it 'compresses secondary weights' do
68
- SortKeyBuilder.new([[0, 5, 0], [0, 5, 0], [0, 141, 0], [0, 5, 0], [0, 5, 0]]).bytes_array.should == [1, 133, 141, 6, 1]
69
- end
79
+ describe 'secondary weights' do
80
+ it 'compresses secondary weights' do
81
+ SortKeyBuilder.new([[0, 5, 0], [0, 5, 0], [0, 141, 0], [0, 5, 0], [0, 5, 0]]).bytes_array.should == [1, 133, 141, 6, 1]
82
+ end
70
83
 
71
- it 'compresses tertiary weights' do
72
- SortKeyBuilder.new([[0, 0, 5], [0, 0, 5], [0, 0, 39], [0, 0, 5], [0, 0, 5]]).bytes_array.should == [1, 1, 132, 167, 6]
84
+ it 'compresses secondary weights into multiple bytes if necessary' do
85
+ SortKeyBuilder.new([[0, 5, 0]] * 100).bytes_array.should == [1, 69, 40, 1]
86
+ end
73
87
  end
74
88
 
75
- it 'compresses secondary and tertiary weights into multiple bytes if necessary' do
76
- SortKeyBuilder.new([[0, 5, 5]] * 100).bytes_array.should == [1, 69, 40, 1, 48, 48, 18]
89
+ describe 'tertiary weights' do
90
+ context 'when case_first is not set' do
91
+ it 'removes case bits and adds top addition to bytes that are greater than common' do
92
+ SortKeyBuilder.new([[0, 0, 9], [0, 0, 73], [0, 0, 137], [0, 0, 201]]).bytes_array.should == [1, 1, 137, 137, 137, 137]
93
+ end
94
+
95
+ it 'compresses tertiary weights' do
96
+ SortKeyBuilder.new([[0, 0, 5], [0, 0, 5], [0, 0, 39], [0, 0, 5], [0, 0, 5]]).bytes_array.should == [1, 1, 0x84, 0xA7, 6]
97
+ end
98
+
99
+ it 'compresses tertiary weights into multiple bytes if necessary' do
100
+ SortKeyBuilder.new([[0, 0, 5]] * 100).bytes_array.should == [1, 1, 0x30, 0x30, 0x12]
101
+ end
102
+ end
103
+
104
+ context 'when case_first is :upper' do
105
+ it 'inverts case bits and subtract bottom addition from bytes that are smaller than common' do
106
+ SortKeyBuilder.new([[0, 0, 9], [0, 0, 80], [0, 0, 143]], :upper).bytes_array.should == [1, 1, 201, 80, 15]
107
+ end
108
+
109
+ it 'compresses tertiary weights' do
110
+ SortKeyBuilder.new([[0, 0, 5], [0, 0, 5], [0, 0, 39], [0, 0, 5], [0, 0, 5]], :upper).bytes_array.should == [1, 1, 0xC4, 0xE7, 0xC3]
111
+ end
112
+
113
+ it 'compresses tertiary weights into multiple bytes if necessary' do
114
+ SortKeyBuilder.new([[0, 0, 5]] * 100, :upper).bytes_array.should == [1, 1, 0x9C, 0x9C, 0xB3]
115
+ end
116
+ end
117
+
118
+ context 'when case_first is :lower' do
119
+ it 'leaves case bits and adds top addition to bytes that are greater than common' do
120
+ SortKeyBuilder.new([[0, 0, 9], [0, 0, 80], [0, 0, 143]], :lower).bytes_array.should == [1, 1, 73, 144, 207]
121
+ end
122
+
123
+ it 'compresses tertiary weights' do
124
+ SortKeyBuilder.new([[0, 0, 5], [0, 0, 5], [0, 0, 39], [0, 0, 5], [0, 0, 5]], :lower).bytes_array.should == [1, 1, 0x44, 0x67, 6]
125
+ end
126
+
127
+ it 'compresses tertiary weights into multiple bytes if necessary' do
128
+ SortKeyBuilder.new([[0, 0, 5]] * 100, :lower).bytes_array.should == [1, 1, 0x1A, 0x1A, 0x1A, 0x1A, 0x14]
129
+ end
130
+ end
77
131
  end
78
132
  end
79
133
 
@@ -9,82 +9,6 @@ include TwitterCldr::Collation
9
9
 
10
10
  describe 'Unicode collation tailoring' do
11
11
 
12
- describe 'tailoring support' do
13
- before(:each) do
14
- stub(Collator).default_fce_trie { TrieBuilder.parse_trie(fractional_uca_short_stub) }
15
- stub(TwitterCldr::Normalization::NFD).normalize_code_points { |code_points| code_points }
16
- stub(TwitterCldr).get_resource(:collation, :tailoring, locale) { YAML.load(tailoring_resource_stub) }
17
- end
18
-
19
- let(:locale) { :some_locale }
20
- let(:default_collator) { Collator.new }
21
- let(:tailored_collator) { Collator.new(locale) }
22
-
23
- describe 'tailoring rules support' do
24
- it 'tailored collation elements are used' do
25
- default_collator.get_collation_elements(%w[0490]).should == [[0x5C1A, 5, 0x93], [0, 0xDBB9, 9]]
26
- tailored_collator.get_collation_elements(%w[0490]).should == [[0x5C1B, 5, 0x86]]
27
-
28
- default_collator.get_collation_elements(%w[0491]).should == [[0x5C1A, 5, 9], [0, 0xDBB9, 9]]
29
- tailored_collator.get_collation_elements(%w[0491]).should == [[0x5C1B, 5, 5]]
30
- end
31
-
32
- it 'original contractions for tailored elements are applied' do
33
- default_collator.get_collation_elements(%w[0491 0306]).should == [[0x5C, 0xDB, 9]]
34
- tailored_collator.get_collation_elements(%w[0491 0306]).should == [[0x5C, 0xDB, 9]]
35
- end
36
- end
37
-
38
- describe 'contractions suppressing support' do
39
- it 'suppressed contractions are ignored' do
40
- default_collator.get_collation_elements(%w[041A 0301]).should == [[0x5CCC, 5, 0x8F]]
41
- tailored_collator.get_collation_elements(%w[041A 0301]).should == [[0x5C6C, 5, 0x8F], [0, 0x8D, 5]]
42
- end
43
-
44
- it 'non-suppressed contractions are used' do
45
- default_collator.get_collation_elements(%w[0415 0306]).should == [[0x5C36, 5, 0x8F]]
46
- tailored_collator.get_collation_elements(%w[0415 0306]).should == [[0x5C36, 5, 0x8F]]
47
- end
48
- end
49
-
50
- let(:fractional_uca_short_stub) do
51
- <<END
52
- # collation elements from default FCE table
53
- 0301; [, 8D, 05]
54
- 0306; [, 91, 05]
55
- 041A; [5C 6C, 05, 8F] # К
56
- 0413; [5C 1A, 05, 8F] # Г
57
- 0415; [5C 34, 05, 8F] # Е
58
-
59
- # tailored (in UK locale) with "Г < ґ <<< Ґ"
60
- 0491; [5C 1A, 05, 09][, DB B9, 09] # ґ
61
- 0490; [5C 1A, 05, 93][, DB B9, 09] # Ґ
62
-
63
- # contraction for a tailored collation element
64
- 0491 0306; [5C, DB, 09] # ґ̆
65
-
66
- # contractions suppressed in tailoring (for RU locale)
67
- 041A 0301; [5C CC, 05, 8F] # Ќ
68
- 0413 0301; [5C 30, 05, 8F] # Ѓ
69
-
70
- # contractions non-suppressed in tailoring
71
- 0415 0306; [5C 36, 05, 8F] # Ӗ
72
- END
73
- end
74
-
75
- let(:tailoring_resource_stub) do
76
- <<END
77
- ---
78
- :tailored_table: ! '0491; [5C1B, 5, 5]
79
-
80
- 0490; [5C1B, 5, 86]'
81
- :suppressed_contractions: ГК
82
- ...
83
- END
84
- end
85
-
86
- end
87
-
88
12
  # Test data is taken from http://unicode.org/cldr/trac/browser/tags/release-2-0-1/test/
89
13
  # Test files format: # - comments, // - pending tests.
90
14
  #