twitter_cldr 1.5.0 → 1.6.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +32 -0
- data/History.txt +78 -0
- data/README.md +72 -62
- data/Rakefile +22 -0
- data/js/lib/compiler.rb +40 -0
- data/js/lib/mustache/bundle.coffee +14 -0
- data/js/lib/mustache/calendars/datetime.coffee +240 -0
- data/js/lib/mustache/calendars/timespan.coffee +52 -0
- data/js/lib/mustache/plurals/rules.coffee +14 -0
- data/js/lib/renderers/base.rb +18 -0
- data/js/lib/renderers/bundle.rb +18 -0
- data/js/lib/renderers/calendars/datetime_renderer.rb +34 -0
- data/js/lib/renderers/calendars/timespan_renderer.rb +39 -0
- data/js/lib/renderers/plurals/rules/plural_rules_compiler.rb +89 -0
- data/js/lib/renderers/plurals/rules/plural_rules_renderer.rb +26 -0
- data/js/lib/twitter_cldr_js.rb +85 -0
- data/js/spec/js/calendars/datetime_spec.js +418 -0
- data/js/spec/js/calendars/timespan_spec.js +91 -0
- data/js/spec/js/plurals/plural_rules_spec.js +28 -0
- data/js/spec/js/support/jasmine.yml +8 -0
- data/js/spec/rb/renderers/plurals/plural_rules_compiler_spec.rb +52 -0
- data/js/spec/rb/spec_helper.rb +13 -0
- data/lib/twitter_cldr.rb +2 -1
- data/lib/twitter_cldr/collation.rb +2 -1
- data/lib/twitter_cldr/collation/collator.rb +49 -31
- data/lib/twitter_cldr/collation/{sort_key.rb → sort_key_builder.rb} +31 -8
- data/lib/twitter_cldr/collation/trie.rb +116 -24
- data/lib/twitter_cldr/collation/trie_builder.rb +54 -28
- data/lib/twitter_cldr/collation/trie_with_fallback.rb +55 -0
- data/lib/twitter_cldr/core_ext/array.rb +14 -1
- data/lib/twitter_cldr/core_ext/calendars/datetime.rb +8 -2
- data/lib/twitter_cldr/core_ext/calendars/timespan.rb +5 -5
- data/lib/twitter_cldr/formatters/calendars/timespan_formatter.rb +10 -10
- data/lib/twitter_cldr/formatters/plurals/rules.rb +3 -5
- data/lib/twitter_cldr/resources.rb +11 -0
- data/lib/twitter_cldr/resources/import.rb +12 -0
- data/lib/twitter_cldr/resources/import/tailoring.rb +193 -0
- data/lib/twitter_cldr/{shared/resources.rb → resources/loader.rb} +17 -4
- data/lib/twitter_cldr/shared.rb +0 -1
- data/lib/twitter_cldr/tokenizers/base.rb +9 -9
- data/lib/twitter_cldr/tokenizers/calendars/datetime_tokenizer.rb +0 -4
- data/lib/twitter_cldr/tokenizers/calendars/timespan_tokenizer.rb +21 -7
- data/lib/twitter_cldr/utils.rb +11 -0
- data/lib/twitter_cldr/version.rb +1 -1
- data/resources/collation/tailoring/af.yml +3 -0
- data/resources/collation/tailoring/ar.yml +21 -0
- data/resources/collation/tailoring/ca.yml +9 -0
- data/resources/collation/tailoring/cs.yml +25 -0
- data/resources/collation/tailoring/da.yml +59 -0
- data/resources/collation/tailoring/de.yml +3 -0
- data/resources/collation/tailoring/el.yml +3 -0
- data/resources/collation/tailoring/en.yml +3 -0
- data/resources/collation/tailoring/es.yml +5 -0
- data/resources/collation/tailoring/eu.yml +3 -0
- data/resources/collation/tailoring/fa.yml +73 -0
- data/resources/collation/tailoring/fi.yml +61 -0
- data/resources/collation/tailoring/fil.yml +11 -0
- data/resources/collation/tailoring/fr.yml +3 -0
- data/resources/collation/tailoring/he.yml +3 -0
- data/resources/collation/tailoring/hi.yml +7 -0
- data/resources/collation/tailoring/hu.yml +125 -0
- data/resources/collation/tailoring/id.yml +3 -0
- data/resources/collation/tailoring/it.yml +3 -0
- data/resources/collation/tailoring/ja.yml +14647 -0
- data/resources/collation/tailoring/ko.yml +14953 -0
- data/resources/collation/tailoring/ms.yml +3 -0
- data/resources/collation/tailoring/nb.yml +59 -0
- data/resources/collation/tailoring/nl.yml +3 -0
- data/resources/collation/tailoring/pl.yml +37 -0
- data/resources/collation/tailoring/pt.yml +3 -0
- data/resources/collation/tailoring/ru.yml +3 -0
- data/resources/collation/tailoring/sv.yml +63 -0
- data/resources/collation/tailoring/th.yml +19 -0
- data/resources/collation/tailoring/tr.yml +27 -0
- data/resources/collation/tailoring/uk.yml +5 -0
- data/resources/collation/tailoring/ur.yml +163 -0
- data/resources/collation/tailoring/zh-Hant.yml +3 -0
- data/resources/collation/tailoring/zh.yml +149 -0
- data/resources/custom/locales/af/units.yml +19 -0
- data/resources/custom/locales/ar/units.yml +35 -0
- data/resources/custom/locales/ca/units.yml +19 -0
- data/resources/custom/locales/cs/units.yml +23 -0
- data/resources/custom/locales/da/units.yml +19 -0
- data/resources/custom/locales/de/units.yml +19 -0
- data/resources/custom/locales/el/units.yml +19 -0
- data/resources/custom/locales/en/units.yml +18 -0
- data/resources/custom/locales/es/units.yml +19 -0
- data/resources/custom/locales/eu/units.yml +19 -0
- data/resources/custom/locales/fa/units.yml +15 -0
- data/resources/custom/locales/fi/units.yml +19 -0
- data/resources/custom/locales/fil/units.yml +19 -0
- data/resources/custom/locales/fr/units.yml +19 -0
- data/resources/custom/locales/he/units.yml +19 -0
- data/resources/custom/locales/hi/units.yml +19 -0
- data/resources/custom/locales/hu/units.yml +15 -0
- data/resources/custom/locales/id/units.yml +15 -0
- data/resources/custom/locales/it/units.yml +19 -0
- data/resources/custom/locales/ja/units.yml +15 -0
- data/resources/custom/locales/ko/units.yml +15 -0
- data/resources/custom/locales/ms/units.yml +15 -0
- data/resources/custom/locales/nb/units.yml +19 -0
- data/resources/custom/locales/nl/units.yml +19 -0
- data/resources/custom/locales/pl/units.yml +23 -0
- data/resources/custom/locales/pt/units.yml +19 -0
- data/resources/custom/locales/ru/units.yml +27 -0
- data/resources/custom/locales/sv/units.yml +19 -0
- data/resources/custom/locales/th/units.yml +15 -0
- data/resources/custom/locales/tr/units.yml +15 -0
- data/resources/custom/locales/uk/units.yml +27 -0
- data/resources/custom/locales/ur/units.yml +19 -0
- data/resources/custom/locales/zh-Hant/units.yml +15 -0
- data/resources/custom/locales/zh/units.yml +15 -0
- data/resources/locales/af/units.yml +112 -65
- data/resources/locales/ar/units.yml +196 -126
- data/resources/locales/ca/units.yml +112 -70
- data/resources/locales/cs/units.yml +140 -91
- data/resources/locales/da/units.yml +98 -56
- data/resources/locales/de/units.yml +112 -70
- data/resources/locales/el/units.yml +119 -84
- data/resources/locales/en/units.yml +84 -42
- data/resources/locales/es/units.yml +112 -70
- data/resources/locales/eu/units.yml +105 -68
- data/resources/locales/fa/units.yml +98 -63
- data/resources/locales/fi/units.yml +112 -70
- data/resources/locales/fil/units.yml +98 -56
- data/resources/locales/fr/units.yml +112 -70
- data/resources/locales/he/units.yml +98 -56
- data/resources/locales/hi/units.yml +98 -56
- data/resources/locales/hu/units.yml +84 -49
- data/resources/locales/id/units.yml +84 -49
- data/resources/locales/it/units.yml +98 -56
- data/resources/locales/ja/units.yml +84 -49
- data/resources/locales/ko/units.yml +84 -49
- data/resources/locales/ms/units.yml +112 -63
- data/resources/locales/nb/units.yml +106 -64
- data/resources/locales/nl/units.yml +98 -56
- data/resources/locales/pl/units.yml +181 -112
- data/resources/locales/pt/units.yml +112 -70
- data/resources/locales/ru/units.yml +168 -112
- data/resources/locales/sv/units.yml +112 -70
- data/resources/locales/th/units.yml +84 -49
- data/resources/locales/tr/units.yml +84 -49
- data/resources/locales/uk/units.yml +168 -112
- data/resources/locales/ur/units.yml +112 -63
- data/resources/locales/zh-Hant/units.yml +84 -49
- data/resources/locales/zh/units.yml +84 -49
- data/spec/collation/collation_spec.rb +1 -1
- data/spec/collation/collator_spec.rb +120 -48
- data/spec/collation/sort_key_builder_spec.rb +80 -0
- data/spec/collation/tailoring_spec.rb +137 -0
- data/spec/collation/tailoring_tests/af.txt +321 -0
- data/spec/collation/tailoring_tests/ar.txt +188 -0
- data/spec/collation/tailoring_tests/ca.txt +446 -0
- data/spec/collation/tailoring_tests/cs.txt +273 -0
- data/spec/collation/tailoring_tests/da.txt +293 -0
- data/spec/collation/tailoring_tests/de.txt +414 -0
- data/spec/collation/tailoring_tests/el.txt +228 -0
- data/spec/collation/tailoring_tests/en.txt +399 -0
- data/spec/collation/tailoring_tests/es.txt +402 -0
- data/spec/collation/tailoring_tests/eu.txt +183 -0
- data/spec/collation/tailoring_tests/fa.txt +263 -0
- data/spec/collation/tailoring_tests/fi.txt +389 -0
- data/spec/collation/tailoring_tests/fil.txt +279 -0
- data/spec/collation/tailoring_tests/fr.txt +363 -0
- data/spec/collation/tailoring_tests/he.txt +167 -0
- data/spec/collation/tailoring_tests/hi.txt +230 -0
- data/spec/collation/tailoring_tests/hu.txt +773 -0
- data/spec/collation/tailoring_tests/id.txt +171 -0
- data/spec/collation/tailoring_tests/it.txt +231 -0
- data/spec/collation/tailoring_tests/ja.txt +4287 -0
- data/spec/collation/tailoring_tests/ko.txt +1761 -0
- data/spec/collation/tailoring_tests/ms.txt +531 -0
- data/spec/collation/tailoring_tests/nb.txt +375 -0
- data/spec/collation/tailoring_tests/nl.txt +273 -0
- data/spec/collation/tailoring_tests/pl.txt +225 -0
- data/spec/collation/tailoring_tests/pt.txt +405 -0
- data/spec/collation/tailoring_tests/ru.txt +213 -0
- data/spec/collation/tailoring_tests/sv.txt +353 -0
- data/spec/collation/tailoring_tests/th.txt +239 -0
- data/spec/collation/tailoring_tests/tr.txt +414 -0
- data/spec/collation/tailoring_tests/uk.txt +218 -0
- data/spec/collation/tailoring_tests/ur.txt +284 -0
- data/spec/collation/tailoring_tests/zh-Hant.txt +626 -0
- data/spec/collation/tailoring_tests/zh.txt +717 -0
- data/spec/collation/trie_builder_spec.rb +131 -51
- data/spec/collation/trie_spec.rb +301 -26
- data/spec/collation/trie_with_fallback_spec.rb +41 -0
- data/spec/core_ext/array_spec.rb +46 -3
- data/spec/core_ext/calendars/date_spec.rb +24 -24
- data/spec/core_ext/calendars/datetime_spec.rb +7 -0
- data/spec/core_ext/calendars/time_spec.rb +2 -2
- data/spec/formatters/calendars/timespan_formatter_spec.rb +47 -18
- data/spec/formatters/plurals/rules_spec.rb +3 -11
- data/spec/readme_spec.rb +15 -15
- data/spec/resources/loader_spec.rb +94 -0
- data/spec/spec_helper.rb +6 -0
- data/spec/tokenizers/calendars/timespan_tokenizer_spec.rb +1 -1
- data/spec/twitter_cldr_spec.rb +3 -3
- data/spec/utils_spec.rb +38 -0
- data/twitter_cldr.gemspec +25 -0
- metadata +156 -110
- data/spec/collation/sort_key_spec.rb +0 -56
- data/spec/shared/resources_spec.rb +0 -75
@@ -38,7 +38,7 @@ describe 'Unicode Collation Algorithm' do
|
|
38
38
|
current_code_points = $1.split
|
39
39
|
current_hex_code_points = current_code_points.map { |cp| cp.to_i(16) }
|
40
40
|
|
41
|
-
current_sort_key = collator.
|
41
|
+
current_sort_key = collator.get_sort_key(current_code_points)
|
42
42
|
|
43
43
|
if previous_sort_key
|
44
44
|
result = (previous_sort_key <=> current_sort_key).nonzero? || (previous_hex_code_points <=> current_hex_code_points)
|
@@ -9,58 +9,108 @@ include TwitterCldr::Collation
|
|
9
9
|
|
10
10
|
describe Collator do
|
11
11
|
|
12
|
-
|
13
|
-
|
14
|
-
|
12
|
+
let(:trie) { Trie.new }
|
13
|
+
|
14
|
+
before(:each) { clear_fce_tries_cache }
|
15
|
+
after(:all) { clear_fce_tries_cache }
|
16
|
+
|
17
|
+
describe '.default_fce_trie' do
|
18
|
+
before(:each) do
|
19
|
+
clear_default_fce_trie_cache
|
20
|
+
mock(TrieBuilder).load_trie(Collator::FRACTIONAL_UCA_SHORT_RESOURCE) { trie }
|
21
|
+
end
|
22
|
+
|
23
|
+
it 'returns default fractional collation elements trie' do
|
24
|
+
Collator.default_fce_trie.should == trie
|
25
|
+
end
|
26
|
+
|
27
|
+
it 'loads the trie only once' do
|
28
|
+
Collator.default_fce_trie.object_id.should == Collator.default_fce_trie.object_id
|
29
|
+
end
|
15
30
|
|
16
|
-
|
17
|
-
|
31
|
+
it 'locks the trie' do
|
32
|
+
Collator.default_fce_trie.should be_locked
|
33
|
+
end
|
18
34
|
end
|
19
35
|
|
20
|
-
describe '.
|
21
|
-
|
22
|
-
|
23
|
-
|
36
|
+
describe '.tailored_fce_trie' do
|
37
|
+
let(:locale) { :ru }
|
38
|
+
|
39
|
+
before(:each) do
|
40
|
+
clear_tailored_fce_tries_cache
|
41
|
+
stub(Collator).default_fce_trie { trie }
|
42
|
+
mock(TrieBuilder).load_tailored_trie(locale, Collator.default_fce_trie) { trie }
|
43
|
+
end
|
44
|
+
|
45
|
+
it 'returns default fractional collation elements trie' do
|
46
|
+
Collator.tailored_fce_trie(locale).should == trie
|
24
47
|
end
|
25
48
|
|
26
49
|
it 'loads the trie only once' do
|
27
|
-
|
50
|
+
Collator.tailored_fce_trie(locale).object_id.should == Collator.tailored_fce_trie(locale).object_id
|
51
|
+
end
|
28
52
|
|
29
|
-
|
53
|
+
it 'locks the trie' do
|
54
|
+
Collator.tailored_fce_trie(locale).should be_locked
|
30
55
|
end
|
31
56
|
end
|
32
57
|
|
33
|
-
describe '#
|
34
|
-
|
35
|
-
|
36
|
-
|
58
|
+
describe '#initialize' do
|
59
|
+
before(:each) { stub(TrieBuilder).load_trie { trie } }
|
60
|
+
before(:each) { any_instance_of(Collator) { |c| stub(c).load_trie { trie } } }
|
61
|
+
|
62
|
+
it 'initializes default collator if locale is not specified' do
|
63
|
+
Collator.new.locale.should be_nil
|
37
64
|
end
|
38
65
|
|
39
|
-
it '
|
40
|
-
|
66
|
+
it 'initialized tailored collator if locale is provided' do
|
67
|
+
Collator.new(:ru).locale.should == :ru
|
68
|
+
end
|
41
69
|
|
42
|
-
|
43
|
-
|
70
|
+
it 'converts locale' do
|
71
|
+
Collator.new(:no).locale.should == :nb
|
44
72
|
end
|
45
73
|
end
|
46
74
|
|
47
|
-
describe '#
|
48
|
-
let(:collator)
|
49
|
-
let(:string)
|
50
|
-
let(:code_points_hex)
|
51
|
-
let(:code_points)
|
52
|
-
let(:
|
75
|
+
describe '#get_collation_elements' do
|
76
|
+
let(:collator) { Collator.new }
|
77
|
+
let(:string) { 'abc' }
|
78
|
+
let(:code_points_hex) { %w[0061 0062 0063] }
|
79
|
+
let(:code_points) { code_points_hex.map { |cp| cp.to_i(16) } }
|
80
|
+
let(:collation_elements) { [[39, 5, 5], [41, 5, 5], [43, 5, 5]] }
|
53
81
|
|
54
|
-
before
|
82
|
+
before :each do
|
83
|
+
mock(TwitterCldr::Normalization::NFD).normalize_code_points(code_points_hex) { code_points_hex }
|
84
|
+
stub(TwitterCldr::Normalization::Base).combining_class_for { 0 }
|
85
|
+
end
|
86
|
+
|
87
|
+
it 'returns collation elements for a string' do
|
88
|
+
collator.get_collation_elements(string).should == collation_elements
|
89
|
+
end
|
90
|
+
|
91
|
+
it 'returns collation elements for an array of code points (represented as hex strings)' do
|
92
|
+
collator.get_collation_elements(code_points_hex).should == collation_elements
|
93
|
+
end
|
94
|
+
end
|
95
|
+
|
96
|
+
describe '#get_sort_key' do
|
97
|
+
let(:collator) { Collator.new }
|
98
|
+
let(:string) { 'abc' }
|
99
|
+
let(:code_points_hex) { %w[0061 0062 0063] }
|
100
|
+
let(:collation_elements) { [[39, 5, 5], [41, 5, 5], [43, 5, 5]] }
|
101
|
+
let(:sort_key) { [39, 41, 43, 1, 7, 1, 7] }
|
102
|
+
|
103
|
+
before(:each) { stub(TrieBuilder).load_trie { trie } }
|
104
|
+
before(:each) { mock(TwitterCldr::Collation::SortKeyBuilder).build(collation_elements) { sort_key } }
|
55
105
|
|
56
106
|
it 'calculates sort key for a string' do
|
57
|
-
mock(
|
58
|
-
collator.
|
107
|
+
mock(collator).get_collation_elements(string) { collation_elements }
|
108
|
+
collator.get_sort_key(string).should == sort_key
|
59
109
|
end
|
60
110
|
|
61
111
|
it 'calculates sort key for an array of code points (represented as hex strings)' do
|
62
|
-
|
63
|
-
collator.
|
112
|
+
mock(collator).get_collation_elements(code_points_hex) { collation_elements }
|
113
|
+
collator.get_sort_key(code_points_hex).should == sort_key
|
64
114
|
end
|
65
115
|
end
|
66
116
|
|
@@ -69,6 +119,8 @@ describe Collator do
|
|
69
119
|
let(:sort_key) { [1, 3, 8, 9] }
|
70
120
|
let(:another_sort_key) { [6, 8, 9, 2] }
|
71
121
|
|
122
|
+
before(:each) { stub(Collator).default_fce_trie { trie } }
|
123
|
+
|
72
124
|
it 'compares strings by sort keys' do
|
73
125
|
stub_sort_key(collator, 'foo', sort_key)
|
74
126
|
stub_sort_key(collator, 'bar', another_sort_key)
|
@@ -77,41 +129,61 @@ describe Collator do
|
|
77
129
|
collator.compare('bar', 'foo').should == 1
|
78
130
|
end
|
79
131
|
|
80
|
-
it 'returns 0 without computing sort keys if strings are equal' do
|
81
|
-
dont_allow(collator).
|
132
|
+
it 'returns 0 without computing sort keys if the strings are equal' do
|
133
|
+
dont_allow(collator).get_sort_key
|
82
134
|
|
83
135
|
collator.compare('foo', 'foo').should == 0
|
84
136
|
end
|
137
|
+
end
|
85
138
|
|
86
|
-
|
87
|
-
|
139
|
+
describe 'sorting' do
|
140
|
+
let(:collator) { Collator.new }
|
141
|
+
let(:sort_keys) { [['aaa', [1, 2, 3]], ['abc', [1, 3, 4]], ['bca', [2, 5, 9]]] }
|
142
|
+
let(:array) { %w[bca aaa abc] }
|
143
|
+
let(:sorted) { %w[aaa abc bca] }
|
88
144
|
|
89
|
-
|
145
|
+
before :each do
|
146
|
+
stub(Collator).default_fce_trie { trie }
|
147
|
+
sort_keys.each { |s, key| mock_sort_key(collator, s, key) }
|
90
148
|
end
|
91
|
-
end
|
92
149
|
|
93
|
-
|
94
|
-
|
150
|
+
describe '#sort' do
|
151
|
+
it 'sorts strings by sort keys' do
|
152
|
+
collator.sort(array).should == sorted
|
153
|
+
end
|
95
154
|
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
collator.sort(%w[bca aaa abc]).should == %w[aaa abc bca]
|
155
|
+
it 'does not change the original array' do
|
156
|
+
lambda { collator.sort(array) }.should_not change { array }
|
157
|
+
end
|
100
158
|
end
|
101
159
|
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
160
|
+
describe '#sort!' do
|
161
|
+
it 'sorts strings array by sort keys in-place ' do
|
162
|
+
collator.sort!(array)
|
163
|
+
array.should == sorted
|
164
|
+
end
|
106
165
|
end
|
107
166
|
end
|
108
167
|
|
109
168
|
def mock_sort_key(collator, string, sort_key)
|
110
|
-
mock(collator).
|
169
|
+
mock(collator).get_sort_key(string) { sort_key }
|
111
170
|
end
|
112
171
|
|
113
172
|
def stub_sort_key(collator, string, sort_key)
|
114
|
-
stub(collator).
|
173
|
+
stub(collator).get_sort_key(string) { sort_key }
|
174
|
+
end
|
175
|
+
|
176
|
+
def clear_fce_tries_cache
|
177
|
+
clear_default_fce_trie_cache
|
178
|
+
clear_tailored_fce_tries_cache
|
179
|
+
end
|
180
|
+
|
181
|
+
def clear_default_fce_trie_cache
|
182
|
+
Collator.instance_variable_set(:@default_fce_trie, nil)
|
183
|
+
end
|
184
|
+
|
185
|
+
def clear_tailored_fce_tries_cache
|
186
|
+
Collator.instance_variable_set(:@tailored_fce_tries_cache, nil)
|
115
187
|
end
|
116
188
|
|
117
189
|
end
|
@@ -0,0 +1,80 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
# Copyright 2012 Twitter, Inc
|
4
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
5
|
+
|
6
|
+
require 'spec_helper'
|
7
|
+
|
8
|
+
include TwitterCldr::Collation
|
9
|
+
|
10
|
+
describe SortKeyBuilder do
|
11
|
+
|
12
|
+
let(:sort_key) { SortKeyBuilder.new(collation_elements) }
|
13
|
+
let(:collation_elements) { [[63, 13, 149], [66, 81, 143]] }
|
14
|
+
let(:sort_key_bytes) { [63, 66, 1, 13, 81, 1, 149, 143] }
|
15
|
+
|
16
|
+
describe '.build' do
|
17
|
+
it 'returns a sort key for a given array of collation elements' do
|
18
|
+
sort_key = SortKeyBuilder.new(collation_elements)
|
19
|
+
|
20
|
+
mock(SortKeyBuilder).new(collation_elements) { sort_key }
|
21
|
+
mock(sort_key).bytes_array { sort_key_bytes }
|
22
|
+
|
23
|
+
SortKeyBuilder.build(collation_elements).should == sort_key_bytes
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
describe '#initialize' do
|
28
|
+
it 'assigns collation elements array' do
|
29
|
+
SortKeyBuilder.new(collation_elements).collation_elements.should == collation_elements
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
describe '#bytes_array' do
|
34
|
+
it 'builds sort key bytes' do
|
35
|
+
sort_key.bytes_array.should == sort_key_bytes
|
36
|
+
end
|
37
|
+
|
38
|
+
it 'builds bytes array only once' do
|
39
|
+
mock(sort_key).build_bytes_array { sort_key_bytes }
|
40
|
+
sort_key.bytes_array.object_id == sort_key.bytes_array.object_id
|
41
|
+
end
|
42
|
+
|
43
|
+
it 'compresses primary weights' do
|
44
|
+
SortKeyBuilder.new([[0x7A72, 0, 0], [0x7A73, 0, 0], [0x7A75, 0, 0], [0x908, 0, 0], [0x7A73, 0, 0]]).bytes_array.should ==
|
45
|
+
[0x7A, 0x72, 0x73, 0x75, 0x3, 0x9, 0x08, 0x7A, 0x73, 1, 1]
|
46
|
+
|
47
|
+
SortKeyBuilder.new([[0x7A72, 0, 0], [0x7A73, 0, 0], [0x7A75, 0, 0], [0x9508, 0, 0], [0x7A73, 0, 0]]).bytes_array.should ==
|
48
|
+
[0x7A, 0x72, 0x73, 0x75, 0xFF, 0x95, 0x08, 0x7A, 0x73, 1, 1]
|
49
|
+
end
|
50
|
+
|
51
|
+
it 'works when there is an ignorable primary weight in the middle' do
|
52
|
+
SortKeyBuilder.new([[0x1312, 0, 0], [0, 0, 0], [0x1415, 0, 0]]).bytes_array.should == [0x13, 0x12, 0x14, 0x15, 1, 1]
|
53
|
+
end
|
54
|
+
|
55
|
+
it 'do not compress single byte primary weights' do
|
56
|
+
SortKeyBuilder.new([[0x13, 0, 0], [0x13, 0, 0]]).bytes_array.should == [0x13, 0x13, 1, 1]
|
57
|
+
end
|
58
|
+
|
59
|
+
it 'resets primary lead bytes counter after a single byte weight' do
|
60
|
+
SortKeyBuilder.new([[0x1415, 0, 0], [0x13, 0, 0], [0x13, 0, 0], [0x1412, 0, 0]]).bytes_array.should == [0x14, 0x15, 0x13, 0x13, 0x14, 0x12, 1, 1]
|
61
|
+
end
|
62
|
+
|
63
|
+
it 'compresses only allowed primary weights' do
|
64
|
+
SortKeyBuilder.new([[0x812, 0, 0], [0x811, 0, 0]]).bytes_array.should == [0x8, 0x12, 0x8, 0x11, 1, 1]
|
65
|
+
end
|
66
|
+
|
67
|
+
it 'compresses secondary weights' do
|
68
|
+
SortKeyBuilder.new([[0, 5, 0], [0, 5, 0], [0, 141, 0], [0, 5, 0], [0, 5, 0]]).bytes_array.should == [1, 133, 141, 6, 1]
|
69
|
+
end
|
70
|
+
|
71
|
+
it 'compresses tertiary weights' do
|
72
|
+
SortKeyBuilder.new([[0, 0, 5], [0, 0, 5], [0, 0, 39], [0, 0, 5], [0, 0, 5]]).bytes_array.should == [1, 1, 132, 167, 6]
|
73
|
+
end
|
74
|
+
|
75
|
+
it 'compresses secondary and tertiary weights into multiple bytes if necessary' do
|
76
|
+
SortKeyBuilder.new([[0, 5, 5]] * 100).bytes_array.should == [1, 69, 40, 1, 48, 48, 18]
|
77
|
+
end
|
78
|
+
end
|
79
|
+
|
80
|
+
end
|
@@ -0,0 +1,137 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
# Copyright 2012 Twitter, Inc
|
4
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
5
|
+
|
6
|
+
require 'spec_helper'
|
7
|
+
|
8
|
+
include TwitterCldr::Collation
|
9
|
+
|
10
|
+
describe 'Unicode collation tailoring' do
|
11
|
+
|
12
|
+
describe 'tailoring support' do
|
13
|
+
before(:each) do
|
14
|
+
stub(Collator).default_fce_trie { TrieBuilder.parse_trie(fractional_uca_short_stub) }
|
15
|
+
stub(TwitterCldr::Normalization::NFD).normalize_code_points { |code_points| code_points }
|
16
|
+
stub(TwitterCldr).get_resource(:collation, :tailoring, locale) { YAML.load(tailoring_resource_stub) }
|
17
|
+
end
|
18
|
+
|
19
|
+
let(:locale) { :some_locale }
|
20
|
+
let(:default_collator) { Collator.new }
|
21
|
+
let(:tailored_collator) { Collator.new(locale) }
|
22
|
+
|
23
|
+
describe 'tailoring rules support' do
|
24
|
+
it 'tailored collation elements are used' do
|
25
|
+
default_collator.get_collation_elements(%w[0490]).should == [[0x5C1A, 5, 0x93], [0, 0xDBB9, 9]]
|
26
|
+
tailored_collator.get_collation_elements(%w[0490]).should == [[0x5C1B, 5, 0x86]]
|
27
|
+
|
28
|
+
default_collator.get_collation_elements(%w[0491]).should == [[0x5C1A, 5, 9], [0, 0xDBB9, 9]]
|
29
|
+
tailored_collator.get_collation_elements(%w[0491]).should == [[0x5C1B, 5, 5]]
|
30
|
+
end
|
31
|
+
|
32
|
+
it 'original contractions for tailored elements are applied' do
|
33
|
+
default_collator.get_collation_elements(%w[0491 0306]).should == [[0x5C, 0xDB, 9]]
|
34
|
+
tailored_collator.get_collation_elements(%w[0491 0306]).should == [[0x5C, 0xDB, 9]]
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
describe 'contractions suppressing support' do
|
39
|
+
it 'suppressed contractions are ignored' do
|
40
|
+
default_collator.get_collation_elements(%w[041A 0301]).should == [[0x5CCC, 5, 0x8F]]
|
41
|
+
tailored_collator.get_collation_elements(%w[041A 0301]).should == [[0x5C6C, 5, 0x8F], [0, 0x8D, 5]]
|
42
|
+
end
|
43
|
+
|
44
|
+
it 'non-suppressed contractions are used' do
|
45
|
+
default_collator.get_collation_elements(%w[0415 0306]).should == [[0x5C36, 5, 0x8F]]
|
46
|
+
tailored_collator.get_collation_elements(%w[0415 0306]).should == [[0x5C36, 5, 0x8F]]
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
let(:fractional_uca_short_stub) do
|
51
|
+
<<END
|
52
|
+
# collation elements from default FCE table
|
53
|
+
0301; [, 8D, 05]
|
54
|
+
0306; [, 91, 05]
|
55
|
+
041A; [5C 6C, 05, 8F] # К
|
56
|
+
0413; [5C 1A, 05, 8F] # Г
|
57
|
+
0415; [5C 34, 05, 8F] # Е
|
58
|
+
|
59
|
+
# tailored (in UK locale) with "Г < ґ <<< Ґ"
|
60
|
+
0491; [5C 1A, 05, 09][, DB B9, 09] # ґ
|
61
|
+
0490; [5C 1A, 05, 93][, DB B9, 09] # Ґ
|
62
|
+
|
63
|
+
# contraction for a tailored collation element
|
64
|
+
0491 0306; [5C, DB, 09] # ґ̆
|
65
|
+
|
66
|
+
# contractions suppressed in tailoring (for RU locale)
|
67
|
+
041A 0301; [5C CC, 05, 8F] # Ќ
|
68
|
+
0413 0301; [5C 30, 05, 8F] # Ѓ
|
69
|
+
|
70
|
+
# contractions non-suppressed in tailoring
|
71
|
+
0415 0306; [5C 36, 05, 8F] # Ӗ
|
72
|
+
END
|
73
|
+
end
|
74
|
+
|
75
|
+
let(:tailoring_resource_stub) do
|
76
|
+
<<END
|
77
|
+
---
|
78
|
+
:tailored_table: ! '0491; [5C1B, 5, 5]
|
79
|
+
|
80
|
+
0490; [5C1B, 5, 86]'
|
81
|
+
:suppressed_contractions: ГК
|
82
|
+
...
|
83
|
+
END
|
84
|
+
end
|
85
|
+
|
86
|
+
end
|
87
|
+
|
88
|
+
# Test data is taken from http://unicode.org/cldr/trac/browser/tags/release-2-0-1/test/
|
89
|
+
# Test files format: # - comments, // - pending tests.
|
90
|
+
#
|
91
|
+
it 'passes tailoring test for each supported locale', :slow => true do
|
92
|
+
TwitterCldr.supported_locales.each do |locale|
|
93
|
+
collator = Collator.new(locale)
|
94
|
+
|
95
|
+
print "#{locale}\t-\t"
|
96
|
+
|
97
|
+
lines = open(File.join(File.dirname(__FILE__), 'tailoring_tests', "#{locale}.txt")) { |f| f.lines.map(&:strip) }
|
98
|
+
|
99
|
+
active_tests = lines.count(&method(:tailoring_test?))
|
100
|
+
pending_tests = lines.count(&method(:pending_tailoring_test?))
|
101
|
+
print "tests: %-4d active, %5.1f%% %5s pending\t-\t" % [active_tests, (100.0 * pending_tests / (pending_tests + active_tests)), "(#{pending_tests})"]
|
102
|
+
|
103
|
+
last_number = last = nil
|
104
|
+
|
105
|
+
failures = lines.each_with_index.inject([]) do |memo, (current, number)|
|
106
|
+
if tailoring_test?(current)
|
107
|
+
memo << [last_number + 1, last, current] if tailoring_test?(last) && collator.compare(last, current) == 1
|
108
|
+
|
109
|
+
last = current
|
110
|
+
last_number = number
|
111
|
+
elsif pending_tailoring_test?(current)
|
112
|
+
last_number = last = nil
|
113
|
+
end
|
114
|
+
|
115
|
+
memo
|
116
|
+
end
|
117
|
+
|
118
|
+
if failures.empty?
|
119
|
+
puts "OK"
|
120
|
+
else
|
121
|
+
failures_info = "#{failures.size} failures: #{failures.inspect}"
|
122
|
+
|
123
|
+
puts failures_info
|
124
|
+
failures.should(be_empty, "#{locale} - #{failures_info}")
|
125
|
+
end
|
126
|
+
end
|
127
|
+
end
|
128
|
+
|
129
|
+
def pending_tailoring_test?(line)
|
130
|
+
!!(line =~ %r{^//})
|
131
|
+
end
|
132
|
+
|
133
|
+
def tailoring_test?(line)
|
134
|
+
!!(line && line !~ %r{^(//|#|\s*$)})
|
135
|
+
end
|
136
|
+
|
137
|
+
end
|