twitter_cldr 1.5.0 → 1.6.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (203) hide show
  1. data/Gemfile +32 -0
  2. data/History.txt +78 -0
  3. data/README.md +72 -62
  4. data/Rakefile +22 -0
  5. data/js/lib/compiler.rb +40 -0
  6. data/js/lib/mustache/bundle.coffee +14 -0
  7. data/js/lib/mustache/calendars/datetime.coffee +240 -0
  8. data/js/lib/mustache/calendars/timespan.coffee +52 -0
  9. data/js/lib/mustache/plurals/rules.coffee +14 -0
  10. data/js/lib/renderers/base.rb +18 -0
  11. data/js/lib/renderers/bundle.rb +18 -0
  12. data/js/lib/renderers/calendars/datetime_renderer.rb +34 -0
  13. data/js/lib/renderers/calendars/timespan_renderer.rb +39 -0
  14. data/js/lib/renderers/plurals/rules/plural_rules_compiler.rb +89 -0
  15. data/js/lib/renderers/plurals/rules/plural_rules_renderer.rb +26 -0
  16. data/js/lib/twitter_cldr_js.rb +85 -0
  17. data/js/spec/js/calendars/datetime_spec.js +418 -0
  18. data/js/spec/js/calendars/timespan_spec.js +91 -0
  19. data/js/spec/js/plurals/plural_rules_spec.js +28 -0
  20. data/js/spec/js/support/jasmine.yml +8 -0
  21. data/js/spec/rb/renderers/plurals/plural_rules_compiler_spec.rb +52 -0
  22. data/js/spec/rb/spec_helper.rb +13 -0
  23. data/lib/twitter_cldr.rb +2 -1
  24. data/lib/twitter_cldr/collation.rb +2 -1
  25. data/lib/twitter_cldr/collation/collator.rb +49 -31
  26. data/lib/twitter_cldr/collation/{sort_key.rb → sort_key_builder.rb} +31 -8
  27. data/lib/twitter_cldr/collation/trie.rb +116 -24
  28. data/lib/twitter_cldr/collation/trie_builder.rb +54 -28
  29. data/lib/twitter_cldr/collation/trie_with_fallback.rb +55 -0
  30. data/lib/twitter_cldr/core_ext/array.rb +14 -1
  31. data/lib/twitter_cldr/core_ext/calendars/datetime.rb +8 -2
  32. data/lib/twitter_cldr/core_ext/calendars/timespan.rb +5 -5
  33. data/lib/twitter_cldr/formatters/calendars/timespan_formatter.rb +10 -10
  34. data/lib/twitter_cldr/formatters/plurals/rules.rb +3 -5
  35. data/lib/twitter_cldr/resources.rb +11 -0
  36. data/lib/twitter_cldr/resources/import.rb +12 -0
  37. data/lib/twitter_cldr/resources/import/tailoring.rb +193 -0
  38. data/lib/twitter_cldr/{shared/resources.rb → resources/loader.rb} +17 -4
  39. data/lib/twitter_cldr/shared.rb +0 -1
  40. data/lib/twitter_cldr/tokenizers/base.rb +9 -9
  41. data/lib/twitter_cldr/tokenizers/calendars/datetime_tokenizer.rb +0 -4
  42. data/lib/twitter_cldr/tokenizers/calendars/timespan_tokenizer.rb +21 -7
  43. data/lib/twitter_cldr/utils.rb +11 -0
  44. data/lib/twitter_cldr/version.rb +1 -1
  45. data/resources/collation/tailoring/af.yml +3 -0
  46. data/resources/collation/tailoring/ar.yml +21 -0
  47. data/resources/collation/tailoring/ca.yml +9 -0
  48. data/resources/collation/tailoring/cs.yml +25 -0
  49. data/resources/collation/tailoring/da.yml +59 -0
  50. data/resources/collation/tailoring/de.yml +3 -0
  51. data/resources/collation/tailoring/el.yml +3 -0
  52. data/resources/collation/tailoring/en.yml +3 -0
  53. data/resources/collation/tailoring/es.yml +5 -0
  54. data/resources/collation/tailoring/eu.yml +3 -0
  55. data/resources/collation/tailoring/fa.yml +73 -0
  56. data/resources/collation/tailoring/fi.yml +61 -0
  57. data/resources/collation/tailoring/fil.yml +11 -0
  58. data/resources/collation/tailoring/fr.yml +3 -0
  59. data/resources/collation/tailoring/he.yml +3 -0
  60. data/resources/collation/tailoring/hi.yml +7 -0
  61. data/resources/collation/tailoring/hu.yml +125 -0
  62. data/resources/collation/tailoring/id.yml +3 -0
  63. data/resources/collation/tailoring/it.yml +3 -0
  64. data/resources/collation/tailoring/ja.yml +14647 -0
  65. data/resources/collation/tailoring/ko.yml +14953 -0
  66. data/resources/collation/tailoring/ms.yml +3 -0
  67. data/resources/collation/tailoring/nb.yml +59 -0
  68. data/resources/collation/tailoring/nl.yml +3 -0
  69. data/resources/collation/tailoring/pl.yml +37 -0
  70. data/resources/collation/tailoring/pt.yml +3 -0
  71. data/resources/collation/tailoring/ru.yml +3 -0
  72. data/resources/collation/tailoring/sv.yml +63 -0
  73. data/resources/collation/tailoring/th.yml +19 -0
  74. data/resources/collation/tailoring/tr.yml +27 -0
  75. data/resources/collation/tailoring/uk.yml +5 -0
  76. data/resources/collation/tailoring/ur.yml +163 -0
  77. data/resources/collation/tailoring/zh-Hant.yml +3 -0
  78. data/resources/collation/tailoring/zh.yml +149 -0
  79. data/resources/custom/locales/af/units.yml +19 -0
  80. data/resources/custom/locales/ar/units.yml +35 -0
  81. data/resources/custom/locales/ca/units.yml +19 -0
  82. data/resources/custom/locales/cs/units.yml +23 -0
  83. data/resources/custom/locales/da/units.yml +19 -0
  84. data/resources/custom/locales/de/units.yml +19 -0
  85. data/resources/custom/locales/el/units.yml +19 -0
  86. data/resources/custom/locales/en/units.yml +18 -0
  87. data/resources/custom/locales/es/units.yml +19 -0
  88. data/resources/custom/locales/eu/units.yml +19 -0
  89. data/resources/custom/locales/fa/units.yml +15 -0
  90. data/resources/custom/locales/fi/units.yml +19 -0
  91. data/resources/custom/locales/fil/units.yml +19 -0
  92. data/resources/custom/locales/fr/units.yml +19 -0
  93. data/resources/custom/locales/he/units.yml +19 -0
  94. data/resources/custom/locales/hi/units.yml +19 -0
  95. data/resources/custom/locales/hu/units.yml +15 -0
  96. data/resources/custom/locales/id/units.yml +15 -0
  97. data/resources/custom/locales/it/units.yml +19 -0
  98. data/resources/custom/locales/ja/units.yml +15 -0
  99. data/resources/custom/locales/ko/units.yml +15 -0
  100. data/resources/custom/locales/ms/units.yml +15 -0
  101. data/resources/custom/locales/nb/units.yml +19 -0
  102. data/resources/custom/locales/nl/units.yml +19 -0
  103. data/resources/custom/locales/pl/units.yml +23 -0
  104. data/resources/custom/locales/pt/units.yml +19 -0
  105. data/resources/custom/locales/ru/units.yml +27 -0
  106. data/resources/custom/locales/sv/units.yml +19 -0
  107. data/resources/custom/locales/th/units.yml +15 -0
  108. data/resources/custom/locales/tr/units.yml +15 -0
  109. data/resources/custom/locales/uk/units.yml +27 -0
  110. data/resources/custom/locales/ur/units.yml +19 -0
  111. data/resources/custom/locales/zh-Hant/units.yml +15 -0
  112. data/resources/custom/locales/zh/units.yml +15 -0
  113. data/resources/locales/af/units.yml +112 -65
  114. data/resources/locales/ar/units.yml +196 -126
  115. data/resources/locales/ca/units.yml +112 -70
  116. data/resources/locales/cs/units.yml +140 -91
  117. data/resources/locales/da/units.yml +98 -56
  118. data/resources/locales/de/units.yml +112 -70
  119. data/resources/locales/el/units.yml +119 -84
  120. data/resources/locales/en/units.yml +84 -42
  121. data/resources/locales/es/units.yml +112 -70
  122. data/resources/locales/eu/units.yml +105 -68
  123. data/resources/locales/fa/units.yml +98 -63
  124. data/resources/locales/fi/units.yml +112 -70
  125. data/resources/locales/fil/units.yml +98 -56
  126. data/resources/locales/fr/units.yml +112 -70
  127. data/resources/locales/he/units.yml +98 -56
  128. data/resources/locales/hi/units.yml +98 -56
  129. data/resources/locales/hu/units.yml +84 -49
  130. data/resources/locales/id/units.yml +84 -49
  131. data/resources/locales/it/units.yml +98 -56
  132. data/resources/locales/ja/units.yml +84 -49
  133. data/resources/locales/ko/units.yml +84 -49
  134. data/resources/locales/ms/units.yml +112 -63
  135. data/resources/locales/nb/units.yml +106 -64
  136. data/resources/locales/nl/units.yml +98 -56
  137. data/resources/locales/pl/units.yml +181 -112
  138. data/resources/locales/pt/units.yml +112 -70
  139. data/resources/locales/ru/units.yml +168 -112
  140. data/resources/locales/sv/units.yml +112 -70
  141. data/resources/locales/th/units.yml +84 -49
  142. data/resources/locales/tr/units.yml +84 -49
  143. data/resources/locales/uk/units.yml +168 -112
  144. data/resources/locales/ur/units.yml +112 -63
  145. data/resources/locales/zh-Hant/units.yml +84 -49
  146. data/resources/locales/zh/units.yml +84 -49
  147. data/spec/collation/collation_spec.rb +1 -1
  148. data/spec/collation/collator_spec.rb +120 -48
  149. data/spec/collation/sort_key_builder_spec.rb +80 -0
  150. data/spec/collation/tailoring_spec.rb +137 -0
  151. data/spec/collation/tailoring_tests/af.txt +321 -0
  152. data/spec/collation/tailoring_tests/ar.txt +188 -0
  153. data/spec/collation/tailoring_tests/ca.txt +446 -0
  154. data/spec/collation/tailoring_tests/cs.txt +273 -0
  155. data/spec/collation/tailoring_tests/da.txt +293 -0
  156. data/spec/collation/tailoring_tests/de.txt +414 -0
  157. data/spec/collation/tailoring_tests/el.txt +228 -0
  158. data/spec/collation/tailoring_tests/en.txt +399 -0
  159. data/spec/collation/tailoring_tests/es.txt +402 -0
  160. data/spec/collation/tailoring_tests/eu.txt +183 -0
  161. data/spec/collation/tailoring_tests/fa.txt +263 -0
  162. data/spec/collation/tailoring_tests/fi.txt +389 -0
  163. data/spec/collation/tailoring_tests/fil.txt +279 -0
  164. data/spec/collation/tailoring_tests/fr.txt +363 -0
  165. data/spec/collation/tailoring_tests/he.txt +167 -0
  166. data/spec/collation/tailoring_tests/hi.txt +230 -0
  167. data/spec/collation/tailoring_tests/hu.txt +773 -0
  168. data/spec/collation/tailoring_tests/id.txt +171 -0
  169. data/spec/collation/tailoring_tests/it.txt +231 -0
  170. data/spec/collation/tailoring_tests/ja.txt +4287 -0
  171. data/spec/collation/tailoring_tests/ko.txt +1761 -0
  172. data/spec/collation/tailoring_tests/ms.txt +531 -0
  173. data/spec/collation/tailoring_tests/nb.txt +375 -0
  174. data/spec/collation/tailoring_tests/nl.txt +273 -0
  175. data/spec/collation/tailoring_tests/pl.txt +225 -0
  176. data/spec/collation/tailoring_tests/pt.txt +405 -0
  177. data/spec/collation/tailoring_tests/ru.txt +213 -0
  178. data/spec/collation/tailoring_tests/sv.txt +353 -0
  179. data/spec/collation/tailoring_tests/th.txt +239 -0
  180. data/spec/collation/tailoring_tests/tr.txt +414 -0
  181. data/spec/collation/tailoring_tests/uk.txt +218 -0
  182. data/spec/collation/tailoring_tests/ur.txt +284 -0
  183. data/spec/collation/tailoring_tests/zh-Hant.txt +626 -0
  184. data/spec/collation/tailoring_tests/zh.txt +717 -0
  185. data/spec/collation/trie_builder_spec.rb +131 -51
  186. data/spec/collation/trie_spec.rb +301 -26
  187. data/spec/collation/trie_with_fallback_spec.rb +41 -0
  188. data/spec/core_ext/array_spec.rb +46 -3
  189. data/spec/core_ext/calendars/date_spec.rb +24 -24
  190. data/spec/core_ext/calendars/datetime_spec.rb +7 -0
  191. data/spec/core_ext/calendars/time_spec.rb +2 -2
  192. data/spec/formatters/calendars/timespan_formatter_spec.rb +47 -18
  193. data/spec/formatters/plurals/rules_spec.rb +3 -11
  194. data/spec/readme_spec.rb +15 -15
  195. data/spec/resources/loader_spec.rb +94 -0
  196. data/spec/spec_helper.rb +6 -0
  197. data/spec/tokenizers/calendars/timespan_tokenizer_spec.rb +1 -1
  198. data/spec/twitter_cldr_spec.rb +3 -3
  199. data/spec/utils_spec.rb +38 -0
  200. data/twitter_cldr.gemspec +25 -0
  201. metadata +156 -110
  202. data/spec/collation/sort_key_spec.rb +0 -56
  203. data/spec/shared/resources_spec.rb +0 -75
@@ -38,7 +38,7 @@ describe 'Unicode Collation Algorithm' do
38
38
  current_code_points = $1.split
39
39
  current_hex_code_points = current_code_points.map { |cp| cp.to_i(16) }
40
40
 
41
- current_sort_key = collator.sort_key(current_code_points)
41
+ current_sort_key = collator.get_sort_key(current_code_points)
42
42
 
43
43
  if previous_sort_key
44
44
  result = (previous_sort_key <=> current_sort_key).nonzero? || (previous_hex_code_points <=> current_hex_code_points)
@@ -9,58 +9,108 @@ include TwitterCldr::Collation
9
9
 
10
10
  describe Collator do
11
11
 
12
- before :each do
13
- Collator.instance_variable_set(:@trie, nil)
14
- end
12
+ let(:trie) { Trie.new }
13
+
14
+ before(:each) { clear_fce_tries_cache }
15
+ after(:all) { clear_fce_tries_cache }
16
+
17
+ describe '.default_fce_trie' do
18
+ before(:each) do
19
+ clear_default_fce_trie_cache
20
+ mock(TrieBuilder).load_trie(Collator::FRACTIONAL_UCA_SHORT_RESOURCE) { trie }
21
+ end
22
+
23
+ it 'returns default fractional collation elements trie' do
24
+ Collator.default_fce_trie.should == trie
25
+ end
26
+
27
+ it 'loads the trie only once' do
28
+ Collator.default_fce_trie.object_id.should == Collator.default_fce_trie.object_id
29
+ end
15
30
 
16
- after :all do
17
- Collator.instance_variable_set(:@trie, nil)
31
+ it 'locks the trie' do
32
+ Collator.default_fce_trie.should be_locked
33
+ end
18
34
  end
19
35
 
20
- describe '.trie' do
21
- it 'returns collation elements trie' do
22
- mock(TrieBuilder).load_trie(Collator::FRACTIONAL_UCA_SHORT_RESOURCE) { 'trie' }
23
- Collator.trie.should == 'trie'
36
+ describe '.tailored_fce_trie' do
37
+ let(:locale) { :ru }
38
+
39
+ before(:each) do
40
+ clear_tailored_fce_tries_cache
41
+ stub(Collator).default_fce_trie { trie }
42
+ mock(TrieBuilder).load_tailored_trie(locale, Collator.default_fce_trie) { trie }
43
+ end
44
+
45
+ it 'returns default fractional collation elements trie' do
46
+ Collator.tailored_fce_trie(locale).should == trie
24
47
  end
25
48
 
26
49
  it 'loads the trie only once' do
27
- mock(TrieBuilder).load_trie(Collator::FRACTIONAL_UCA_SHORT_RESOURCE) { 'trie' }
50
+ Collator.tailored_fce_trie(locale).object_id.should == Collator.tailored_fce_trie(locale).object_id
51
+ end
28
52
 
29
- Collator.trie.object_id.should == Collator.trie.object_id
53
+ it 'locks the trie' do
54
+ Collator.tailored_fce_trie(locale).should be_locked
30
55
  end
31
56
  end
32
57
 
33
- describe '#trie' do
34
- it 'delegates to the class method' do
35
- mock(Collator).trie { 'trie' }
36
- Collator.new.trie.should == 'trie'
58
+ describe '#initialize' do
59
+ before(:each) { stub(TrieBuilder).load_trie { trie } }
60
+ before(:each) { any_instance_of(Collator) { |c| stub(c).load_trie { trie } } }
61
+
62
+ it 'initializes default collator if locale is not specified' do
63
+ Collator.new.locale.should be_nil
37
64
  end
38
65
 
39
- it 'calls class method only once' do
40
- mock(Collator).trie { 'trie' }
66
+ it 'initialized tailored collator if locale is provided' do
67
+ Collator.new(:ru).locale.should == :ru
68
+ end
41
69
 
42
- collator = Collator.new
43
- collator.trie.object_id.should == collator.trie.object_id
70
+ it 'converts locale' do
71
+ Collator.new(:no).locale.should == :nb
44
72
  end
45
73
  end
46
74
 
47
- describe '#sort_key' do
48
- let(:collator) { Collator.new }
49
- let(:string) { 'abc' }
50
- let(:code_points_hex) { %w[0061 0062 0063] }
51
- let(:code_points) { code_points_hex.map { |cp| cp.to_i(16) } }
52
- let(:sort_key) { [9986, 10498, 11010, 0, 1282, 1282, 1282, 0, 1282, 1282, 1282] }
75
+ describe '#get_collation_elements' do
76
+ let(:collator) { Collator.new }
77
+ let(:string) { 'abc' }
78
+ let(:code_points_hex) { %w[0061 0062 0063] }
79
+ let(:code_points) { code_points_hex.map { |cp| cp.to_i(16) } }
80
+ let(:collation_elements) { [[39, 5, 5], [41, 5, 5], [43, 5, 5]] }
53
81
 
54
- before(:each) { mock(collator).sort_key_for_code_points(code_points) { sort_key } }
82
+ before :each do
83
+ mock(TwitterCldr::Normalization::NFD).normalize_code_points(code_points_hex) { code_points_hex }
84
+ stub(TwitterCldr::Normalization::Base).combining_class_for { 0 }
85
+ end
86
+
87
+ it 'returns collation elements for a string' do
88
+ collator.get_collation_elements(string).should == collation_elements
89
+ end
90
+
91
+ it 'returns collation elements for an array of code points (represented as hex strings)' do
92
+ collator.get_collation_elements(code_points_hex).should == collation_elements
93
+ end
94
+ end
95
+
96
+ describe '#get_sort_key' do
97
+ let(:collator) { Collator.new }
98
+ let(:string) { 'abc' }
99
+ let(:code_points_hex) { %w[0061 0062 0063] }
100
+ let(:collation_elements) { [[39, 5, 5], [41, 5, 5], [43, 5, 5]] }
101
+ let(:sort_key) { [39, 41, 43, 1, 7, 1, 7] }
102
+
103
+ before(:each) { stub(TrieBuilder).load_trie { trie } }
104
+ before(:each) { mock(TwitterCldr::Collation::SortKeyBuilder).build(collation_elements) { sort_key } }
55
105
 
56
106
  it 'calculates sort key for a string' do
57
- mock(TwitterCldr::Utils::CodePoints).from_string(string) { code_points_hex }
58
- collator.sort_key(string).should == sort_key
107
+ mock(collator).get_collation_elements(string) { collation_elements }
108
+ collator.get_sort_key(string).should == sort_key
59
109
  end
60
110
 
61
111
  it 'calculates sort key for an array of code points (represented as hex strings)' do
62
- dont_allow(TwitterCldr::Utils::CodePoints).from_string(string)
63
- collator.sort_key(code_points_hex).should == sort_key
112
+ mock(collator).get_collation_elements(code_points_hex) { collation_elements }
113
+ collator.get_sort_key(code_points_hex).should == sort_key
64
114
  end
65
115
  end
66
116
 
@@ -69,6 +119,8 @@ describe Collator do
69
119
  let(:sort_key) { [1, 3, 8, 9] }
70
120
  let(:another_sort_key) { [6, 8, 9, 2] }
71
121
 
122
+ before(:each) { stub(Collator).default_fce_trie { trie } }
123
+
72
124
  it 'compares strings by sort keys' do
73
125
  stub_sort_key(collator, 'foo', sort_key)
74
126
  stub_sort_key(collator, 'bar', another_sort_key)
@@ -77,41 +129,61 @@ describe Collator do
77
129
  collator.compare('bar', 'foo').should == 1
78
130
  end
79
131
 
80
- it 'returns 0 without computing sort keys if strings are equal' do
81
- dont_allow(collator).sort_key
132
+ it 'returns 0 without computing sort keys if the strings are equal' do
133
+ dont_allow(collator).get_sort_key
82
134
 
83
135
  collator.compare('foo', 'foo').should == 0
84
136
  end
137
+ end
85
138
 
86
- it 'compares strings by code points if the sort keys are equal' do
87
- stub(collator).sort_key { sort_key }
139
+ describe 'sorting' do
140
+ let(:collator) { Collator.new }
141
+ let(:sort_keys) { [['aaa', [1, 2, 3]], ['abc', [1, 3, 4]], ['bca', [2, 5, 9]]] }
142
+ let(:array) { %w[bca aaa abc] }
143
+ let(:sorted) { %w[aaa abc bca] }
88
144
 
89
- collator.compare('bar', 'foo').should == -1
145
+ before :each do
146
+ stub(Collator).default_fce_trie { trie }
147
+ sort_keys.each { |s, key| mock_sort_key(collator, s, key) }
90
148
  end
91
- end
92
149
 
93
- describe '#sort' do
94
- let(:collator) { Collator.new }
150
+ describe '#sort' do
151
+ it 'sorts strings by sort keys' do
152
+ collator.sort(array).should == sorted
153
+ end
95
154
 
96
- it 'sorts strings by sort keys' do
97
- [['aaa', [1, 2, 3]], ['abc', [1, 3, 4]], ['bca', [2, 5, 9]]].each { |s, key| mock_sort_key(collator, s, key) }
98
-
99
- collator.sort(%w[bca aaa abc]).should == %w[aaa abc bca]
155
+ it 'does not change the original array' do
156
+ lambda { collator.sort(array) }.should_not change { array }
157
+ end
100
158
  end
101
159
 
102
- it 'sorts strings with equal sort keys by code points' do
103
- [['aaa', [1, 2, 3]], ['abc', [1, 2, 3]], ['bca', [1, 2, 3]]].each { |s, key| mock_sort_key(collator, s, key) }
104
-
105
- collator.sort(%w[bca abc aaa]).should == %w[aaa abc bca]
160
+ describe '#sort!' do
161
+ it 'sorts strings array by sort keys in-place ' do
162
+ collator.sort!(array)
163
+ array.should == sorted
164
+ end
106
165
  end
107
166
  end
108
167
 
109
168
  def mock_sort_key(collator, string, sort_key)
110
- mock(collator).sort_key(TwitterCldr::Utils::CodePoints.from_string(string)) { sort_key }
169
+ mock(collator).get_sort_key(string) { sort_key }
111
170
  end
112
171
 
113
172
  def stub_sort_key(collator, string, sort_key)
114
- stub(collator).sort_key(TwitterCldr::Utils::CodePoints.from_string(string)) { sort_key }
173
+ stub(collator).get_sort_key(string) { sort_key }
174
+ end
175
+
176
+ def clear_fce_tries_cache
177
+ clear_default_fce_trie_cache
178
+ clear_tailored_fce_tries_cache
179
+ end
180
+
181
+ def clear_default_fce_trie_cache
182
+ Collator.instance_variable_set(:@default_fce_trie, nil)
183
+ end
184
+
185
+ def clear_tailored_fce_tries_cache
186
+ Collator.instance_variable_set(:@tailored_fce_tries_cache, nil)
115
187
  end
116
188
 
117
189
  end
@@ -0,0 +1,80 @@
1
+ # encoding: UTF-8
2
+
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
6
+ require 'spec_helper'
7
+
8
+ include TwitterCldr::Collation
9
+
10
+ describe SortKeyBuilder do
11
+
12
+ let(:sort_key) { SortKeyBuilder.new(collation_elements) }
13
+ let(:collation_elements) { [[63, 13, 149], [66, 81, 143]] }
14
+ let(:sort_key_bytes) { [63, 66, 1, 13, 81, 1, 149, 143] }
15
+
16
+ describe '.build' do
17
+ it 'returns a sort key for a given array of collation elements' do
18
+ sort_key = SortKeyBuilder.new(collation_elements)
19
+
20
+ mock(SortKeyBuilder).new(collation_elements) { sort_key }
21
+ mock(sort_key).bytes_array { sort_key_bytes }
22
+
23
+ SortKeyBuilder.build(collation_elements).should == sort_key_bytes
24
+ end
25
+ end
26
+
27
+ describe '#initialize' do
28
+ it 'assigns collation elements array' do
29
+ SortKeyBuilder.new(collation_elements).collation_elements.should == collation_elements
30
+ end
31
+ end
32
+
33
+ describe '#bytes_array' do
34
+ it 'builds sort key bytes' do
35
+ sort_key.bytes_array.should == sort_key_bytes
36
+ end
37
+
38
+ it 'builds bytes array only once' do
39
+ mock(sort_key).build_bytes_array { sort_key_bytes }
40
+ sort_key.bytes_array.object_id == sort_key.bytes_array.object_id
41
+ end
42
+
43
+ it 'compresses primary weights' do
44
+ SortKeyBuilder.new([[0x7A72, 0, 0], [0x7A73, 0, 0], [0x7A75, 0, 0], [0x908, 0, 0], [0x7A73, 0, 0]]).bytes_array.should ==
45
+ [0x7A, 0x72, 0x73, 0x75, 0x3, 0x9, 0x08, 0x7A, 0x73, 1, 1]
46
+
47
+ SortKeyBuilder.new([[0x7A72, 0, 0], [0x7A73, 0, 0], [0x7A75, 0, 0], [0x9508, 0, 0], [0x7A73, 0, 0]]).bytes_array.should ==
48
+ [0x7A, 0x72, 0x73, 0x75, 0xFF, 0x95, 0x08, 0x7A, 0x73, 1, 1]
49
+ end
50
+
51
+ it 'works when there is an ignorable primary weight in the middle' do
52
+ SortKeyBuilder.new([[0x1312, 0, 0], [0, 0, 0], [0x1415, 0, 0]]).bytes_array.should == [0x13, 0x12, 0x14, 0x15, 1, 1]
53
+ end
54
+
55
+ it 'do not compress single byte primary weights' do
56
+ SortKeyBuilder.new([[0x13, 0, 0], [0x13, 0, 0]]).bytes_array.should == [0x13, 0x13, 1, 1]
57
+ end
58
+
59
+ it 'resets primary lead bytes counter after a single byte weight' do
60
+ SortKeyBuilder.new([[0x1415, 0, 0], [0x13, 0, 0], [0x13, 0, 0], [0x1412, 0, 0]]).bytes_array.should == [0x14, 0x15, 0x13, 0x13, 0x14, 0x12, 1, 1]
61
+ end
62
+
63
+ it 'compresses only allowed primary weights' do
64
+ SortKeyBuilder.new([[0x812, 0, 0], [0x811, 0, 0]]).bytes_array.should == [0x8, 0x12, 0x8, 0x11, 1, 1]
65
+ end
66
+
67
+ it 'compresses secondary weights' do
68
+ SortKeyBuilder.new([[0, 5, 0], [0, 5, 0], [0, 141, 0], [0, 5, 0], [0, 5, 0]]).bytes_array.should == [1, 133, 141, 6, 1]
69
+ end
70
+
71
+ it 'compresses tertiary weights' do
72
+ SortKeyBuilder.new([[0, 0, 5], [0, 0, 5], [0, 0, 39], [0, 0, 5], [0, 0, 5]]).bytes_array.should == [1, 1, 132, 167, 6]
73
+ end
74
+
75
+ it 'compresses secondary and tertiary weights into multiple bytes if necessary' do
76
+ SortKeyBuilder.new([[0, 5, 5]] * 100).bytes_array.should == [1, 69, 40, 1, 48, 48, 18]
77
+ end
78
+ end
79
+
80
+ end
@@ -0,0 +1,137 @@
1
+ # encoding: UTF-8
2
+
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
6
+ require 'spec_helper'
7
+
8
+ include TwitterCldr::Collation
9
+
10
+ describe 'Unicode collation tailoring' do
11
+
12
+ describe 'tailoring support' do
13
+ before(:each) do
14
+ stub(Collator).default_fce_trie { TrieBuilder.parse_trie(fractional_uca_short_stub) }
15
+ stub(TwitterCldr::Normalization::NFD).normalize_code_points { |code_points| code_points }
16
+ stub(TwitterCldr).get_resource(:collation, :tailoring, locale) { YAML.load(tailoring_resource_stub) }
17
+ end
18
+
19
+ let(:locale) { :some_locale }
20
+ let(:default_collator) { Collator.new }
21
+ let(:tailored_collator) { Collator.new(locale) }
22
+
23
+ describe 'tailoring rules support' do
24
+ it 'tailored collation elements are used' do
25
+ default_collator.get_collation_elements(%w[0490]).should == [[0x5C1A, 5, 0x93], [0, 0xDBB9, 9]]
26
+ tailored_collator.get_collation_elements(%w[0490]).should == [[0x5C1B, 5, 0x86]]
27
+
28
+ default_collator.get_collation_elements(%w[0491]).should == [[0x5C1A, 5, 9], [0, 0xDBB9, 9]]
29
+ tailored_collator.get_collation_elements(%w[0491]).should == [[0x5C1B, 5, 5]]
30
+ end
31
+
32
+ it 'original contractions for tailored elements are applied' do
33
+ default_collator.get_collation_elements(%w[0491 0306]).should == [[0x5C, 0xDB, 9]]
34
+ tailored_collator.get_collation_elements(%w[0491 0306]).should == [[0x5C, 0xDB, 9]]
35
+ end
36
+ end
37
+
38
+ describe 'contractions suppressing support' do
39
+ it 'suppressed contractions are ignored' do
40
+ default_collator.get_collation_elements(%w[041A 0301]).should == [[0x5CCC, 5, 0x8F]]
41
+ tailored_collator.get_collation_elements(%w[041A 0301]).should == [[0x5C6C, 5, 0x8F], [0, 0x8D, 5]]
42
+ end
43
+
44
+ it 'non-suppressed contractions are used' do
45
+ default_collator.get_collation_elements(%w[0415 0306]).should == [[0x5C36, 5, 0x8F]]
46
+ tailored_collator.get_collation_elements(%w[0415 0306]).should == [[0x5C36, 5, 0x8F]]
47
+ end
48
+ end
49
+
50
+ let(:fractional_uca_short_stub) do
51
+ <<END
52
+ # collation elements from default FCE table
53
+ 0301; [, 8D, 05]
54
+ 0306; [, 91, 05]
55
+ 041A; [5C 6C, 05, 8F] # К
56
+ 0413; [5C 1A, 05, 8F] # Г
57
+ 0415; [5C 34, 05, 8F] # Е
58
+
59
+ # tailored (in UK locale) with "Г < ґ <<< Ґ"
60
+ 0491; [5C 1A, 05, 09][, DB B9, 09] # ґ
61
+ 0490; [5C 1A, 05, 93][, DB B9, 09] # Ґ
62
+
63
+ # contraction for a tailored collation element
64
+ 0491 0306; [5C, DB, 09] # ґ̆
65
+
66
+ # contractions suppressed in tailoring (for RU locale)
67
+ 041A 0301; [5C CC, 05, 8F] # Ќ
68
+ 0413 0301; [5C 30, 05, 8F] # Ѓ
69
+
70
+ # contractions non-suppressed in tailoring
71
+ 0415 0306; [5C 36, 05, 8F] # Ӗ
72
+ END
73
+ end
74
+
75
+ let(:tailoring_resource_stub) do
76
+ <<END
77
+ ---
78
+ :tailored_table: ! '0491; [5C1B, 5, 5]
79
+
80
+ 0490; [5C1B, 5, 86]'
81
+ :suppressed_contractions: ГК
82
+ ...
83
+ END
84
+ end
85
+
86
+ end
87
+
88
+ # Test data is taken from http://unicode.org/cldr/trac/browser/tags/release-2-0-1/test/
89
+ # Test files format: # - comments, // - pending tests.
90
+ #
91
+ it 'passes tailoring test for each supported locale', :slow => true do
92
+ TwitterCldr.supported_locales.each do |locale|
93
+ collator = Collator.new(locale)
94
+
95
+ print "#{locale}\t-\t"
96
+
97
+ lines = open(File.join(File.dirname(__FILE__), 'tailoring_tests', "#{locale}.txt")) { |f| f.lines.map(&:strip) }
98
+
99
+ active_tests = lines.count(&method(:tailoring_test?))
100
+ pending_tests = lines.count(&method(:pending_tailoring_test?))
101
+ print "tests: %-4d active, %5.1f%% %5s pending\t-\t" % [active_tests, (100.0 * pending_tests / (pending_tests + active_tests)), "(#{pending_tests})"]
102
+
103
+ last_number = last = nil
104
+
105
+ failures = lines.each_with_index.inject([]) do |memo, (current, number)|
106
+ if tailoring_test?(current)
107
+ memo << [last_number + 1, last, current] if tailoring_test?(last) && collator.compare(last, current) == 1
108
+
109
+ last = current
110
+ last_number = number
111
+ elsif pending_tailoring_test?(current)
112
+ last_number = last = nil
113
+ end
114
+
115
+ memo
116
+ end
117
+
118
+ if failures.empty?
119
+ puts "OK"
120
+ else
121
+ failures_info = "#{failures.size} failures: #{failures.inspect}"
122
+
123
+ puts failures_info
124
+ failures.should(be_empty, "#{locale} - #{failures_info}")
125
+ end
126
+ end
127
+ end
128
+
129
+ def pending_tailoring_test?(line)
130
+ !!(line =~ %r{^//})
131
+ end
132
+
133
+ def tailoring_test?(line)
134
+ !!(line && line !~ %r{^(//|#|\s*$)})
135
+ end
136
+
137
+ end