twitter_cldr 1.5.0 → 1.6.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (203) hide show
  1. data/Gemfile +32 -0
  2. data/History.txt +78 -0
  3. data/README.md +72 -62
  4. data/Rakefile +22 -0
  5. data/js/lib/compiler.rb +40 -0
  6. data/js/lib/mustache/bundle.coffee +14 -0
  7. data/js/lib/mustache/calendars/datetime.coffee +240 -0
  8. data/js/lib/mustache/calendars/timespan.coffee +52 -0
  9. data/js/lib/mustache/plurals/rules.coffee +14 -0
  10. data/js/lib/renderers/base.rb +18 -0
  11. data/js/lib/renderers/bundle.rb +18 -0
  12. data/js/lib/renderers/calendars/datetime_renderer.rb +34 -0
  13. data/js/lib/renderers/calendars/timespan_renderer.rb +39 -0
  14. data/js/lib/renderers/plurals/rules/plural_rules_compiler.rb +89 -0
  15. data/js/lib/renderers/plurals/rules/plural_rules_renderer.rb +26 -0
  16. data/js/lib/twitter_cldr_js.rb +85 -0
  17. data/js/spec/js/calendars/datetime_spec.js +418 -0
  18. data/js/spec/js/calendars/timespan_spec.js +91 -0
  19. data/js/spec/js/plurals/plural_rules_spec.js +28 -0
  20. data/js/spec/js/support/jasmine.yml +8 -0
  21. data/js/spec/rb/renderers/plurals/plural_rules_compiler_spec.rb +52 -0
  22. data/js/spec/rb/spec_helper.rb +13 -0
  23. data/lib/twitter_cldr.rb +2 -1
  24. data/lib/twitter_cldr/collation.rb +2 -1
  25. data/lib/twitter_cldr/collation/collator.rb +49 -31
  26. data/lib/twitter_cldr/collation/{sort_key.rb → sort_key_builder.rb} +31 -8
  27. data/lib/twitter_cldr/collation/trie.rb +116 -24
  28. data/lib/twitter_cldr/collation/trie_builder.rb +54 -28
  29. data/lib/twitter_cldr/collation/trie_with_fallback.rb +55 -0
  30. data/lib/twitter_cldr/core_ext/array.rb +14 -1
  31. data/lib/twitter_cldr/core_ext/calendars/datetime.rb +8 -2
  32. data/lib/twitter_cldr/core_ext/calendars/timespan.rb +5 -5
  33. data/lib/twitter_cldr/formatters/calendars/timespan_formatter.rb +10 -10
  34. data/lib/twitter_cldr/formatters/plurals/rules.rb +3 -5
  35. data/lib/twitter_cldr/resources.rb +11 -0
  36. data/lib/twitter_cldr/resources/import.rb +12 -0
  37. data/lib/twitter_cldr/resources/import/tailoring.rb +193 -0
  38. data/lib/twitter_cldr/{shared/resources.rb → resources/loader.rb} +17 -4
  39. data/lib/twitter_cldr/shared.rb +0 -1
  40. data/lib/twitter_cldr/tokenizers/base.rb +9 -9
  41. data/lib/twitter_cldr/tokenizers/calendars/datetime_tokenizer.rb +0 -4
  42. data/lib/twitter_cldr/tokenizers/calendars/timespan_tokenizer.rb +21 -7
  43. data/lib/twitter_cldr/utils.rb +11 -0
  44. data/lib/twitter_cldr/version.rb +1 -1
  45. data/resources/collation/tailoring/af.yml +3 -0
  46. data/resources/collation/tailoring/ar.yml +21 -0
  47. data/resources/collation/tailoring/ca.yml +9 -0
  48. data/resources/collation/tailoring/cs.yml +25 -0
  49. data/resources/collation/tailoring/da.yml +59 -0
  50. data/resources/collation/tailoring/de.yml +3 -0
  51. data/resources/collation/tailoring/el.yml +3 -0
  52. data/resources/collation/tailoring/en.yml +3 -0
  53. data/resources/collation/tailoring/es.yml +5 -0
  54. data/resources/collation/tailoring/eu.yml +3 -0
  55. data/resources/collation/tailoring/fa.yml +73 -0
  56. data/resources/collation/tailoring/fi.yml +61 -0
  57. data/resources/collation/tailoring/fil.yml +11 -0
  58. data/resources/collation/tailoring/fr.yml +3 -0
  59. data/resources/collation/tailoring/he.yml +3 -0
  60. data/resources/collation/tailoring/hi.yml +7 -0
  61. data/resources/collation/tailoring/hu.yml +125 -0
  62. data/resources/collation/tailoring/id.yml +3 -0
  63. data/resources/collation/tailoring/it.yml +3 -0
  64. data/resources/collation/tailoring/ja.yml +14647 -0
  65. data/resources/collation/tailoring/ko.yml +14953 -0
  66. data/resources/collation/tailoring/ms.yml +3 -0
  67. data/resources/collation/tailoring/nb.yml +59 -0
  68. data/resources/collation/tailoring/nl.yml +3 -0
  69. data/resources/collation/tailoring/pl.yml +37 -0
  70. data/resources/collation/tailoring/pt.yml +3 -0
  71. data/resources/collation/tailoring/ru.yml +3 -0
  72. data/resources/collation/tailoring/sv.yml +63 -0
  73. data/resources/collation/tailoring/th.yml +19 -0
  74. data/resources/collation/tailoring/tr.yml +27 -0
  75. data/resources/collation/tailoring/uk.yml +5 -0
  76. data/resources/collation/tailoring/ur.yml +163 -0
  77. data/resources/collation/tailoring/zh-Hant.yml +3 -0
  78. data/resources/collation/tailoring/zh.yml +149 -0
  79. data/resources/custom/locales/af/units.yml +19 -0
  80. data/resources/custom/locales/ar/units.yml +35 -0
  81. data/resources/custom/locales/ca/units.yml +19 -0
  82. data/resources/custom/locales/cs/units.yml +23 -0
  83. data/resources/custom/locales/da/units.yml +19 -0
  84. data/resources/custom/locales/de/units.yml +19 -0
  85. data/resources/custom/locales/el/units.yml +19 -0
  86. data/resources/custom/locales/en/units.yml +18 -0
  87. data/resources/custom/locales/es/units.yml +19 -0
  88. data/resources/custom/locales/eu/units.yml +19 -0
  89. data/resources/custom/locales/fa/units.yml +15 -0
  90. data/resources/custom/locales/fi/units.yml +19 -0
  91. data/resources/custom/locales/fil/units.yml +19 -0
  92. data/resources/custom/locales/fr/units.yml +19 -0
  93. data/resources/custom/locales/he/units.yml +19 -0
  94. data/resources/custom/locales/hi/units.yml +19 -0
  95. data/resources/custom/locales/hu/units.yml +15 -0
  96. data/resources/custom/locales/id/units.yml +15 -0
  97. data/resources/custom/locales/it/units.yml +19 -0
  98. data/resources/custom/locales/ja/units.yml +15 -0
  99. data/resources/custom/locales/ko/units.yml +15 -0
  100. data/resources/custom/locales/ms/units.yml +15 -0
  101. data/resources/custom/locales/nb/units.yml +19 -0
  102. data/resources/custom/locales/nl/units.yml +19 -0
  103. data/resources/custom/locales/pl/units.yml +23 -0
  104. data/resources/custom/locales/pt/units.yml +19 -0
  105. data/resources/custom/locales/ru/units.yml +27 -0
  106. data/resources/custom/locales/sv/units.yml +19 -0
  107. data/resources/custom/locales/th/units.yml +15 -0
  108. data/resources/custom/locales/tr/units.yml +15 -0
  109. data/resources/custom/locales/uk/units.yml +27 -0
  110. data/resources/custom/locales/ur/units.yml +19 -0
  111. data/resources/custom/locales/zh-Hant/units.yml +15 -0
  112. data/resources/custom/locales/zh/units.yml +15 -0
  113. data/resources/locales/af/units.yml +112 -65
  114. data/resources/locales/ar/units.yml +196 -126
  115. data/resources/locales/ca/units.yml +112 -70
  116. data/resources/locales/cs/units.yml +140 -91
  117. data/resources/locales/da/units.yml +98 -56
  118. data/resources/locales/de/units.yml +112 -70
  119. data/resources/locales/el/units.yml +119 -84
  120. data/resources/locales/en/units.yml +84 -42
  121. data/resources/locales/es/units.yml +112 -70
  122. data/resources/locales/eu/units.yml +105 -68
  123. data/resources/locales/fa/units.yml +98 -63
  124. data/resources/locales/fi/units.yml +112 -70
  125. data/resources/locales/fil/units.yml +98 -56
  126. data/resources/locales/fr/units.yml +112 -70
  127. data/resources/locales/he/units.yml +98 -56
  128. data/resources/locales/hi/units.yml +98 -56
  129. data/resources/locales/hu/units.yml +84 -49
  130. data/resources/locales/id/units.yml +84 -49
  131. data/resources/locales/it/units.yml +98 -56
  132. data/resources/locales/ja/units.yml +84 -49
  133. data/resources/locales/ko/units.yml +84 -49
  134. data/resources/locales/ms/units.yml +112 -63
  135. data/resources/locales/nb/units.yml +106 -64
  136. data/resources/locales/nl/units.yml +98 -56
  137. data/resources/locales/pl/units.yml +181 -112
  138. data/resources/locales/pt/units.yml +112 -70
  139. data/resources/locales/ru/units.yml +168 -112
  140. data/resources/locales/sv/units.yml +112 -70
  141. data/resources/locales/th/units.yml +84 -49
  142. data/resources/locales/tr/units.yml +84 -49
  143. data/resources/locales/uk/units.yml +168 -112
  144. data/resources/locales/ur/units.yml +112 -63
  145. data/resources/locales/zh-Hant/units.yml +84 -49
  146. data/resources/locales/zh/units.yml +84 -49
  147. data/spec/collation/collation_spec.rb +1 -1
  148. data/spec/collation/collator_spec.rb +120 -48
  149. data/spec/collation/sort_key_builder_spec.rb +80 -0
  150. data/spec/collation/tailoring_spec.rb +137 -0
  151. data/spec/collation/tailoring_tests/af.txt +321 -0
  152. data/spec/collation/tailoring_tests/ar.txt +188 -0
  153. data/spec/collation/tailoring_tests/ca.txt +446 -0
  154. data/spec/collation/tailoring_tests/cs.txt +273 -0
  155. data/spec/collation/tailoring_tests/da.txt +293 -0
  156. data/spec/collation/tailoring_tests/de.txt +414 -0
  157. data/spec/collation/tailoring_tests/el.txt +228 -0
  158. data/spec/collation/tailoring_tests/en.txt +399 -0
  159. data/spec/collation/tailoring_tests/es.txt +402 -0
  160. data/spec/collation/tailoring_tests/eu.txt +183 -0
  161. data/spec/collation/tailoring_tests/fa.txt +263 -0
  162. data/spec/collation/tailoring_tests/fi.txt +389 -0
  163. data/spec/collation/tailoring_tests/fil.txt +279 -0
  164. data/spec/collation/tailoring_tests/fr.txt +363 -0
  165. data/spec/collation/tailoring_tests/he.txt +167 -0
  166. data/spec/collation/tailoring_tests/hi.txt +230 -0
  167. data/spec/collation/tailoring_tests/hu.txt +773 -0
  168. data/spec/collation/tailoring_tests/id.txt +171 -0
  169. data/spec/collation/tailoring_tests/it.txt +231 -0
  170. data/spec/collation/tailoring_tests/ja.txt +4287 -0
  171. data/spec/collation/tailoring_tests/ko.txt +1761 -0
  172. data/spec/collation/tailoring_tests/ms.txt +531 -0
  173. data/spec/collation/tailoring_tests/nb.txt +375 -0
  174. data/spec/collation/tailoring_tests/nl.txt +273 -0
  175. data/spec/collation/tailoring_tests/pl.txt +225 -0
  176. data/spec/collation/tailoring_tests/pt.txt +405 -0
  177. data/spec/collation/tailoring_tests/ru.txt +213 -0
  178. data/spec/collation/tailoring_tests/sv.txt +353 -0
  179. data/spec/collation/tailoring_tests/th.txt +239 -0
  180. data/spec/collation/tailoring_tests/tr.txt +414 -0
  181. data/spec/collation/tailoring_tests/uk.txt +218 -0
  182. data/spec/collation/tailoring_tests/ur.txt +284 -0
  183. data/spec/collation/tailoring_tests/zh-Hant.txt +626 -0
  184. data/spec/collation/tailoring_tests/zh.txt +717 -0
  185. data/spec/collation/trie_builder_spec.rb +131 -51
  186. data/spec/collation/trie_spec.rb +301 -26
  187. data/spec/collation/trie_with_fallback_spec.rb +41 -0
  188. data/spec/core_ext/array_spec.rb +46 -3
  189. data/spec/core_ext/calendars/date_spec.rb +24 -24
  190. data/spec/core_ext/calendars/datetime_spec.rb +7 -0
  191. data/spec/core_ext/calendars/time_spec.rb +2 -2
  192. data/spec/formatters/calendars/timespan_formatter_spec.rb +47 -18
  193. data/spec/formatters/plurals/rules_spec.rb +3 -11
  194. data/spec/readme_spec.rb +15 -15
  195. data/spec/resources/loader_spec.rb +94 -0
  196. data/spec/spec_helper.rb +6 -0
  197. data/spec/tokenizers/calendars/timespan_tokenizer_spec.rb +1 -1
  198. data/spec/twitter_cldr_spec.rb +3 -3
  199. data/spec/utils_spec.rb +38 -0
  200. data/twitter_cldr.gemspec +25 -0
  201. metadata +156 -110
  202. data/spec/collation/sort_key_spec.rb +0 -56
  203. data/spec/shared/resources_spec.rb +0 -75
@@ -0,0 +1,91 @@
1
+ // Copyright 2012 Twitter, Inc
2
+ // http://www.apache.org/licenses/LICENSE-2.0
3
+
4
+ //= require '../../../build/twitter_cldr_en.js'
5
+
6
+ describe("TimespanFormatter", function() {
7
+ beforeEach(function() {
8
+ formatter = new TwitterCldr.TimespanFormatter();
9
+ });
10
+
11
+ describe("#format", function() {
12
+ it("works for a variety of units for a non-directional timespan", function() {
13
+ expect(formatter.format(3273932, {
14
+ unit: "year",
15
+ direction: "none"
16
+ })).toEqual('0 years');
17
+ expect(formatter.format(3273932, {
18
+ unit: "month",
19
+ direction: "none"
20
+ })).toEqual('1 month');
21
+ expect(formatter.format(3273932, {
22
+ unit: "week",
23
+ direction: "none"
24
+ })).toEqual('5 weeks');
25
+ expect(formatter.format(3273932, {
26
+ unit: "day",
27
+ direction: "none"
28
+ })).toEqual('38 days');
29
+ expect(formatter.format(3273932, {
30
+ unit: "hour",
31
+ direction: "none"
32
+ })).toEqual('909 hours');
33
+ expect(formatter.format(3273932, {
34
+ unit: "minute",
35
+ direction: "none"
36
+ })).toEqual('54566 minutes');
37
+ expect(formatter.format(3273932, {
38
+ unit: "second",
39
+ direction: "none"
40
+ })).toEqual('3273932 seconds');
41
+ }),
42
+
43
+ it("works for a variety of units in the past", function() {
44
+ expect(formatter.format(-3273932, {
45
+ unit: "year"
46
+ })).toEqual('0 years ago');
47
+ expect(formatter.format(-3273932, {
48
+ unit: "month"
49
+ })).toEqual('1 month ago');
50
+ expect(formatter.format(-3273932, {
51
+ unit: "week"
52
+ })).toEqual('5 weeks ago');
53
+ expect(formatter.format(-3273932, {
54
+ unit: "day"
55
+ })).toEqual('38 days ago');
56
+ expect(formatter.format(-3273932, {
57
+ unit: "hour"
58
+ })).toEqual('909 hours ago');
59
+ expect(formatter.format(-3273932, {
60
+ unit: "minute"
61
+ })).toEqual('54566 minutes ago');
62
+ expect(formatter.format(-3273932, {
63
+ unit: "second"
64
+ })).toEqual('3273932 seconds ago');
65
+ });
66
+
67
+ it("works for a variety of units in the future", function() {
68
+ expect(formatter.format(3273932, {
69
+ unit: "year"
70
+ })).toEqual('In 0 years');
71
+ expect(formatter.format(3273932, {
72
+ unit: "month"
73
+ })).toEqual('In 1 month');
74
+ expect(formatter.format(3273932, {
75
+ unit: "week"
76
+ })).toEqual('In 5 weeks');
77
+ expect(formatter.format(3273932, {
78
+ unit: "day"
79
+ })).toEqual('In 38 days');
80
+ expect(formatter.format(3273932, {
81
+ unit: "hour"
82
+ })).toEqual('In 909 hours');
83
+ expect(formatter.format(3273932, {
84
+ unit: "minute"
85
+ })).toEqual('In 54566 minutes');
86
+ expect(formatter.format(3273932, {
87
+ unit: "second"
88
+ })).toEqual('In 3273932 seconds');
89
+ });
90
+ });
91
+ });
@@ -0,0 +1,28 @@
1
+ // Copyright 2012 Twitter, Inc
2
+ // http://www.apache.org/licenses/LICENSE-2.0
3
+
4
+ //= require '../../../build/twitter_cldr_en.js'
5
+
6
+ describe("PluralRules", function() {
7
+ describe("#all", function() {
8
+ it("returns an array of all English plural rules", function() {
9
+ expect(TwitterCldr.PluralRules.all()).toEqual(["one", "other"]);
10
+ });
11
+ });
12
+
13
+ describe("#rule_for", function() {
14
+ it("returns 'one' for the number 1", function() {
15
+ expect(TwitterCldr.PluralRules.rule_for(1)).toEqual("one");
16
+ });
17
+
18
+ it("returns 'other' for any number greater than 1", function() {
19
+ for (var i = 2; i < 10; i ++) {
20
+ expect(TwitterCldr.PluralRules.rule_for(i)).toEqual("other");
21
+ }
22
+ });
23
+
24
+ it("returns 'other' for the number 0", function() {
25
+ expect(TwitterCldr.PluralRules.rule_for(0)).toEqual("other");
26
+ });
27
+ });
28
+ });
@@ -0,0 +1,8 @@
1
+ src_files:
2
+ - "**/*.*"
3
+ spec_files:
4
+ - "**/*[Ss]pec.js"
5
+ helpers:
6
+ - helpers/**/*
7
+ src_dir: ../../build
8
+ spec_dir: ../
@@ -0,0 +1,52 @@
1
+ # encoding: UTF-8
2
+
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
6
+ require File.join(File.dirname(File.dirname(File.dirname(__FILE__))), "spec_helper")
7
+
8
+ include TwitterCldr::Js::Renderers::PluralRules
9
+
10
+ describe PluralRulesCompiler do
11
+ describe "#rule_to_js" do
12
+ it "handles a single plural rule" do
13
+ PluralRulesCompiler.rule_to_js(":other").should == 'function(n) { return "other" }'
14
+ end
15
+
16
+ it "handles a conditional plural rule (eg. English)" do
17
+ PluralRulesCompiler.rule_to_js("n == 1 ? :one : :other").should == 'function(n) { return (function() { if (n == 1) { return "one" } else { return "other" } })(); }'
18
+ end
19
+
20
+ it "handles an include? call" do
21
+ PluralRulesCompiler.rule_to_js("[2, 3, 4].include?(n)").should == "function(n) { return [2, 3, 4].indexOf(n) >= 0 }"
22
+ end
23
+
24
+ it "handles the modulus operator" do
25
+ PluralRulesCompiler.rule_to_js("n % 10").should == "function(n) { return n % 10 }"
26
+ end
27
+
28
+ it "handles < and > operators" do
29
+ PluralRulesCompiler.rule_to_js("n > 10").should == "function(n) { return n > 10 }"
30
+ PluralRulesCompiler.rule_to_js("n < 10").should == "function(n) { return n < 10 }"
31
+ end
32
+
33
+ it "handles 'and', 'or', and 'not' operators" do
34
+ PluralRulesCompiler.rule_to_js("n and n").should == "function(n) { return n && n }"
35
+ PluralRulesCompiler.rule_to_js("n or n").should == "function(n) { return n || n }"
36
+ PluralRulesCompiler.rule_to_js("not n").should == "function(n) { return !(n) }"
37
+ end
38
+
39
+ it "compounds include? and the modulus operator" do
40
+ PluralRulesCompiler.rule_to_js("[2, 3, 4].include?(n % 10)").should == "function(n) { return [2, 3, 4].indexOf(n % 10) >= 0 }"
41
+ end
42
+
43
+ it "compounds include?, modulus, and an if statement" do
44
+ PluralRulesCompiler.rule_to_js("[2, 3, 4].include?(n % 10) ? :one : :other").should == 'function(n) { return (function() { if ([2, 3, 4].indexOf(n % 10) >= 0) { return "one" } else { return "other" } })(); }'
45
+ end
46
+
47
+ it "chains two if statements (eg. Polish)" do
48
+ ruby_string = "n == 1 ? :one : [2, 3, 4].include?(n % 10) && ![12, 13, 14].include?(n % 100) && ![22, 23, 24].include?(n % 100) ? :few : :other"
49
+ PluralRulesCompiler.rule_to_js(ruby_string).should == 'function(n) { return (function() { if (n == 1) { return "one" } else { return (function() { if ([2, 3, 4].indexOf(n % 10) >= 0 && !([12, 13, 14].indexOf(n % 100) >= 0) && !([22, 23, 24].indexOf(n % 100) >= 0)) { return "few" } else { return "other" } })(); } })(); }'
50
+ end
51
+ end
52
+ end
@@ -0,0 +1,13 @@
1
+ # encoding: UTF-8
2
+
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
6
+ require 'rspec'
7
+ require 'twitter_cldr'
8
+
9
+ TwitterCldr.require_js
10
+
11
+ RSpec.configure do |config|
12
+ config.mock_with :rr
13
+ end
data/lib/twitter_cldr.rb CHANGED
@@ -23,6 +23,7 @@ module TwitterCldr
23
23
  autoload :Formatters, 'twitter_cldr/formatters'
24
24
  autoload :Collation, 'twitter_cldr/collation'
25
25
  autoload :Normalization, 'twitter_cldr/normalization'
26
+ autoload :Resources, 'twitter_cldr/resources'
26
27
  autoload :Shared, 'twitter_cldr/shared'
27
28
  autoload :Tokenizers, 'twitter_cldr/tokenizers'
28
29
  autoload :Utils, 'twitter_cldr/utils'
@@ -54,7 +55,7 @@ module TwitterCldr
54
55
  class << self
55
56
 
56
57
  def resources
57
- @resources ||= TwitterCldr::Shared::Resources.new
58
+ @resources ||= TwitterCldr::Resources::Loader.new
58
59
  end
59
60
 
60
61
  def get_locale
@@ -7,8 +7,9 @@ module TwitterCldr
7
7
  module Collation
8
8
  autoload :Collator, 'twitter_cldr/collation/collator'
9
9
  autoload :ImplicitCollationElements, 'twitter_cldr/collation/implicit_collation_elements'
10
- autoload :SortKey, 'twitter_cldr/collation/sort_key'
10
+ autoload :SortKeyBuilder, 'twitter_cldr/collation/sort_key_builder'
11
11
  autoload :Trie, 'twitter_cldr/collation/trie'
12
12
  autoload :TrieBuilder, 'twitter_cldr/collation/trie_builder'
13
+ autoload :TrieWithFallback, 'twitter_cldr/collation/trie_with_fallback'
13
14
  end
14
15
  end
@@ -13,52 +13,49 @@ module TwitterCldr
13
13
 
14
14
  FRACTIONAL_UCA_SHORT_RESOURCE = 'collation/FractionalUCA_SHORT.txt'
15
15
 
16
- def sort(strings)
17
- strings.map{ |s| [s, comparison_key(s)] }.sort{ |a, b| compare_keys(a[1], b[1]) }.map(&:first)
16
+ attr_accessor :locale
17
+
18
+ def initialize(locale = nil)
19
+ @locale = TwitterCldr.convert_locale(locale) if locale
20
+ @trie = load_trie
18
21
  end
19
22
 
20
- def compare(string_a, string_b)
21
- string_a == string_b ? 0 : compare_keys(comparison_key(string_a), comparison_key(string_b))
23
+ def sort(strings)
24
+ strings.map{ |s| [s, get_sort_key(s)] }.sort{ |a, b| a[1] <=> b[1] }.map(&:first)
22
25
  end
23
26
 
24
- def sort_key(string_or_code_points)
25
- sort_key_for_code_points(get_code_points(string_or_code_points))
27
+ def sort!(strings)
28
+ sort_keys = Hash.new { |hash, string| hash[string] = get_sort_key(string) }
29
+ strings.replace(strings.sort_by { |s| sort_keys[s] })
26
30
  end
27
31
 
28
- def trie
29
- @trie ||= self.class.trie
32
+ def compare(string_a, string_b)
33
+ string_a == string_b ? 0 : get_sort_key(string_a) <=> get_sort_key(string_b)
30
34
  end
31
35
 
32
- def self.trie
33
- @trie ||= TwitterCldr::Collation::TrieBuilder.load_trie(FRACTIONAL_UCA_SHORT_RESOURCE)
36
+ def get_sort_key(string_or_code_points)
37
+ TwitterCldr::Collation::SortKeyBuilder.build(get_collation_elements(string_or_code_points))
34
38
  end
35
39
 
36
- private
40
+ def get_collation_elements(string_or_code_points)
41
+ integer_code_points = get_normalized_code_points(string_or_code_points)
37
42
 
38
- def comparison_key(string)
39
- code_points = TwitterCldr::Utils::CodePoints.from_string(string)
40
- { :code_points => code_points, :sort_key => sort_key(code_points) }
43
+ result = []
44
+ result.concat(code_point_collation_elements(integer_code_points)) until integer_code_points.empty?
45
+ result
41
46
  end
42
47
 
43
- def compare_keys(a, b)
44
- (a[:sort_key] <=> b[:sort_key]).nonzero? || get_integer_code_points(a[:code_points]) <=> get_integer_code_points(b[:code_points])
45
- end
48
+ private
46
49
 
47
- def sort_key_for_code_points(integer_code_points)
48
- TwitterCldr::Collation::SortKey.build(get_collation_elements(integer_code_points))
50
+ def load_trie
51
+ @locale ? self.class.tailored_fce_trie(@locale) : self.class.default_fce_trie
49
52
  end
50
53
 
51
54
  def get_integer_code_points(code_points)
52
55
  code_points.map { |code_point| code_point.to_i(16) }
53
56
  end
54
57
 
55
- def get_collation_elements(integer_code_points)
56
- result = []
57
- result.concat(code_point_collation_elements(integer_code_points)) until integer_code_points.empty?
58
- result
59
- end
60
-
61
- def get_code_points(str_or_code_points)
58
+ def get_normalized_code_points(str_or_code_points)
62
59
  code_points = str_or_code_points.is_a?(String) ? TwitterCldr::Utils::CodePoints.from_string(str_or_code_points) : str_or_code_points
63
60
 
64
61
  # Normalization makes the collation process significantly slower (like seven times slower on the UCA
@@ -89,7 +86,7 @@ module TwitterCldr
89
86
  #
90
87
  def explicit_collation_elements(integer_code_points)
91
88
  # find the longest prefix in the trie
92
- collation_elements, suffixes, prefix_size = trie.find_prefix(integer_code_points)
89
+ collation_elements, prefix_size, suffixes = @trie.find_prefix(integer_code_points)
93
90
 
94
91
  return unless collation_elements
95
92
 
@@ -101,9 +98,6 @@ module TwitterCldr
101
98
  used_combining_classes = {}
102
99
 
103
100
  while non_starter_pos < integer_code_points.size && !suffixes.empty?
104
- # create a trie from a hash of suffixes available for the chosen prefix
105
- subtrie = TwitterCldr::Collation::Trie.new(suffixes)
106
-
107
101
  # get next code point (possibly non-starter)
108
102
  non_starter_code_point = integer_code_points[non_starter_pos]
109
103
  combining_class = TwitterCldr::Normalization::Base.combining_class_for(non_starter_code_point.to_s(16))
@@ -115,7 +109,7 @@ module TwitterCldr
115
109
 
116
110
  # Try to find collation elements for [prefix + non-starter] code points sequence. As the subtrie contains
117
111
  # suffixes (without prefix) we pass only non-starter itself.
118
- new_collation_elements, new_suffixes = subtrie.find_prefix([non_starter_code_point]).first(2)
112
+ new_collation_elements, _, new_suffixes = suffixes.find_prefix([non_starter_code_point])
119
113
 
120
114
  if new_collation_elements
121
115
  # non-starter with a collation elements sequence corresponding to [prefix + non-starter] accepted
@@ -137,6 +131,30 @@ module TwitterCldr
137
131
  TwitterCldr::Collation::ImplicitCollationElements.for_code_point(integer_code_points.shift)
138
132
  end
139
133
 
134
+ class << self
135
+
136
+ # Loads and memoizes the default Fractional Collation Elements trie.
137
+ #
138
+ def default_fce_trie
139
+ @default_fce_trie ||= TwitterCldr::Collation::TrieBuilder.load_trie(FRACTIONAL_UCA_SHORT_RESOURCE).lock
140
+ end
141
+
142
+ def tailored_fce_trie(locale)
143
+ tailored_fce_tries_cache[locale]
144
+ end
145
+
146
+ private
147
+
148
+ def tailored_fce_tries_cache
149
+ @tailored_fce_tries_cache ||= Hash.new { |hash, locale| hash[locale] = load_tailored_trie(locale) }
150
+ end
151
+
152
+ def load_tailored_trie(locale)
153
+ TwitterCldr::Collation::TrieBuilder.load_tailored_trie(locale, default_fce_trie).lock
154
+ end
155
+
156
+ end
157
+
140
158
  end
141
159
 
142
160
  end
@@ -6,15 +6,23 @@
6
6
  module TwitterCldr
7
7
  module Collation
8
8
 
9
- # SortKey builds a collation sort key from an array of collation elements.
9
+ # SortKeyBuilder builds a collation sort key from an array of collation elements.
10
10
  #
11
- class SortKey
11
+ # Weights compression algorithms for every level are described in http://source.icu-project.org/repos/icu/icuhtml/trunk/design/collation/ICU_collation_design.htm
12
+ #
13
+ class SortKeyBuilder
12
14
 
13
15
  PRIMARY_LEVEL, SECONDARY_LEVEL, TERTIARY_LEVEL = 0, 1, 2
14
16
 
15
17
  LEVEL_SEPARATOR = 1 # separate levels in a sort key '01' bytes
16
18
 
17
- TERTIARY_LEVEL_MASK = 0x3F # mask for removing case bits from tertiary weight ('CC' bits in 'CC00 0000')
19
+ TERTIARY_LEVEL_MASK = 0x3F # mask for removing case bits or continuation flag from a tertiary weight
20
+
21
+ PRIMARY_BYTE_MIN = 0x3
22
+ PRIMARY_BYTE_MAX = 0xFF
23
+
24
+ MIN_NON_LATIN_PRIMARY = 0x5B
25
+ MAX_REGULAR_PRIMARY = 0x7A
18
26
 
19
27
  attr_reader :collation_elements
20
28
 
@@ -56,11 +64,30 @@ module TwitterCldr
56
64
  end
57
65
 
58
66
  def append_primary_bytes
67
+ @last_leading_byte = nil
68
+
59
69
  @collation_elements.each do |collation_element|
60
- append_weight(level_weight(collation_element, PRIMARY_LEVEL))
70
+ bytes = fixnum_to_bytes_array(level_weight(collation_element, PRIMARY_LEVEL))
71
+
72
+ unless bytes.empty?
73
+ leading_byte = bytes.shift
74
+
75
+ if leading_byte != @last_leading_byte
76
+ @bytes_array << (leading_byte < @last_leading_byte ? PRIMARY_BYTE_MIN : PRIMARY_BYTE_MAX) if @last_leading_byte
77
+ @bytes_array << leading_byte
78
+
79
+ @last_leading_byte = !bytes.empty? && compressible_primary?(leading_byte) ? leading_byte : nil
80
+ end
81
+
82
+ @bytes_array.concat(bytes)
83
+ end
61
84
  end
62
85
  end
63
86
 
87
+ def compressible_primary?(leading_byte)
88
+ (MIN_NON_LATIN_PRIMARY..MAX_REGULAR_PRIMARY).include?(leading_byte)
89
+ end
90
+
64
91
  def append_secondary_bytes
65
92
  @bytes_array << LEVEL_SEPARATOR
66
93
 
@@ -136,10 +163,6 @@ module TwitterCldr
136
163
  level_weight(collation_element, TERTIARY_LEVEL) & TERTIARY_LEVEL_MASK
137
164
  end
138
165
 
139
- def append_weight(weight)
140
- @bytes_array.concat(fixnum_to_bytes_array(weight))
141
- end
142
-
143
166
  def level_weight(collation_element, level)
144
167
  collation_element[level] || 0
145
168
  end
@@ -17,54 +17,146 @@ module TwitterCldr
17
17
  class Trie
18
18
 
19
19
  # Initializes a new trie. If `trie_hash` value is passed it's used as the initial data for the trie. Usually,
20
- # `trie_hash` is extracted from other trie and represents its sub-trie.
20
+ # `trie_hash` is extracted from other trie and represents its subtrie.
21
21
  #
22
- def initialize(trie_hash = {})
23
- @root = [nil, trie_hash]
22
+ def initialize(root = Node.new)
23
+ @root = root
24
+ @locked = false
25
+ end
26
+
27
+ def lock
28
+ @locked = true
29
+ self
30
+ end
31
+
32
+ def locked?
33
+ @locked
34
+ end
35
+
36
+ def starters
37
+ @root.keys
38
+ end
39
+
40
+ def each_starting_with(starter, &block)
41
+ starting_node = @root.child(starter)
42
+ each_pair(starting_node, [starter], &block) if starting_node
43
+ end
44
+
45
+ def empty?
46
+ !@root.has_children?
24
47
  end
25
48
 
26
49
  def add(key, value)
27
- final = key.inject(@root) do |node, key_element|
28
- node[1][key_element] ||= [nil, {}]
29
- end
50
+ store(key, value, false)
51
+ end
30
52
 
31
- final[0] = value
53
+ def set(key, value)
54
+ store(key, value)
32
55
  end
33
56
 
34
57
  def get(key)
35
58
  final = key.inject(@root) do |node, key_element|
36
- subtree = node[1][key_element]
37
- return unless subtree
38
- subtree
59
+ return unless node
60
+ node.child(key_element)
39
61
  end
40
62
 
41
- final[0]
63
+ final && final.value
42
64
  end
43
65
 
44
66
  # Finds the longest substring of the `key` that matches, as a key, a node in the trie.
45
67
  #
46
68
  # Returns a three elements array:
47
69
  #
48
- # 1. value in the last node that was visited
49
- # 2. sub-trie of this node (as a hash)
50
- # 3. size of the `key` prefix that matches this node
70
+ # 1. value in the last node that was visited and has non-nil value
71
+ # 2. size of the `key` prefix that matches this node
72
+ # 3. subtrie for which that node is a root
51
73
  #
52
74
  def find_prefix(key)
53
- prefix_size = 0
54
- node = @root
75
+ last_prefix_size = 0
76
+ last_with_value = @root
77
+
78
+ key.each_with_index.inject(@root) do |node, (key_element, index)|
79
+ child = node.child(key_element)
80
+
81
+ break unless child
82
+
83
+ if child.value
84
+ last_prefix_size = index + 1
85
+ last_with_value = child
86
+ end
87
+
88
+ child
89
+ end
90
+
91
+ [last_with_value.value, last_prefix_size, last_with_value.to_trie]
92
+ end
93
+
94
+ def to_hash
95
+ @root.subtrie_hash
96
+ end
97
+
98
+ alias inspect to_s # to prevent printing of a possibly huge children list in the IRB
99
+
100
+ private
55
101
 
56
- key.each do |key_element|
57
- subtree = node[1][key_element]
102
+ def store(key, value, override = true)
103
+ raise RuntimeError, "can't store value in a locked trie" if locked?
104
+
105
+ final = key.inject(@root) do |node, key_element|
106
+ node.child(key_element) || node.set_child(key_element, Node.new)
107
+ end
108
+
109
+ final.value = value unless final.value && !override
110
+ end
111
+
112
+ def each_pair(node, key, &block)
113
+ yield [key, node.value] if node.value
114
+
115
+ node.each_key_and_child do |key_element, child|
116
+ each_pair(child, key + [key_element], &block)
117
+ end
118
+ end
119
+
120
+ class Node
121
+
122
+ attr_accessor :value
123
+
124
+ def initialize(value = nil, children = {})
125
+ @value = value
126
+ @children = children
127
+ end
128
+
129
+ def child(key)
130
+ @children[key]
131
+ end
132
+
133
+ def set_child(key, child)
134
+ @children[key] = child
135
+ end
136
+
137
+ def has_children?
138
+ !@children.empty?
139
+ end
140
+
141
+ def each_key_and_child(&block)
142
+ @children.each(&block)
143
+ end
144
+
145
+ def keys
146
+ @children.keys
147
+ end
148
+
149
+ def to_trie
150
+ Trie.new(self.class.new(nil, @children)).lock
151
+ end
58
152
 
59
- if subtree
60
- prefix_size += 1
61
- node = subtree
62
- else
63
- break
153
+ def subtrie_hash
154
+ @children.inject({}) do |memo, (key, child)|
155
+ memo[key] = [child.value, child.subtrie_hash]
156
+ memo
64
157
  end
65
158
  end
66
159
 
67
- node + [prefix_size]
68
160
  end
69
161
 
70
162
  end