twitter_cldr 1.5.0 → 1.6.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +32 -0
- data/History.txt +78 -0
- data/README.md +72 -62
- data/Rakefile +22 -0
- data/js/lib/compiler.rb +40 -0
- data/js/lib/mustache/bundle.coffee +14 -0
- data/js/lib/mustache/calendars/datetime.coffee +240 -0
- data/js/lib/mustache/calendars/timespan.coffee +52 -0
- data/js/lib/mustache/plurals/rules.coffee +14 -0
- data/js/lib/renderers/base.rb +18 -0
- data/js/lib/renderers/bundle.rb +18 -0
- data/js/lib/renderers/calendars/datetime_renderer.rb +34 -0
- data/js/lib/renderers/calendars/timespan_renderer.rb +39 -0
- data/js/lib/renderers/plurals/rules/plural_rules_compiler.rb +89 -0
- data/js/lib/renderers/plurals/rules/plural_rules_renderer.rb +26 -0
- data/js/lib/twitter_cldr_js.rb +85 -0
- data/js/spec/js/calendars/datetime_spec.js +418 -0
- data/js/spec/js/calendars/timespan_spec.js +91 -0
- data/js/spec/js/plurals/plural_rules_spec.js +28 -0
- data/js/spec/js/support/jasmine.yml +8 -0
- data/js/spec/rb/renderers/plurals/plural_rules_compiler_spec.rb +52 -0
- data/js/spec/rb/spec_helper.rb +13 -0
- data/lib/twitter_cldr.rb +2 -1
- data/lib/twitter_cldr/collation.rb +2 -1
- data/lib/twitter_cldr/collation/collator.rb +49 -31
- data/lib/twitter_cldr/collation/{sort_key.rb → sort_key_builder.rb} +31 -8
- data/lib/twitter_cldr/collation/trie.rb +116 -24
- data/lib/twitter_cldr/collation/trie_builder.rb +54 -28
- data/lib/twitter_cldr/collation/trie_with_fallback.rb +55 -0
- data/lib/twitter_cldr/core_ext/array.rb +14 -1
- data/lib/twitter_cldr/core_ext/calendars/datetime.rb +8 -2
- data/lib/twitter_cldr/core_ext/calendars/timespan.rb +5 -5
- data/lib/twitter_cldr/formatters/calendars/timespan_formatter.rb +10 -10
- data/lib/twitter_cldr/formatters/plurals/rules.rb +3 -5
- data/lib/twitter_cldr/resources.rb +11 -0
- data/lib/twitter_cldr/resources/import.rb +12 -0
- data/lib/twitter_cldr/resources/import/tailoring.rb +193 -0
- data/lib/twitter_cldr/{shared/resources.rb → resources/loader.rb} +17 -4
- data/lib/twitter_cldr/shared.rb +0 -1
- data/lib/twitter_cldr/tokenizers/base.rb +9 -9
- data/lib/twitter_cldr/tokenizers/calendars/datetime_tokenizer.rb +0 -4
- data/lib/twitter_cldr/tokenizers/calendars/timespan_tokenizer.rb +21 -7
- data/lib/twitter_cldr/utils.rb +11 -0
- data/lib/twitter_cldr/version.rb +1 -1
- data/resources/collation/tailoring/af.yml +3 -0
- data/resources/collation/tailoring/ar.yml +21 -0
- data/resources/collation/tailoring/ca.yml +9 -0
- data/resources/collation/tailoring/cs.yml +25 -0
- data/resources/collation/tailoring/da.yml +59 -0
- data/resources/collation/tailoring/de.yml +3 -0
- data/resources/collation/tailoring/el.yml +3 -0
- data/resources/collation/tailoring/en.yml +3 -0
- data/resources/collation/tailoring/es.yml +5 -0
- data/resources/collation/tailoring/eu.yml +3 -0
- data/resources/collation/tailoring/fa.yml +73 -0
- data/resources/collation/tailoring/fi.yml +61 -0
- data/resources/collation/tailoring/fil.yml +11 -0
- data/resources/collation/tailoring/fr.yml +3 -0
- data/resources/collation/tailoring/he.yml +3 -0
- data/resources/collation/tailoring/hi.yml +7 -0
- data/resources/collation/tailoring/hu.yml +125 -0
- data/resources/collation/tailoring/id.yml +3 -0
- data/resources/collation/tailoring/it.yml +3 -0
- data/resources/collation/tailoring/ja.yml +14647 -0
- data/resources/collation/tailoring/ko.yml +14953 -0
- data/resources/collation/tailoring/ms.yml +3 -0
- data/resources/collation/tailoring/nb.yml +59 -0
- data/resources/collation/tailoring/nl.yml +3 -0
- data/resources/collation/tailoring/pl.yml +37 -0
- data/resources/collation/tailoring/pt.yml +3 -0
- data/resources/collation/tailoring/ru.yml +3 -0
- data/resources/collation/tailoring/sv.yml +63 -0
- data/resources/collation/tailoring/th.yml +19 -0
- data/resources/collation/tailoring/tr.yml +27 -0
- data/resources/collation/tailoring/uk.yml +5 -0
- data/resources/collation/tailoring/ur.yml +163 -0
- data/resources/collation/tailoring/zh-Hant.yml +3 -0
- data/resources/collation/tailoring/zh.yml +149 -0
- data/resources/custom/locales/af/units.yml +19 -0
- data/resources/custom/locales/ar/units.yml +35 -0
- data/resources/custom/locales/ca/units.yml +19 -0
- data/resources/custom/locales/cs/units.yml +23 -0
- data/resources/custom/locales/da/units.yml +19 -0
- data/resources/custom/locales/de/units.yml +19 -0
- data/resources/custom/locales/el/units.yml +19 -0
- data/resources/custom/locales/en/units.yml +18 -0
- data/resources/custom/locales/es/units.yml +19 -0
- data/resources/custom/locales/eu/units.yml +19 -0
- data/resources/custom/locales/fa/units.yml +15 -0
- data/resources/custom/locales/fi/units.yml +19 -0
- data/resources/custom/locales/fil/units.yml +19 -0
- data/resources/custom/locales/fr/units.yml +19 -0
- data/resources/custom/locales/he/units.yml +19 -0
- data/resources/custom/locales/hi/units.yml +19 -0
- data/resources/custom/locales/hu/units.yml +15 -0
- data/resources/custom/locales/id/units.yml +15 -0
- data/resources/custom/locales/it/units.yml +19 -0
- data/resources/custom/locales/ja/units.yml +15 -0
- data/resources/custom/locales/ko/units.yml +15 -0
- data/resources/custom/locales/ms/units.yml +15 -0
- data/resources/custom/locales/nb/units.yml +19 -0
- data/resources/custom/locales/nl/units.yml +19 -0
- data/resources/custom/locales/pl/units.yml +23 -0
- data/resources/custom/locales/pt/units.yml +19 -0
- data/resources/custom/locales/ru/units.yml +27 -0
- data/resources/custom/locales/sv/units.yml +19 -0
- data/resources/custom/locales/th/units.yml +15 -0
- data/resources/custom/locales/tr/units.yml +15 -0
- data/resources/custom/locales/uk/units.yml +27 -0
- data/resources/custom/locales/ur/units.yml +19 -0
- data/resources/custom/locales/zh-Hant/units.yml +15 -0
- data/resources/custom/locales/zh/units.yml +15 -0
- data/resources/locales/af/units.yml +112 -65
- data/resources/locales/ar/units.yml +196 -126
- data/resources/locales/ca/units.yml +112 -70
- data/resources/locales/cs/units.yml +140 -91
- data/resources/locales/da/units.yml +98 -56
- data/resources/locales/de/units.yml +112 -70
- data/resources/locales/el/units.yml +119 -84
- data/resources/locales/en/units.yml +84 -42
- data/resources/locales/es/units.yml +112 -70
- data/resources/locales/eu/units.yml +105 -68
- data/resources/locales/fa/units.yml +98 -63
- data/resources/locales/fi/units.yml +112 -70
- data/resources/locales/fil/units.yml +98 -56
- data/resources/locales/fr/units.yml +112 -70
- data/resources/locales/he/units.yml +98 -56
- data/resources/locales/hi/units.yml +98 -56
- data/resources/locales/hu/units.yml +84 -49
- data/resources/locales/id/units.yml +84 -49
- data/resources/locales/it/units.yml +98 -56
- data/resources/locales/ja/units.yml +84 -49
- data/resources/locales/ko/units.yml +84 -49
- data/resources/locales/ms/units.yml +112 -63
- data/resources/locales/nb/units.yml +106 -64
- data/resources/locales/nl/units.yml +98 -56
- data/resources/locales/pl/units.yml +181 -112
- data/resources/locales/pt/units.yml +112 -70
- data/resources/locales/ru/units.yml +168 -112
- data/resources/locales/sv/units.yml +112 -70
- data/resources/locales/th/units.yml +84 -49
- data/resources/locales/tr/units.yml +84 -49
- data/resources/locales/uk/units.yml +168 -112
- data/resources/locales/ur/units.yml +112 -63
- data/resources/locales/zh-Hant/units.yml +84 -49
- data/resources/locales/zh/units.yml +84 -49
- data/spec/collation/collation_spec.rb +1 -1
- data/spec/collation/collator_spec.rb +120 -48
- data/spec/collation/sort_key_builder_spec.rb +80 -0
- data/spec/collation/tailoring_spec.rb +137 -0
- data/spec/collation/tailoring_tests/af.txt +321 -0
- data/spec/collation/tailoring_tests/ar.txt +188 -0
- data/spec/collation/tailoring_tests/ca.txt +446 -0
- data/spec/collation/tailoring_tests/cs.txt +273 -0
- data/spec/collation/tailoring_tests/da.txt +293 -0
- data/spec/collation/tailoring_tests/de.txt +414 -0
- data/spec/collation/tailoring_tests/el.txt +228 -0
- data/spec/collation/tailoring_tests/en.txt +399 -0
- data/spec/collation/tailoring_tests/es.txt +402 -0
- data/spec/collation/tailoring_tests/eu.txt +183 -0
- data/spec/collation/tailoring_tests/fa.txt +263 -0
- data/spec/collation/tailoring_tests/fi.txt +389 -0
- data/spec/collation/tailoring_tests/fil.txt +279 -0
- data/spec/collation/tailoring_tests/fr.txt +363 -0
- data/spec/collation/tailoring_tests/he.txt +167 -0
- data/spec/collation/tailoring_tests/hi.txt +230 -0
- data/spec/collation/tailoring_tests/hu.txt +773 -0
- data/spec/collation/tailoring_tests/id.txt +171 -0
- data/spec/collation/tailoring_tests/it.txt +231 -0
- data/spec/collation/tailoring_tests/ja.txt +4287 -0
- data/spec/collation/tailoring_tests/ko.txt +1761 -0
- data/spec/collation/tailoring_tests/ms.txt +531 -0
- data/spec/collation/tailoring_tests/nb.txt +375 -0
- data/spec/collation/tailoring_tests/nl.txt +273 -0
- data/spec/collation/tailoring_tests/pl.txt +225 -0
- data/spec/collation/tailoring_tests/pt.txt +405 -0
- data/spec/collation/tailoring_tests/ru.txt +213 -0
- data/spec/collation/tailoring_tests/sv.txt +353 -0
- data/spec/collation/tailoring_tests/th.txt +239 -0
- data/spec/collation/tailoring_tests/tr.txt +414 -0
- data/spec/collation/tailoring_tests/uk.txt +218 -0
- data/spec/collation/tailoring_tests/ur.txt +284 -0
- data/spec/collation/tailoring_tests/zh-Hant.txt +626 -0
- data/spec/collation/tailoring_tests/zh.txt +717 -0
- data/spec/collation/trie_builder_spec.rb +131 -51
- data/spec/collation/trie_spec.rb +301 -26
- data/spec/collation/trie_with_fallback_spec.rb +41 -0
- data/spec/core_ext/array_spec.rb +46 -3
- data/spec/core_ext/calendars/date_spec.rb +24 -24
- data/spec/core_ext/calendars/datetime_spec.rb +7 -0
- data/spec/core_ext/calendars/time_spec.rb +2 -2
- data/spec/formatters/calendars/timespan_formatter_spec.rb +47 -18
- data/spec/formatters/plurals/rules_spec.rb +3 -11
- data/spec/readme_spec.rb +15 -15
- data/spec/resources/loader_spec.rb +94 -0
- data/spec/spec_helper.rb +6 -0
- data/spec/tokenizers/calendars/timespan_tokenizer_spec.rb +1 -1
- data/spec/twitter_cldr_spec.rb +3 -3
- data/spec/utils_spec.rb +38 -0
- data/twitter_cldr.gemspec +25 -0
- metadata +156 -110
- data/spec/collation/sort_key_spec.rb +0 -56
- data/spec/shared/resources_spec.rb +0 -75
@@ -0,0 +1,91 @@
|
|
1
|
+
// Copyright 2012 Twitter, Inc
|
2
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
3
|
+
|
4
|
+
//= require '../../../build/twitter_cldr_en.js'
|
5
|
+
|
6
|
+
describe("TimespanFormatter", function() {
|
7
|
+
beforeEach(function() {
|
8
|
+
formatter = new TwitterCldr.TimespanFormatter();
|
9
|
+
});
|
10
|
+
|
11
|
+
describe("#format", function() {
|
12
|
+
it("works for a variety of units for a non-directional timespan", function() {
|
13
|
+
expect(formatter.format(3273932, {
|
14
|
+
unit: "year",
|
15
|
+
direction: "none"
|
16
|
+
})).toEqual('0 years');
|
17
|
+
expect(formatter.format(3273932, {
|
18
|
+
unit: "month",
|
19
|
+
direction: "none"
|
20
|
+
})).toEqual('1 month');
|
21
|
+
expect(formatter.format(3273932, {
|
22
|
+
unit: "week",
|
23
|
+
direction: "none"
|
24
|
+
})).toEqual('5 weeks');
|
25
|
+
expect(formatter.format(3273932, {
|
26
|
+
unit: "day",
|
27
|
+
direction: "none"
|
28
|
+
})).toEqual('38 days');
|
29
|
+
expect(formatter.format(3273932, {
|
30
|
+
unit: "hour",
|
31
|
+
direction: "none"
|
32
|
+
})).toEqual('909 hours');
|
33
|
+
expect(formatter.format(3273932, {
|
34
|
+
unit: "minute",
|
35
|
+
direction: "none"
|
36
|
+
})).toEqual('54566 minutes');
|
37
|
+
expect(formatter.format(3273932, {
|
38
|
+
unit: "second",
|
39
|
+
direction: "none"
|
40
|
+
})).toEqual('3273932 seconds');
|
41
|
+
});
|
42
|
+
|
43
|
+
it("works for a variety of units in the past", function() {
|
44
|
+
expect(formatter.format(-3273932, {
|
45
|
+
unit: "year"
|
46
|
+
})).toEqual('0 years ago');
|
47
|
+
expect(formatter.format(-3273932, {
|
48
|
+
unit: "month"
|
49
|
+
})).toEqual('1 month ago');
|
50
|
+
expect(formatter.format(-3273932, {
|
51
|
+
unit: "week"
|
52
|
+
})).toEqual('5 weeks ago');
|
53
|
+
expect(formatter.format(-3273932, {
|
54
|
+
unit: "day"
|
55
|
+
})).toEqual('38 days ago');
|
56
|
+
expect(formatter.format(-3273932, {
|
57
|
+
unit: "hour"
|
58
|
+
})).toEqual('909 hours ago');
|
59
|
+
expect(formatter.format(-3273932, {
|
60
|
+
unit: "minute"
|
61
|
+
})).toEqual('54566 minutes ago');
|
62
|
+
expect(formatter.format(-3273932, {
|
63
|
+
unit: "second"
|
64
|
+
})).toEqual('3273932 seconds ago');
|
65
|
+
});
|
66
|
+
|
67
|
+
it("works for a variety of units in the future", function() {
|
68
|
+
expect(formatter.format(3273932, {
|
69
|
+
unit: "year"
|
70
|
+
})).toEqual('In 0 years');
|
71
|
+
expect(formatter.format(3273932, {
|
72
|
+
unit: "month"
|
73
|
+
})).toEqual('In 1 month');
|
74
|
+
expect(formatter.format(3273932, {
|
75
|
+
unit: "week"
|
76
|
+
})).toEqual('In 5 weeks');
|
77
|
+
expect(formatter.format(3273932, {
|
78
|
+
unit: "day"
|
79
|
+
})).toEqual('In 38 days');
|
80
|
+
expect(formatter.format(3273932, {
|
81
|
+
unit: "hour"
|
82
|
+
})).toEqual('In 909 hours');
|
83
|
+
expect(formatter.format(3273932, {
|
84
|
+
unit: "minute"
|
85
|
+
})).toEqual('In 54566 minutes');
|
86
|
+
expect(formatter.format(3273932, {
|
87
|
+
unit: "second"
|
88
|
+
})).toEqual('In 3273932 seconds');
|
89
|
+
});
|
90
|
+
});
|
91
|
+
});
|
@@ -0,0 +1,28 @@
|
|
1
|
+
// Copyright 2012 Twitter, Inc
|
2
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
3
|
+
|
4
|
+
//= require '../../../build/twitter_cldr_en.js'
|
5
|
+
|
6
|
+
describe("PluralRules", function() {
|
7
|
+
describe("#all", function() {
|
8
|
+
it("returns an array of all English plural rules", function() {
|
9
|
+
expect(TwitterCldr.PluralRules.all()).toEqual(["one", "other"]);
|
10
|
+
});
|
11
|
+
});
|
12
|
+
|
13
|
+
describe("#rule_for", function() {
|
14
|
+
it("returns 'one' for the number 1", function() {
|
15
|
+
expect(TwitterCldr.PluralRules.rule_for(1)).toEqual("one");
|
16
|
+
});
|
17
|
+
|
18
|
+
it("returns 'other' for any number greater than 1", function() {
|
19
|
+
for (var i = 2; i < 10; i ++) {
|
20
|
+
expect(TwitterCldr.PluralRules.rule_for(i)).toEqual("other");
|
21
|
+
}
|
22
|
+
});
|
23
|
+
|
24
|
+
it("returns 'other' for the number 0", function() {
|
25
|
+
expect(TwitterCldr.PluralRules.rule_for(0)).toEqual("other");
|
26
|
+
});
|
27
|
+
});
|
28
|
+
});
|
@@ -0,0 +1,52 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
# Copyright 2012 Twitter, Inc
|
4
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
5
|
+
|
6
|
+
require File.join(File.dirname(File.dirname(File.dirname(__FILE__))), "spec_helper")
|
7
|
+
|
8
|
+
include TwitterCldr::Js::Renderers::PluralRules
|
9
|
+
|
10
|
+
describe PluralRulesCompiler do
|
11
|
+
describe "#rule_to_js" do
|
12
|
+
it "handles a single plural rule" do
|
13
|
+
PluralRulesCompiler.rule_to_js(":other").should == 'function(n) { return "other" }'
|
14
|
+
end
|
15
|
+
|
16
|
+
it "handles a conditional plural rule (eg. English)" do
|
17
|
+
PluralRulesCompiler.rule_to_js("n == 1 ? :one : :other").should == 'function(n) { return (function() { if (n == 1) { return "one" } else { return "other" } })(); }'
|
18
|
+
end
|
19
|
+
|
20
|
+
it "handles an include? call" do
|
21
|
+
PluralRulesCompiler.rule_to_js("[2, 3, 4].include?(n)").should == "function(n) { return [2, 3, 4].indexOf(n) >= 0 }"
|
22
|
+
end
|
23
|
+
|
24
|
+
it "handles the modulus operator" do
|
25
|
+
PluralRulesCompiler.rule_to_js("n % 10").should == "function(n) { return n % 10 }"
|
26
|
+
end
|
27
|
+
|
28
|
+
it "handles < and > operators" do
|
29
|
+
PluralRulesCompiler.rule_to_js("n > 10").should == "function(n) { return n > 10 }"
|
30
|
+
PluralRulesCompiler.rule_to_js("n < 10").should == "function(n) { return n < 10 }"
|
31
|
+
end
|
32
|
+
|
33
|
+
it "handles 'and', 'or', and 'not' operators" do
|
34
|
+
PluralRulesCompiler.rule_to_js("n and n").should == "function(n) { return n && n }"
|
35
|
+
PluralRulesCompiler.rule_to_js("n or n").should == "function(n) { return n || n }"
|
36
|
+
PluralRulesCompiler.rule_to_js("not n").should == "function(n) { return !(n) }"
|
37
|
+
end
|
38
|
+
|
39
|
+
it "compounds include? and the modulus operator" do
|
40
|
+
PluralRulesCompiler.rule_to_js("[2, 3, 4].include?(n % 10)").should == "function(n) { return [2, 3, 4].indexOf(n % 10) >= 0 }"
|
41
|
+
end
|
42
|
+
|
43
|
+
it "compounds include?, modulus, and an if statement" do
|
44
|
+
PluralRulesCompiler.rule_to_js("[2, 3, 4].include?(n % 10) ? :one : :other").should == 'function(n) { return (function() { if ([2, 3, 4].indexOf(n % 10) >= 0) { return "one" } else { return "other" } })(); }'
|
45
|
+
end
|
46
|
+
|
47
|
+
it "chains two if statements (eg. Polish)" do
|
48
|
+
ruby_string = "n == 1 ? :one : [2, 3, 4].include?(n % 10) && ![12, 13, 14].include?(n % 100) && ![22, 23, 24].include?(n % 100) ? :few : :other"
|
49
|
+
PluralRulesCompiler.rule_to_js(ruby_string).should == 'function(n) { return (function() { if (n == 1) { return "one" } else { return (function() { if ([2, 3, 4].indexOf(n % 10) >= 0 && !([12, 13, 14].indexOf(n % 100) >= 0) && !([22, 23, 24].indexOf(n % 100) >= 0)) { return "few" } else { return "other" } })(); } })(); }'
|
50
|
+
end
|
51
|
+
end
|
52
|
+
end
|
data/lib/twitter_cldr.rb
CHANGED
@@ -23,6 +23,7 @@ module TwitterCldr
|
|
23
23
|
autoload :Formatters, 'twitter_cldr/formatters'
|
24
24
|
autoload :Collation, 'twitter_cldr/collation'
|
25
25
|
autoload :Normalization, 'twitter_cldr/normalization'
|
26
|
+
autoload :Resources, 'twitter_cldr/resources'
|
26
27
|
autoload :Shared, 'twitter_cldr/shared'
|
27
28
|
autoload :Tokenizers, 'twitter_cldr/tokenizers'
|
28
29
|
autoload :Utils, 'twitter_cldr/utils'
|
@@ -54,7 +55,7 @@ module TwitterCldr
|
|
54
55
|
class << self
|
55
56
|
|
56
57
|
def resources
|
57
|
-
@resources ||= TwitterCldr::
|
58
|
+
@resources ||= TwitterCldr::Resources::Loader.new
|
58
59
|
end
|
59
60
|
|
60
61
|
def get_locale
|
@@ -7,8 +7,9 @@ module TwitterCldr
|
|
7
7
|
module Collation
|
8
8
|
autoload :Collator, 'twitter_cldr/collation/collator'
|
9
9
|
autoload :ImplicitCollationElements, 'twitter_cldr/collation/implicit_collation_elements'
|
10
|
-
autoload :
|
10
|
+
autoload :SortKeyBuilder, 'twitter_cldr/collation/sort_key_builder'
|
11
11
|
autoload :Trie, 'twitter_cldr/collation/trie'
|
12
12
|
autoload :TrieBuilder, 'twitter_cldr/collation/trie_builder'
|
13
|
+
autoload :TrieWithFallback, 'twitter_cldr/collation/trie_with_fallback'
|
13
14
|
end
|
14
15
|
end
|
@@ -13,52 +13,49 @@ module TwitterCldr
|
|
13
13
|
|
14
14
|
FRACTIONAL_UCA_SHORT_RESOURCE = 'collation/FractionalUCA_SHORT.txt'
|
15
15
|
|
16
|
-
|
17
|
-
|
16
|
+
attr_accessor :locale
|
17
|
+
|
18
|
+
def initialize(locale = nil)
|
19
|
+
@locale = TwitterCldr.convert_locale(locale) if locale
|
20
|
+
@trie = load_trie
|
18
21
|
end
|
19
22
|
|
20
|
-
def
|
21
|
-
|
23
|
+
def sort(strings)
|
24
|
+
strings.map{ |s| [s, get_sort_key(s)] }.sort{ |a, b| a[1] <=> b[1] }.map(&:first)
|
22
25
|
end
|
23
26
|
|
24
|
-
def
|
25
|
-
|
27
|
+
def sort!(strings)
|
28
|
+
sort_keys = Hash.new { |hash, string| hash[string] = get_sort_key(string) }
|
29
|
+
strings.replace(strings.sort_by { |s| sort_keys[s] })
|
26
30
|
end
|
27
31
|
|
28
|
-
def
|
29
|
-
|
32
|
+
def compare(string_a, string_b)
|
33
|
+
string_a == string_b ? 0 : get_sort_key(string_a) <=> get_sort_key(string_b)
|
30
34
|
end
|
31
35
|
|
32
|
-
def
|
33
|
-
|
36
|
+
def get_sort_key(string_or_code_points)
|
37
|
+
TwitterCldr::Collation::SortKeyBuilder.build(get_collation_elements(string_or_code_points))
|
34
38
|
end
|
35
39
|
|
36
|
-
|
40
|
+
def get_collation_elements(string_or_code_points)
|
41
|
+
integer_code_points = get_normalized_code_points(string_or_code_points)
|
37
42
|
|
38
|
-
|
39
|
-
|
40
|
-
|
43
|
+
result = []
|
44
|
+
result.concat(code_point_collation_elements(integer_code_points)) until integer_code_points.empty?
|
45
|
+
result
|
41
46
|
end
|
42
47
|
|
43
|
-
|
44
|
-
(a[:sort_key] <=> b[:sort_key]).nonzero? || get_integer_code_points(a[:code_points]) <=> get_integer_code_points(b[:code_points])
|
45
|
-
end
|
48
|
+
private
|
46
49
|
|
47
|
-
def
|
48
|
-
|
50
|
+
def load_trie
|
51
|
+
@locale ? self.class.tailored_fce_trie(@locale) : self.class.default_fce_trie
|
49
52
|
end
|
50
53
|
|
51
54
|
def get_integer_code_points(code_points)
|
52
55
|
code_points.map { |code_point| code_point.to_i(16) }
|
53
56
|
end
|
54
57
|
|
55
|
-
def
|
56
|
-
result = []
|
57
|
-
result.concat(code_point_collation_elements(integer_code_points)) until integer_code_points.empty?
|
58
|
-
result
|
59
|
-
end
|
60
|
-
|
61
|
-
def get_code_points(str_or_code_points)
|
58
|
+
def get_normalized_code_points(str_or_code_points)
|
62
59
|
code_points = str_or_code_points.is_a?(String) ? TwitterCldr::Utils::CodePoints.from_string(str_or_code_points) : str_or_code_points
|
63
60
|
|
64
61
|
# Normalization makes the collation process significantly slower (like seven times slower on the UCA
|
@@ -89,7 +86,7 @@ module TwitterCldr
|
|
89
86
|
#
|
90
87
|
def explicit_collation_elements(integer_code_points)
|
91
88
|
# find the longest prefix in the trie
|
92
|
-
collation_elements,
|
89
|
+
collation_elements, prefix_size, suffixes = @trie.find_prefix(integer_code_points)
|
93
90
|
|
94
91
|
return unless collation_elements
|
95
92
|
|
@@ -101,9 +98,6 @@ module TwitterCldr
|
|
101
98
|
used_combining_classes = {}
|
102
99
|
|
103
100
|
while non_starter_pos < integer_code_points.size && !suffixes.empty?
|
104
|
-
# create a trie from a hash of suffixes available for the chosen prefix
|
105
|
-
subtrie = TwitterCldr::Collation::Trie.new(suffixes)
|
106
|
-
|
107
101
|
# get next code point (possibly non-starter)
|
108
102
|
non_starter_code_point = integer_code_points[non_starter_pos]
|
109
103
|
combining_class = TwitterCldr::Normalization::Base.combining_class_for(non_starter_code_point.to_s(16))
|
@@ -115,7 +109,7 @@ module TwitterCldr
|
|
115
109
|
|
116
110
|
# Try to find collation elements for [prefix + non-starter] code points sequence. As the subtrie contains
|
117
111
|
# suffixes (without prefix) we pass only non-starter itself.
|
118
|
-
new_collation_elements, new_suffixes =
|
112
|
+
new_collation_elements, _, new_suffixes = suffixes.find_prefix([non_starter_code_point])
|
119
113
|
|
120
114
|
if new_collation_elements
|
121
115
|
# non-starter with a collation elements sequence corresponding to [prefix + non-starter] accepted
|
@@ -137,6 +131,30 @@ module TwitterCldr
|
|
137
131
|
TwitterCldr::Collation::ImplicitCollationElements.for_code_point(integer_code_points.shift)
|
138
132
|
end
|
139
133
|
|
134
|
+
class << self
|
135
|
+
|
136
|
+
# Loads and memoizes the default Fractional Collation Elements trie.
|
137
|
+
#
|
138
|
+
def default_fce_trie
|
139
|
+
@default_fce_trie ||= TwitterCldr::Collation::TrieBuilder.load_trie(FRACTIONAL_UCA_SHORT_RESOURCE).lock
|
140
|
+
end
|
141
|
+
|
142
|
+
def tailored_fce_trie(locale)
|
143
|
+
tailored_fce_tries_cache[locale]
|
144
|
+
end
|
145
|
+
|
146
|
+
private
|
147
|
+
|
148
|
+
def tailored_fce_tries_cache
|
149
|
+
@tailored_fce_tries_cache ||= Hash.new { |hash, locale| hash[locale] = load_tailored_trie(locale) }
|
150
|
+
end
|
151
|
+
|
152
|
+
def load_tailored_trie(locale)
|
153
|
+
TwitterCldr::Collation::TrieBuilder.load_tailored_trie(locale, default_fce_trie).lock
|
154
|
+
end
|
155
|
+
|
156
|
+
end
|
157
|
+
|
140
158
|
end
|
141
159
|
|
142
160
|
end
|
@@ -6,15 +6,23 @@
|
|
6
6
|
module TwitterCldr
|
7
7
|
module Collation
|
8
8
|
|
9
|
-
#
|
9
|
+
# SortKeyBuilder builds a collation sort key from an array of collation elements.
|
10
10
|
#
|
11
|
-
|
11
|
+
# Weights compression algorithms for every level are described in http://source.icu-project.org/repos/icu/icuhtml/trunk/design/collation/ICU_collation_design.htm
|
12
|
+
#
|
13
|
+
class SortKeyBuilder
|
12
14
|
|
13
15
|
PRIMARY_LEVEL, SECONDARY_LEVEL, TERTIARY_LEVEL = 0, 1, 2
|
14
16
|
|
15
17
|
LEVEL_SEPARATOR = 1 # separate levels in a sort key '01' bytes
|
16
18
|
|
17
|
-
TERTIARY_LEVEL_MASK = 0x3F # mask for removing case bits
|
19
|
+
TERTIARY_LEVEL_MASK = 0x3F # mask for removing case bits or continuation flag from a tertiary weight
|
20
|
+
|
21
|
+
PRIMARY_BYTE_MIN = 0x3
|
22
|
+
PRIMARY_BYTE_MAX = 0xFF
|
23
|
+
|
24
|
+
MIN_NON_LATIN_PRIMARY = 0x5B
|
25
|
+
MAX_REGULAR_PRIMARY = 0x7A
|
18
26
|
|
19
27
|
attr_reader :collation_elements
|
20
28
|
|
@@ -56,11 +64,30 @@ module TwitterCldr
|
|
56
64
|
end
|
57
65
|
|
58
66
|
def append_primary_bytes
|
67
|
+
@last_leading_byte = nil
|
68
|
+
|
59
69
|
@collation_elements.each do |collation_element|
|
60
|
-
|
70
|
+
bytes = fixnum_to_bytes_array(level_weight(collation_element, PRIMARY_LEVEL))
|
71
|
+
|
72
|
+
unless bytes.empty?
|
73
|
+
leading_byte = bytes.shift
|
74
|
+
|
75
|
+
if leading_byte != @last_leading_byte
|
76
|
+
@bytes_array << (leading_byte < @last_leading_byte ? PRIMARY_BYTE_MIN : PRIMARY_BYTE_MAX) if @last_leading_byte
|
77
|
+
@bytes_array << leading_byte
|
78
|
+
|
79
|
+
@last_leading_byte = !bytes.empty? && compressible_primary?(leading_byte) ? leading_byte : nil
|
80
|
+
end
|
81
|
+
|
82
|
+
@bytes_array.concat(bytes)
|
83
|
+
end
|
61
84
|
end
|
62
85
|
end
|
63
86
|
|
87
|
+
def compressible_primary?(leading_byte)
|
88
|
+
(MIN_NON_LATIN_PRIMARY..MAX_REGULAR_PRIMARY).include?(leading_byte)
|
89
|
+
end
|
90
|
+
|
64
91
|
def append_secondary_bytes
|
65
92
|
@bytes_array << LEVEL_SEPARATOR
|
66
93
|
|
@@ -136,10 +163,6 @@ module TwitterCldr
|
|
136
163
|
level_weight(collation_element, TERTIARY_LEVEL) & TERTIARY_LEVEL_MASK
|
137
164
|
end
|
138
165
|
|
139
|
-
def append_weight(weight)
|
140
|
-
@bytes_array.concat(fixnum_to_bytes_array(weight))
|
141
|
-
end
|
142
|
-
|
143
166
|
def level_weight(collation_element, level)
|
144
167
|
collation_element[level] || 0
|
145
168
|
end
|
@@ -17,54 +17,146 @@ module TwitterCldr
|
|
17
17
|
class Trie
|
18
18
|
|
19
19
|
# Initializes a new trie. If `trie_hash` value is passed it's used as the initial data for the trie. Usually,
|
20
|
-
# `trie_hash` is extracted from other trie and represents its
|
20
|
+
# `trie_hash` is extracted from other trie and represents its subtrie.
|
21
21
|
#
|
22
|
-
def initialize(
|
23
|
-
@root =
|
22
|
+
def initialize(root = Node.new)
|
23
|
+
@root = root
|
24
|
+
@locked = false
|
25
|
+
end
|
26
|
+
|
27
|
+
def lock
|
28
|
+
@locked = true
|
29
|
+
self
|
30
|
+
end
|
31
|
+
|
32
|
+
def locked?
|
33
|
+
@locked
|
34
|
+
end
|
35
|
+
|
36
|
+
def starters
|
37
|
+
@root.keys
|
38
|
+
end
|
39
|
+
|
40
|
+
def each_starting_with(starter, &block)
|
41
|
+
starting_node = @root.child(starter)
|
42
|
+
each_pair(starting_node, [starter], &block) if starting_node
|
43
|
+
end
|
44
|
+
|
45
|
+
def empty?
|
46
|
+
!@root.has_children?
|
24
47
|
end
|
25
48
|
|
26
49
|
def add(key, value)
|
27
|
-
|
28
|
-
|
29
|
-
end
|
50
|
+
store(key, value, false)
|
51
|
+
end
|
30
52
|
|
31
|
-
|
53
|
+
def set(key, value)
|
54
|
+
store(key, value)
|
32
55
|
end
|
33
56
|
|
34
57
|
def get(key)
|
35
58
|
final = key.inject(@root) do |node, key_element|
|
36
|
-
|
37
|
-
|
38
|
-
subtree
|
59
|
+
return unless node
|
60
|
+
node.child(key_element)
|
39
61
|
end
|
40
62
|
|
41
|
-
final
|
63
|
+
final && final.value
|
42
64
|
end
|
43
65
|
|
44
66
|
# Finds the longest substring of the `key` that matches, as a key, a node in the trie.
|
45
67
|
#
|
46
68
|
# Returns a three elements array:
|
47
69
|
#
|
48
|
-
# 1. value in the last node that was visited
|
49
|
-
# 2.
|
50
|
-
# 3.
|
70
|
+
# 1. value in the last node that was visited and has non-nil value
|
71
|
+
# 2. size of the `key` prefix that matches this node
|
72
|
+
# 3. subtrie for which that node is a root
|
51
73
|
#
|
52
74
|
def find_prefix(key)
|
53
|
-
|
54
|
-
|
75
|
+
last_prefix_size = 0
|
76
|
+
last_with_value = @root
|
77
|
+
|
78
|
+
key.each_with_index.inject(@root) do |node, (key_element, index)|
|
79
|
+
child = node.child(key_element)
|
80
|
+
|
81
|
+
break unless child
|
82
|
+
|
83
|
+
if child.value
|
84
|
+
last_prefix_size = index + 1
|
85
|
+
last_with_value = child
|
86
|
+
end
|
87
|
+
|
88
|
+
child
|
89
|
+
end
|
90
|
+
|
91
|
+
[last_with_value.value, last_prefix_size, last_with_value.to_trie]
|
92
|
+
end
|
93
|
+
|
94
|
+
def to_hash
|
95
|
+
@root.subtrie_hash
|
96
|
+
end
|
97
|
+
|
98
|
+
alias inspect to_s # to prevent printing of a possibly huge children list in the IRB
|
99
|
+
|
100
|
+
private
|
55
101
|
|
56
|
-
|
57
|
-
|
102
|
+
def store(key, value, override = true)
|
103
|
+
raise RuntimeError, "can't store value in a locked trie" if locked?
|
104
|
+
|
105
|
+
final = key.inject(@root) do |node, key_element|
|
106
|
+
node.child(key_element) || node.set_child(key_element, Node.new)
|
107
|
+
end
|
108
|
+
|
109
|
+
final.value = value unless final.value && !override
|
110
|
+
end
|
111
|
+
|
112
|
+
def each_pair(node, key, &block)
|
113
|
+
yield [key, node.value] if node.value
|
114
|
+
|
115
|
+
node.each_key_and_child do |key_element, child|
|
116
|
+
each_pair(child, key + [key_element], &block)
|
117
|
+
end
|
118
|
+
end
|
119
|
+
|
120
|
+
class Node
|
121
|
+
|
122
|
+
attr_accessor :value
|
123
|
+
|
124
|
+
def initialize(value = nil, children = {})
|
125
|
+
@value = value
|
126
|
+
@children = children
|
127
|
+
end
|
128
|
+
|
129
|
+
def child(key)
|
130
|
+
@children[key]
|
131
|
+
end
|
132
|
+
|
133
|
+
def set_child(key, child)
|
134
|
+
@children[key] = child
|
135
|
+
end
|
136
|
+
|
137
|
+
def has_children?
|
138
|
+
!@children.empty?
|
139
|
+
end
|
140
|
+
|
141
|
+
def each_key_and_child(&block)
|
142
|
+
@children.each(&block)
|
143
|
+
end
|
144
|
+
|
145
|
+
def keys
|
146
|
+
@children.keys
|
147
|
+
end
|
148
|
+
|
149
|
+
def to_trie
|
150
|
+
Trie.new(self.class.new(nil, @children)).lock
|
151
|
+
end
|
58
152
|
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
break
|
153
|
+
def subtrie_hash
|
154
|
+
@children.inject({}) do |memo, (key, child)|
|
155
|
+
memo[key] = [child.value, child.subtrie_hash]
|
156
|
+
memo
|
64
157
|
end
|
65
158
|
end
|
66
159
|
|
67
|
-
node + [prefix_size]
|
68
160
|
end
|
69
161
|
|
70
162
|
end
|