twitter_cldr 1.5.0 → 1.6.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +32 -0
- data/History.txt +78 -0
- data/README.md +72 -62
- data/Rakefile +22 -0
- data/js/lib/compiler.rb +40 -0
- data/js/lib/mustache/bundle.coffee +14 -0
- data/js/lib/mustache/calendars/datetime.coffee +240 -0
- data/js/lib/mustache/calendars/timespan.coffee +52 -0
- data/js/lib/mustache/plurals/rules.coffee +14 -0
- data/js/lib/renderers/base.rb +18 -0
- data/js/lib/renderers/bundle.rb +18 -0
- data/js/lib/renderers/calendars/datetime_renderer.rb +34 -0
- data/js/lib/renderers/calendars/timespan_renderer.rb +39 -0
- data/js/lib/renderers/plurals/rules/plural_rules_compiler.rb +89 -0
- data/js/lib/renderers/plurals/rules/plural_rules_renderer.rb +26 -0
- data/js/lib/twitter_cldr_js.rb +85 -0
- data/js/spec/js/calendars/datetime_spec.js +418 -0
- data/js/spec/js/calendars/timespan_spec.js +91 -0
- data/js/spec/js/plurals/plural_rules_spec.js +28 -0
- data/js/spec/js/support/jasmine.yml +8 -0
- data/js/spec/rb/renderers/plurals/plural_rules_compiler_spec.rb +52 -0
- data/js/spec/rb/spec_helper.rb +13 -0
- data/lib/twitter_cldr.rb +2 -1
- data/lib/twitter_cldr/collation.rb +2 -1
- data/lib/twitter_cldr/collation/collator.rb +49 -31
- data/lib/twitter_cldr/collation/{sort_key.rb → sort_key_builder.rb} +31 -8
- data/lib/twitter_cldr/collation/trie.rb +116 -24
- data/lib/twitter_cldr/collation/trie_builder.rb +54 -28
- data/lib/twitter_cldr/collation/trie_with_fallback.rb +55 -0
- data/lib/twitter_cldr/core_ext/array.rb +14 -1
- data/lib/twitter_cldr/core_ext/calendars/datetime.rb +8 -2
- data/lib/twitter_cldr/core_ext/calendars/timespan.rb +5 -5
- data/lib/twitter_cldr/formatters/calendars/timespan_formatter.rb +10 -10
- data/lib/twitter_cldr/formatters/plurals/rules.rb +3 -5
- data/lib/twitter_cldr/resources.rb +11 -0
- data/lib/twitter_cldr/resources/import.rb +12 -0
- data/lib/twitter_cldr/resources/import/tailoring.rb +193 -0
- data/lib/twitter_cldr/{shared/resources.rb → resources/loader.rb} +17 -4
- data/lib/twitter_cldr/shared.rb +0 -1
- data/lib/twitter_cldr/tokenizers/base.rb +9 -9
- data/lib/twitter_cldr/tokenizers/calendars/datetime_tokenizer.rb +0 -4
- data/lib/twitter_cldr/tokenizers/calendars/timespan_tokenizer.rb +21 -7
- data/lib/twitter_cldr/utils.rb +11 -0
- data/lib/twitter_cldr/version.rb +1 -1
- data/resources/collation/tailoring/af.yml +3 -0
- data/resources/collation/tailoring/ar.yml +21 -0
- data/resources/collation/tailoring/ca.yml +9 -0
- data/resources/collation/tailoring/cs.yml +25 -0
- data/resources/collation/tailoring/da.yml +59 -0
- data/resources/collation/tailoring/de.yml +3 -0
- data/resources/collation/tailoring/el.yml +3 -0
- data/resources/collation/tailoring/en.yml +3 -0
- data/resources/collation/tailoring/es.yml +5 -0
- data/resources/collation/tailoring/eu.yml +3 -0
- data/resources/collation/tailoring/fa.yml +73 -0
- data/resources/collation/tailoring/fi.yml +61 -0
- data/resources/collation/tailoring/fil.yml +11 -0
- data/resources/collation/tailoring/fr.yml +3 -0
- data/resources/collation/tailoring/he.yml +3 -0
- data/resources/collation/tailoring/hi.yml +7 -0
- data/resources/collation/tailoring/hu.yml +125 -0
- data/resources/collation/tailoring/id.yml +3 -0
- data/resources/collation/tailoring/it.yml +3 -0
- data/resources/collation/tailoring/ja.yml +14647 -0
- data/resources/collation/tailoring/ko.yml +14953 -0
- data/resources/collation/tailoring/ms.yml +3 -0
- data/resources/collation/tailoring/nb.yml +59 -0
- data/resources/collation/tailoring/nl.yml +3 -0
- data/resources/collation/tailoring/pl.yml +37 -0
- data/resources/collation/tailoring/pt.yml +3 -0
- data/resources/collation/tailoring/ru.yml +3 -0
- data/resources/collation/tailoring/sv.yml +63 -0
- data/resources/collation/tailoring/th.yml +19 -0
- data/resources/collation/tailoring/tr.yml +27 -0
- data/resources/collation/tailoring/uk.yml +5 -0
- data/resources/collation/tailoring/ur.yml +163 -0
- data/resources/collation/tailoring/zh-Hant.yml +3 -0
- data/resources/collation/tailoring/zh.yml +149 -0
- data/resources/custom/locales/af/units.yml +19 -0
- data/resources/custom/locales/ar/units.yml +35 -0
- data/resources/custom/locales/ca/units.yml +19 -0
- data/resources/custom/locales/cs/units.yml +23 -0
- data/resources/custom/locales/da/units.yml +19 -0
- data/resources/custom/locales/de/units.yml +19 -0
- data/resources/custom/locales/el/units.yml +19 -0
- data/resources/custom/locales/en/units.yml +18 -0
- data/resources/custom/locales/es/units.yml +19 -0
- data/resources/custom/locales/eu/units.yml +19 -0
- data/resources/custom/locales/fa/units.yml +15 -0
- data/resources/custom/locales/fi/units.yml +19 -0
- data/resources/custom/locales/fil/units.yml +19 -0
- data/resources/custom/locales/fr/units.yml +19 -0
- data/resources/custom/locales/he/units.yml +19 -0
- data/resources/custom/locales/hi/units.yml +19 -0
- data/resources/custom/locales/hu/units.yml +15 -0
- data/resources/custom/locales/id/units.yml +15 -0
- data/resources/custom/locales/it/units.yml +19 -0
- data/resources/custom/locales/ja/units.yml +15 -0
- data/resources/custom/locales/ko/units.yml +15 -0
- data/resources/custom/locales/ms/units.yml +15 -0
- data/resources/custom/locales/nb/units.yml +19 -0
- data/resources/custom/locales/nl/units.yml +19 -0
- data/resources/custom/locales/pl/units.yml +23 -0
- data/resources/custom/locales/pt/units.yml +19 -0
- data/resources/custom/locales/ru/units.yml +27 -0
- data/resources/custom/locales/sv/units.yml +19 -0
- data/resources/custom/locales/th/units.yml +15 -0
- data/resources/custom/locales/tr/units.yml +15 -0
- data/resources/custom/locales/uk/units.yml +27 -0
- data/resources/custom/locales/ur/units.yml +19 -0
- data/resources/custom/locales/zh-Hant/units.yml +15 -0
- data/resources/custom/locales/zh/units.yml +15 -0
- data/resources/locales/af/units.yml +112 -65
- data/resources/locales/ar/units.yml +196 -126
- data/resources/locales/ca/units.yml +112 -70
- data/resources/locales/cs/units.yml +140 -91
- data/resources/locales/da/units.yml +98 -56
- data/resources/locales/de/units.yml +112 -70
- data/resources/locales/el/units.yml +119 -84
- data/resources/locales/en/units.yml +84 -42
- data/resources/locales/es/units.yml +112 -70
- data/resources/locales/eu/units.yml +105 -68
- data/resources/locales/fa/units.yml +98 -63
- data/resources/locales/fi/units.yml +112 -70
- data/resources/locales/fil/units.yml +98 -56
- data/resources/locales/fr/units.yml +112 -70
- data/resources/locales/he/units.yml +98 -56
- data/resources/locales/hi/units.yml +98 -56
- data/resources/locales/hu/units.yml +84 -49
- data/resources/locales/id/units.yml +84 -49
- data/resources/locales/it/units.yml +98 -56
- data/resources/locales/ja/units.yml +84 -49
- data/resources/locales/ko/units.yml +84 -49
- data/resources/locales/ms/units.yml +112 -63
- data/resources/locales/nb/units.yml +106 -64
- data/resources/locales/nl/units.yml +98 -56
- data/resources/locales/pl/units.yml +181 -112
- data/resources/locales/pt/units.yml +112 -70
- data/resources/locales/ru/units.yml +168 -112
- data/resources/locales/sv/units.yml +112 -70
- data/resources/locales/th/units.yml +84 -49
- data/resources/locales/tr/units.yml +84 -49
- data/resources/locales/uk/units.yml +168 -112
- data/resources/locales/ur/units.yml +112 -63
- data/resources/locales/zh-Hant/units.yml +84 -49
- data/resources/locales/zh/units.yml +84 -49
- data/spec/collation/collation_spec.rb +1 -1
- data/spec/collation/collator_spec.rb +120 -48
- data/spec/collation/sort_key_builder_spec.rb +80 -0
- data/spec/collation/tailoring_spec.rb +137 -0
- data/spec/collation/tailoring_tests/af.txt +321 -0
- data/spec/collation/tailoring_tests/ar.txt +188 -0
- data/spec/collation/tailoring_tests/ca.txt +446 -0
- data/spec/collation/tailoring_tests/cs.txt +273 -0
- data/spec/collation/tailoring_tests/da.txt +293 -0
- data/spec/collation/tailoring_tests/de.txt +414 -0
- data/spec/collation/tailoring_tests/el.txt +228 -0
- data/spec/collation/tailoring_tests/en.txt +399 -0
- data/spec/collation/tailoring_tests/es.txt +402 -0
- data/spec/collation/tailoring_tests/eu.txt +183 -0
- data/spec/collation/tailoring_tests/fa.txt +263 -0
- data/spec/collation/tailoring_tests/fi.txt +389 -0
- data/spec/collation/tailoring_tests/fil.txt +279 -0
- data/spec/collation/tailoring_tests/fr.txt +363 -0
- data/spec/collation/tailoring_tests/he.txt +167 -0
- data/spec/collation/tailoring_tests/hi.txt +230 -0
- data/spec/collation/tailoring_tests/hu.txt +773 -0
- data/spec/collation/tailoring_tests/id.txt +171 -0
- data/spec/collation/tailoring_tests/it.txt +231 -0
- data/spec/collation/tailoring_tests/ja.txt +4287 -0
- data/spec/collation/tailoring_tests/ko.txt +1761 -0
- data/spec/collation/tailoring_tests/ms.txt +531 -0
- data/spec/collation/tailoring_tests/nb.txt +375 -0
- data/spec/collation/tailoring_tests/nl.txt +273 -0
- data/spec/collation/tailoring_tests/pl.txt +225 -0
- data/spec/collation/tailoring_tests/pt.txt +405 -0
- data/spec/collation/tailoring_tests/ru.txt +213 -0
- data/spec/collation/tailoring_tests/sv.txt +353 -0
- data/spec/collation/tailoring_tests/th.txt +239 -0
- data/spec/collation/tailoring_tests/tr.txt +414 -0
- data/spec/collation/tailoring_tests/uk.txt +218 -0
- data/spec/collation/tailoring_tests/ur.txt +284 -0
- data/spec/collation/tailoring_tests/zh-Hant.txt +626 -0
- data/spec/collation/tailoring_tests/zh.txt +717 -0
- data/spec/collation/trie_builder_spec.rb +131 -51
- data/spec/collation/trie_spec.rb +301 -26
- data/spec/collation/trie_with_fallback_spec.rb +41 -0
- data/spec/core_ext/array_spec.rb +46 -3
- data/spec/core_ext/calendars/date_spec.rb +24 -24
- data/spec/core_ext/calendars/datetime_spec.rb +7 -0
- data/spec/core_ext/calendars/time_spec.rb +2 -2
- data/spec/formatters/calendars/timespan_formatter_spec.rb +47 -18
- data/spec/formatters/plurals/rules_spec.rb +3 -11
- data/spec/readme_spec.rb +15 -15
- data/spec/resources/loader_spec.rb +94 -0
- data/spec/spec_helper.rb +6 -0
- data/spec/tokenizers/calendars/timespan_tokenizer_spec.rb +1 -1
- data/spec/twitter_cldr_spec.rb +3 -3
- data/spec/utils_spec.rb +38 -0
- data/twitter_cldr.gemspec +25 -0
- metadata +156 -110
- data/spec/collation/sort_key_spec.rb +0 -56
- data/spec/shared/resources_spec.rb +0 -75
@@ -6,48 +6,74 @@
|
|
6
6
|
module TwitterCldr
|
7
7
|
module Collation
|
8
8
|
|
9
|
-
# Builds a collation elements Trie from the file containing a fractional collation elements table.
|
9
|
+
# Builds a fractional collation elements Trie from the file containing a fractional collation elements table.
|
10
10
|
#
|
11
|
-
|
11
|
+
module TrieBuilder
|
12
12
|
|
13
|
-
|
13
|
+
# Fractional collation element regexp
|
14
|
+
FCE_REGEXP = /^((?:[0-9A-F]+)(?:\s[0-9A-F]+)*);\s((?:\[.*?\])(?:\[.*?\])*)/
|
14
15
|
|
15
|
-
|
16
|
-
new(file_path).build
|
17
|
-
end
|
16
|
+
class << self
|
18
17
|
|
19
|
-
|
20
|
-
|
21
|
-
|
18
|
+
def load_trie(resource)
|
19
|
+
parse_trie(load_resource(resource))
|
20
|
+
end
|
22
21
|
|
23
|
-
|
24
|
-
|
25
|
-
|
22
|
+
def load_tailored_trie(locale, fallback)
|
23
|
+
build_tailored_trie(TwitterCldr.get_resource(:collation, :tailoring, locale), fallback)
|
24
|
+
end
|
26
25
|
|
27
|
-
|
26
|
+
def parse_trie(table, trie = TwitterCldr::Collation::Trie.new)
|
27
|
+
table.lines.each do |line|
|
28
|
+
trie.set(parse_code_points($1), parse_collation_element($2)) if FCE_REGEXP =~ line
|
29
|
+
end
|
28
30
|
|
29
|
-
|
30
|
-
|
31
|
+
trie
|
32
|
+
end
|
33
|
+
|
34
|
+
private
|
31
35
|
|
32
|
-
|
33
|
-
|
36
|
+
def load_resource(resource)
|
37
|
+
open(File.join(TwitterCldr::RESOURCES_DIR, resource), 'r')
|
34
38
|
end
|
35
39
|
|
36
|
-
|
37
|
-
|
40
|
+
def parse_code_points(string)
|
41
|
+
string.split.map { |cp| cp.to_i(16) }
|
42
|
+
end
|
38
43
|
|
39
|
-
|
40
|
-
|
41
|
-
|
44
|
+
def parse_collation_element(string)
|
45
|
+
string.scan(/\[.*?\]/).map do |match|
|
46
|
+
match[1..-2].gsub(/\s/, '').split(',', -1).map { |bytes| bytes.to_i(16) }
|
47
|
+
end
|
48
|
+
end
|
42
49
|
|
43
|
-
|
44
|
-
|
45
|
-
|
50
|
+
def build_tailored_trie(tailoring_data, fallback)
|
51
|
+
trie = TwitterCldr::Collation::TrieWithFallback.new(fallback)
|
52
|
+
|
53
|
+
parse_trie(tailoring_data[:tailored_table], trie)
|
54
|
+
copy_expansions(trie, fallback, parse_suppressed_starters(tailoring_data[:suppressed_contractions]))
|
46
55
|
|
47
|
-
|
48
|
-
string.scan(/\[.*?\]/).map do |match|
|
49
|
-
match[1..-2].gsub(/\s/, '').split(',', -1).map { |bytes| bytes.to_i(16) }
|
56
|
+
trie
|
50
57
|
end
|
58
|
+
|
59
|
+
def copy_expansions(trie, source_trie, suppressed_starters)
|
60
|
+
suppressed_starters.each do |starter|
|
61
|
+
trie.add([starter], source_trie.get([starter]))
|
62
|
+
end
|
63
|
+
|
64
|
+
(trie.starters - suppressed_starters).each do |starter|
|
65
|
+
source_trie.each_starting_with(starter) do |key, value|
|
66
|
+
trie.add(key, value)
|
67
|
+
end
|
68
|
+
end
|
69
|
+
end
|
70
|
+
|
71
|
+
def parse_suppressed_starters(suppressed_contractions)
|
72
|
+
suppressed_contractions.chars.map do |starter|
|
73
|
+
TwitterCldr::Utils::CodePoints.from_string(starter).first.to_i(16)
|
74
|
+
end
|
75
|
+
end
|
76
|
+
|
51
77
|
end
|
52
78
|
|
53
79
|
end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
# Copyright 2012 Twitter, Inc
|
4
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
5
|
+
|
6
|
+
module TwitterCldr
|
7
|
+
module Collation
|
8
|
+
|
9
|
+
# Trie that delegates all not found keys to the fallback.
|
10
|
+
#
|
11
|
+
# Note: methods #get and #find_prefix behave slightly differently. The first one, #get, delegates to the fallback
|
12
|
+
# any key that was not found. On the other hand, #find_prefix delegates the key only if none of its prefixes was
|
13
|
+
# found.
|
14
|
+
#
|
15
|
+
# E.g., if the fallback contains key [1, 2] with value '12' and the trie itself contains only key [1] with value '1'
|
16
|
+
# results will be the following:
|
17
|
+
#
|
18
|
+
# trie.get([1, 2]) #=> '12' - key [1, 2] wasn't found in the trie, so it was delegated to the fallback where the
|
19
|
+
# value '12' was found.
|
20
|
+
#
|
21
|
+
# trie.find_prefix([1, 2]) #=> ['1', 1, suffixes] - key [1, 2] is not present in the trie, but its prefix [1] was
|
22
|
+
# found, so the fallback wasn't used.
|
23
|
+
#
|
24
|
+
# trie.find_prefix([3, 2]) - the trie itself includes neither key [3, 2] nor its prefix [3], so this call is
|
25
|
+
# delegated to the fallback.
|
26
|
+
#
|
27
|
+
# This special behavior of the #find_prefix method allows 'hiding' fallback keys that contain more than one element
|
28
|
+
# by adding their one-element prefixes to the trie itself. This feature is useful for some applications, e.g., for
|
29
|
+
# suppressing contractions in a tailored FCE trie.
|
30
|
+
#
|
31
|
+
class TrieWithFallback < TwitterCldr::Collation::Trie
|
32
|
+
|
33
|
+
def initialize(fallback)
|
34
|
+
super()
|
35
|
+
@fallback = fallback
|
36
|
+
end
|
37
|
+
|
38
|
+
def get(key)
|
39
|
+
super || @fallback.get(key)
|
40
|
+
end
|
41
|
+
|
42
|
+
def find_prefix(key)
|
43
|
+
value, prefix_size, suffixes = super
|
44
|
+
|
45
|
+
if prefix_size > 0
|
46
|
+
[value, prefix_size, suffixes]
|
47
|
+
else
|
48
|
+
@fallback.find_prefix(key)
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
end
|
53
|
+
|
54
|
+
end
|
55
|
+
end
|
@@ -12,11 +12,24 @@ end
|
|
12
12
|
module TwitterCldr
|
13
13
|
class LocalizedArray < LocalizedObject
|
14
14
|
def code_points_to_string
|
15
|
-
TwitterCldr::Utils::CodePoints.to_string(
|
15
|
+
TwitterCldr::Utils::CodePoints.to_string(base_obj)
|
16
|
+
end
|
17
|
+
|
18
|
+
def sort
|
19
|
+
TwitterCldr::Collation::Collator.new(locale).sort(base_obj).localize
|
20
|
+
end
|
21
|
+
|
22
|
+
def sort!
|
23
|
+
TwitterCldr::Collation::Collator.new(locale).sort!(base_obj)
|
24
|
+
self
|
16
25
|
end
|
17
26
|
|
18
27
|
def formatter_const
|
19
28
|
nil
|
20
29
|
end
|
30
|
+
|
31
|
+
def to_a
|
32
|
+
@base_obj.dup
|
33
|
+
end
|
21
34
|
end
|
22
35
|
end
|
@@ -24,18 +24,24 @@ module TwitterCldr
|
|
24
24
|
end
|
25
25
|
end
|
26
26
|
|
27
|
+
def to_timespan(options = {})
|
28
|
+
base_time = options[:base_time] || Time.now
|
29
|
+
seconds = (self.to_time.base_obj.to_i - base_time.to_i).abs
|
30
|
+
TwitterCldr::LocalizedTimespan.new(seconds, options.merge(:locale => @locale, :direction => :none))
|
31
|
+
end
|
32
|
+
|
27
33
|
def ago(options = {})
|
28
34
|
base_time = options[:base_time] || Time.now
|
29
35
|
seconds = self.to_time.base_obj.to_i - base_time.to_i
|
30
36
|
raise ArgumentError.new('Start date is after end date. Consider using "until" function.') if seconds > 0
|
31
|
-
TwitterCldr::LocalizedTimespan.new(seconds,
|
37
|
+
TwitterCldr::LocalizedTimespan.new(seconds, options.merge(:locale => @locale))
|
32
38
|
end
|
33
39
|
|
34
40
|
def until(options = {})
|
35
41
|
base_time = options[:base_time] || Time.now
|
36
42
|
seconds = self.to_time.base_obj.to_i - base_time.to_i
|
37
43
|
raise ArgumentError.new('End date is before start date. Consider using "ago" function.') if seconds < 0
|
38
|
-
TwitterCldr::LocalizedTimespan.new(seconds,
|
44
|
+
TwitterCldr::LocalizedTimespan.new(seconds, options.merge(:locale => @locale))
|
39
45
|
end
|
40
46
|
|
41
47
|
def to_s
|
@@ -6,13 +6,13 @@
|
|
6
6
|
module TwitterCldr
|
7
7
|
class LocalizedTimespan < LocalizedObject
|
8
8
|
|
9
|
-
def initialize(seconds,
|
10
|
-
|
11
|
-
@
|
9
|
+
def initialize(seconds, options = {})
|
10
|
+
super(seconds, options[:locale], options)
|
11
|
+
@formatter = TwitterCldr::Formatters::TimespanFormatter.new(options)
|
12
12
|
end
|
13
13
|
|
14
|
-
def to_s(
|
15
|
-
@formatter.format(@
|
14
|
+
def to_s(options = {})
|
15
|
+
@formatter.format(@base_obj, options)
|
16
16
|
end
|
17
17
|
|
18
18
|
protected
|
@@ -6,6 +6,8 @@
|
|
6
6
|
module TwitterCldr
|
7
7
|
module Formatters
|
8
8
|
class TimespanFormatter < Base
|
9
|
+
DEFAULT_TYPE = :default
|
10
|
+
|
9
11
|
TIME_IN_SECONDS = {
|
10
12
|
:second => 1,
|
11
13
|
:minute => 60,
|
@@ -17,21 +19,19 @@ module TwitterCldr
|
|
17
19
|
}
|
18
20
|
|
19
21
|
def initialize(options = {})
|
22
|
+
@direction = options[:direction]
|
20
23
|
@tokenizer = TwitterCldr::Tokenizers::TimespanTokenizer.new(:locale => extract_locale(options))
|
21
24
|
end
|
22
25
|
|
23
|
-
def format(seconds,
|
24
|
-
direction
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
end
|
29
|
-
|
30
|
-
number = calculate_time(seconds.abs, unit)
|
26
|
+
def format(seconds, options = {})
|
27
|
+
options[:direction] ||= @direction || (seconds < 0 ? :ago : :until)
|
28
|
+
options[:unit] ||= self.calculate_unit(seconds.abs)
|
29
|
+
options[:number] = calculate_time(seconds.abs, options[:unit])
|
30
|
+
options[:type] ||= DEFAULT_TYPE
|
31
31
|
|
32
|
-
tokens = @tokenizer.tokens(
|
32
|
+
tokens = @tokenizer.tokens(options)
|
33
33
|
strings = tokens.map { |token| token[:value]}
|
34
|
-
strings.join.gsub(/\{[0-9]\}/, number.to_s)
|
34
|
+
strings.join.gsub(/\{[0-9]\}/, options[:number].to_s)
|
35
35
|
end
|
36
36
|
|
37
37
|
def calculate_unit(seconds)
|
@@ -15,15 +15,13 @@ module TwitterCldr
|
|
15
15
|
end
|
16
16
|
|
17
17
|
def all_for(locale)
|
18
|
-
|
19
|
-
get_resource(locale)[locale][:i18n][:plural][:keys]
|
18
|
+
get_resource(locale)[:keys]
|
20
19
|
rescue
|
21
20
|
nil
|
22
21
|
end
|
23
22
|
|
24
23
|
def rule_for(number, locale = TwitterCldr.get_locale)
|
25
|
-
locale
|
26
|
-
get_resource(locale)[locale][:i18n][:plural][:rule].call(number)
|
24
|
+
get_resource(locale)[:rule].call(number)
|
27
25
|
rescue
|
28
26
|
:other
|
29
27
|
end
|
@@ -32,7 +30,7 @@ module TwitterCldr
|
|
32
30
|
|
33
31
|
def get_resource(locale)
|
34
32
|
locale = TwitterCldr.convert_locale(locale)
|
35
|
-
eval(TwitterCldr.get_locale_resource(locale, :plurals)[locale])
|
33
|
+
eval(TwitterCldr.get_locale_resource(locale, :plurals)[locale])[locale][:i18n][:plural]
|
36
34
|
end
|
37
35
|
|
38
36
|
end
|
@@ -0,0 +1,193 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
# Copyright 2012 Twitter, Inc
|
4
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
5
|
+
|
6
|
+
require 'nokogiri'
|
7
|
+
require 'yaml'
|
8
|
+
require 'java'
|
9
|
+
|
10
|
+
module TwitterCldr
|
11
|
+
module Resources
|
12
|
+
module Import
|
13
|
+
|
14
|
+
# This class should be used with JRuby 1.7 in 1.9 mode and ICU4J version 49.1 (available at
|
15
|
+
# http://download.icu-project.org/files/icu4j/49.1/icu4j-49_1.jar).
|
16
|
+
#
|
17
|
+
class Tailoring
|
18
|
+
|
19
|
+
SUPPORTED_RULES = %w[p s t i pc sc tc ic x]
|
20
|
+
SIMPLE_RULES = %w[p s t i]
|
21
|
+
LEVEL_RULE_REGEXP = /^(p|s|t|i)(c?)$/
|
22
|
+
|
23
|
+
IGNORED_TAGS = %w[reset text #comment]
|
24
|
+
|
25
|
+
LAST_BYTE_MASK = 0xFF
|
26
|
+
|
27
|
+
LOCALES_MAP = {
|
28
|
+
:'zh-Hant' => :'zh_Hant',
|
29
|
+
:id => :root,
|
30
|
+
:it => :root,
|
31
|
+
:ms => :root,
|
32
|
+
:nl => :root,
|
33
|
+
:pt => :root
|
34
|
+
}
|
35
|
+
|
36
|
+
EMPTY_TAILORING_DATA = { 'tailored_table' => '', 'suppressed_contractions' => '' }
|
37
|
+
|
38
|
+
class ImportError < RuntimeError; end
|
39
|
+
|
40
|
+
# Arguments:
|
41
|
+
#
|
42
|
+
# input_path - path to a directory containing CLDR tailoring data (available at
|
43
|
+
# http://unicode.org/cldr/trac/browser/tags/release-21/common/collation/
|
44
|
+
# or as a part of CLDR release at http://cldr.unicode.org/index/downloads)
|
45
|
+
#
|
46
|
+
# output_path - output directory for imported YAML files
|
47
|
+
#
|
48
|
+
# icu4j_path - path to ICU4J jar file
|
49
|
+
#
|
50
|
+
def initialize(input_path, output_path, icu4j_path)
|
51
|
+
require icu4j_path
|
52
|
+
|
53
|
+
@input_path = input_path
|
54
|
+
@output_path = output_path
|
55
|
+
end
|
56
|
+
|
57
|
+
def import(locale)
|
58
|
+
print "Importing %8s\t--\t" % locale
|
59
|
+
|
60
|
+
if tailoring_present?(locale)
|
61
|
+
YAML.dump(tailoring_data(locale), open(resource_file_path(locale), 'w'))
|
62
|
+
puts "Done."
|
63
|
+
else
|
64
|
+
YAML.dump(EMPTY_TAILORING_DATA, open(resource_file_path(locale), 'w'))
|
65
|
+
puts "Missing (generated empty tailoring resource)."
|
66
|
+
end
|
67
|
+
rescue ImportError => e
|
68
|
+
puts "Error: #{e.message}"
|
69
|
+
end
|
70
|
+
|
71
|
+
private
|
72
|
+
|
73
|
+
def tailoring_present?(locale)
|
74
|
+
File.file?(locale_file_path(locale))
|
75
|
+
end
|
76
|
+
|
77
|
+
def translated_locale(locale)
|
78
|
+
LOCALES_MAP.fetch(locale, locale)
|
79
|
+
end
|
80
|
+
|
81
|
+
def locale_file_path(locale)
|
82
|
+
File.join(@input_path, "#{translated_locale(locale)}.xml")
|
83
|
+
end
|
84
|
+
|
85
|
+
def resource_file_path(locale)
|
86
|
+
File.join(@output_path, "#{locale}.yml")
|
87
|
+
end
|
88
|
+
|
89
|
+
def tailoring_data(locale)
|
90
|
+
doc = Nokogiri::XML(open(locale_file_path(locale)))
|
91
|
+
collations = doc.at_xpath('//collations')
|
92
|
+
|
93
|
+
collation_alias = collations.at_xpath('alias[@path="//ldml/collations"]')
|
94
|
+
aliased_locale = collation_alias && collation_alias.attr('source')
|
95
|
+
|
96
|
+
return tailoring_data(aliased_locale) if aliased_locale
|
97
|
+
|
98
|
+
standard_tailoring = collations.at_xpath('collation[@type="standard"]')
|
99
|
+
|
100
|
+
{
|
101
|
+
'tailored_table' => parse_tailorings(standard_tailoring, locale),
|
102
|
+
'suppressed_contractions' => parse_suppressed_contractions(standard_tailoring)
|
103
|
+
}
|
104
|
+
end
|
105
|
+
|
106
|
+
def parse_tailorings(data, locale)
|
107
|
+
rules = data && data.at_xpath('rules')
|
108
|
+
|
109
|
+
return '' unless rules
|
110
|
+
|
111
|
+
collator = Java::ComIbmIcuText::Collator.get_instance(Java::JavaUtil::Locale.new(locale.to_s))
|
112
|
+
|
113
|
+
rules.children.map do |child|
|
114
|
+
validate_tailoring_rule(child)
|
115
|
+
|
116
|
+
if child.name =~ LEVEL_RULE_REGEXP
|
117
|
+
if $2.empty?
|
118
|
+
table_entry_for_rule(collator, child.text)
|
119
|
+
else
|
120
|
+
child.text.chars.map { |char| table_entry_for_rule(collator, char) }
|
121
|
+
end
|
122
|
+
elsif child.name == 'x'
|
123
|
+
context = ''
|
124
|
+
child.children.each_with_object([]) do |c, memo|
|
125
|
+
if SIMPLE_RULES.include?(c.name)
|
126
|
+
memo << table_entry_for_rule(collator, context + c.text)
|
127
|
+
elsif c.name == 'context'
|
128
|
+
context = c.text
|
129
|
+
elsif c.name != 'extend'
|
130
|
+
raise ImportError, "Rule '#{c.name}' inside <x></x> is not supported."
|
131
|
+
end
|
132
|
+
end
|
133
|
+
else
|
134
|
+
raise ImportError, "Tag '#{child.name}' is not supported." unless IGNORED_TAGS.include?(child.name)
|
135
|
+
end
|
136
|
+
end.flatten.compact.join("\n")
|
137
|
+
end
|
138
|
+
|
139
|
+
def table_entry_for_rule(collator, tailored_value)
|
140
|
+
code_points = get_code_points(tailored_value)
|
141
|
+
|
142
|
+
collation_elements = get_collation_elements(collator, tailored_value).map do |ce|
|
143
|
+
ce.map { |l| l.to_s(16).upcase }.join(', ')
|
144
|
+
end
|
145
|
+
|
146
|
+
"#{code_points.join(' ')}; [#{collation_elements.join('][')}]"
|
147
|
+
end
|
148
|
+
|
149
|
+
def parse_suppressed_contractions(data)
|
150
|
+
return '' unless data
|
151
|
+
|
152
|
+
Array(data.xpath('suppress_contractions')).map do |contractions|
|
153
|
+
Java::ComIbmIcuText::UnicodeSet.to_array(Java::ComIbmIcuText::UnicodeSet.new(contractions.text)).to_a
|
154
|
+
end.flatten.join
|
155
|
+
end
|
156
|
+
|
157
|
+
def validate_tailoring_rule(rule)
|
158
|
+
return if IGNORED_TAGS.include?(rule.name)
|
159
|
+
|
160
|
+
raise ImportError, "Rule '#{rule.name}' is not supported." unless SUPPORTED_RULES.include?(rule.name)
|
161
|
+
end
|
162
|
+
|
163
|
+
def get_collation_elements(collator, string)
|
164
|
+
iter = collator.get_collation_element_iterator(string)
|
165
|
+
|
166
|
+
collation_elements = []
|
167
|
+
ce = iter.next
|
168
|
+
|
169
|
+
while ce != Java::ComIbmIcuText::CollationElementIterator::NULLORDER
|
170
|
+
p1 = (ce >> 24) & LAST_BYTE_MASK
|
171
|
+
p2 = (ce >> 16) & LAST_BYTE_MASK
|
172
|
+
|
173
|
+
primary = p2.zero? ? p1 : (p1 << 8) + p2
|
174
|
+
secondary = (ce >> 8) & LAST_BYTE_MASK
|
175
|
+
tertiarly = ce & LAST_BYTE_MASK
|
176
|
+
|
177
|
+
collation_elements << [primary, secondary, tertiarly]
|
178
|
+
|
179
|
+
ce = iter.next
|
180
|
+
end
|
181
|
+
|
182
|
+
collation_elements
|
183
|
+
end
|
184
|
+
|
185
|
+
def get_code_points(string)
|
186
|
+
TwitterCldr::Normalization::NFD.normalize_code_points(TwitterCldr::Utils::CodePoints.from_string(string))
|
187
|
+
end
|
188
|
+
|
189
|
+
end
|
190
|
+
|
191
|
+
end
|
192
|
+
end
|
193
|
+
end
|