twitter_cldr 1.5.0 → 1.6.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (203) hide show
  1. data/Gemfile +32 -0
  2. data/History.txt +78 -0
  3. data/README.md +72 -62
  4. data/Rakefile +22 -0
  5. data/js/lib/compiler.rb +40 -0
  6. data/js/lib/mustache/bundle.coffee +14 -0
  7. data/js/lib/mustache/calendars/datetime.coffee +240 -0
  8. data/js/lib/mustache/calendars/timespan.coffee +52 -0
  9. data/js/lib/mustache/plurals/rules.coffee +14 -0
  10. data/js/lib/renderers/base.rb +18 -0
  11. data/js/lib/renderers/bundle.rb +18 -0
  12. data/js/lib/renderers/calendars/datetime_renderer.rb +34 -0
  13. data/js/lib/renderers/calendars/timespan_renderer.rb +39 -0
  14. data/js/lib/renderers/plurals/rules/plural_rules_compiler.rb +89 -0
  15. data/js/lib/renderers/plurals/rules/plural_rules_renderer.rb +26 -0
  16. data/js/lib/twitter_cldr_js.rb +85 -0
  17. data/js/spec/js/calendars/datetime_spec.js +418 -0
  18. data/js/spec/js/calendars/timespan_spec.js +91 -0
  19. data/js/spec/js/plurals/plural_rules_spec.js +28 -0
  20. data/js/spec/js/support/jasmine.yml +8 -0
  21. data/js/spec/rb/renderers/plurals/plural_rules_compiler_spec.rb +52 -0
  22. data/js/spec/rb/spec_helper.rb +13 -0
  23. data/lib/twitter_cldr.rb +2 -1
  24. data/lib/twitter_cldr/collation.rb +2 -1
  25. data/lib/twitter_cldr/collation/collator.rb +49 -31
  26. data/lib/twitter_cldr/collation/{sort_key.rb → sort_key_builder.rb} +31 -8
  27. data/lib/twitter_cldr/collation/trie.rb +116 -24
  28. data/lib/twitter_cldr/collation/trie_builder.rb +54 -28
  29. data/lib/twitter_cldr/collation/trie_with_fallback.rb +55 -0
  30. data/lib/twitter_cldr/core_ext/array.rb +14 -1
  31. data/lib/twitter_cldr/core_ext/calendars/datetime.rb +8 -2
  32. data/lib/twitter_cldr/core_ext/calendars/timespan.rb +5 -5
  33. data/lib/twitter_cldr/formatters/calendars/timespan_formatter.rb +10 -10
  34. data/lib/twitter_cldr/formatters/plurals/rules.rb +3 -5
  35. data/lib/twitter_cldr/resources.rb +11 -0
  36. data/lib/twitter_cldr/resources/import.rb +12 -0
  37. data/lib/twitter_cldr/resources/import/tailoring.rb +193 -0
  38. data/lib/twitter_cldr/{shared/resources.rb → resources/loader.rb} +17 -4
  39. data/lib/twitter_cldr/shared.rb +0 -1
  40. data/lib/twitter_cldr/tokenizers/base.rb +9 -9
  41. data/lib/twitter_cldr/tokenizers/calendars/datetime_tokenizer.rb +0 -4
  42. data/lib/twitter_cldr/tokenizers/calendars/timespan_tokenizer.rb +21 -7
  43. data/lib/twitter_cldr/utils.rb +11 -0
  44. data/lib/twitter_cldr/version.rb +1 -1
  45. data/resources/collation/tailoring/af.yml +3 -0
  46. data/resources/collation/tailoring/ar.yml +21 -0
  47. data/resources/collation/tailoring/ca.yml +9 -0
  48. data/resources/collation/tailoring/cs.yml +25 -0
  49. data/resources/collation/tailoring/da.yml +59 -0
  50. data/resources/collation/tailoring/de.yml +3 -0
  51. data/resources/collation/tailoring/el.yml +3 -0
  52. data/resources/collation/tailoring/en.yml +3 -0
  53. data/resources/collation/tailoring/es.yml +5 -0
  54. data/resources/collation/tailoring/eu.yml +3 -0
  55. data/resources/collation/tailoring/fa.yml +73 -0
  56. data/resources/collation/tailoring/fi.yml +61 -0
  57. data/resources/collation/tailoring/fil.yml +11 -0
  58. data/resources/collation/tailoring/fr.yml +3 -0
  59. data/resources/collation/tailoring/he.yml +3 -0
  60. data/resources/collation/tailoring/hi.yml +7 -0
  61. data/resources/collation/tailoring/hu.yml +125 -0
  62. data/resources/collation/tailoring/id.yml +3 -0
  63. data/resources/collation/tailoring/it.yml +3 -0
  64. data/resources/collation/tailoring/ja.yml +14647 -0
  65. data/resources/collation/tailoring/ko.yml +14953 -0
  66. data/resources/collation/tailoring/ms.yml +3 -0
  67. data/resources/collation/tailoring/nb.yml +59 -0
  68. data/resources/collation/tailoring/nl.yml +3 -0
  69. data/resources/collation/tailoring/pl.yml +37 -0
  70. data/resources/collation/tailoring/pt.yml +3 -0
  71. data/resources/collation/tailoring/ru.yml +3 -0
  72. data/resources/collation/tailoring/sv.yml +63 -0
  73. data/resources/collation/tailoring/th.yml +19 -0
  74. data/resources/collation/tailoring/tr.yml +27 -0
  75. data/resources/collation/tailoring/uk.yml +5 -0
  76. data/resources/collation/tailoring/ur.yml +163 -0
  77. data/resources/collation/tailoring/zh-Hant.yml +3 -0
  78. data/resources/collation/tailoring/zh.yml +149 -0
  79. data/resources/custom/locales/af/units.yml +19 -0
  80. data/resources/custom/locales/ar/units.yml +35 -0
  81. data/resources/custom/locales/ca/units.yml +19 -0
  82. data/resources/custom/locales/cs/units.yml +23 -0
  83. data/resources/custom/locales/da/units.yml +19 -0
  84. data/resources/custom/locales/de/units.yml +19 -0
  85. data/resources/custom/locales/el/units.yml +19 -0
  86. data/resources/custom/locales/en/units.yml +18 -0
  87. data/resources/custom/locales/es/units.yml +19 -0
  88. data/resources/custom/locales/eu/units.yml +19 -0
  89. data/resources/custom/locales/fa/units.yml +15 -0
  90. data/resources/custom/locales/fi/units.yml +19 -0
  91. data/resources/custom/locales/fil/units.yml +19 -0
  92. data/resources/custom/locales/fr/units.yml +19 -0
  93. data/resources/custom/locales/he/units.yml +19 -0
  94. data/resources/custom/locales/hi/units.yml +19 -0
  95. data/resources/custom/locales/hu/units.yml +15 -0
  96. data/resources/custom/locales/id/units.yml +15 -0
  97. data/resources/custom/locales/it/units.yml +19 -0
  98. data/resources/custom/locales/ja/units.yml +15 -0
  99. data/resources/custom/locales/ko/units.yml +15 -0
  100. data/resources/custom/locales/ms/units.yml +15 -0
  101. data/resources/custom/locales/nb/units.yml +19 -0
  102. data/resources/custom/locales/nl/units.yml +19 -0
  103. data/resources/custom/locales/pl/units.yml +23 -0
  104. data/resources/custom/locales/pt/units.yml +19 -0
  105. data/resources/custom/locales/ru/units.yml +27 -0
  106. data/resources/custom/locales/sv/units.yml +19 -0
  107. data/resources/custom/locales/th/units.yml +15 -0
  108. data/resources/custom/locales/tr/units.yml +15 -0
  109. data/resources/custom/locales/uk/units.yml +27 -0
  110. data/resources/custom/locales/ur/units.yml +19 -0
  111. data/resources/custom/locales/zh-Hant/units.yml +15 -0
  112. data/resources/custom/locales/zh/units.yml +15 -0
  113. data/resources/locales/af/units.yml +112 -65
  114. data/resources/locales/ar/units.yml +196 -126
  115. data/resources/locales/ca/units.yml +112 -70
  116. data/resources/locales/cs/units.yml +140 -91
  117. data/resources/locales/da/units.yml +98 -56
  118. data/resources/locales/de/units.yml +112 -70
  119. data/resources/locales/el/units.yml +119 -84
  120. data/resources/locales/en/units.yml +84 -42
  121. data/resources/locales/es/units.yml +112 -70
  122. data/resources/locales/eu/units.yml +105 -68
  123. data/resources/locales/fa/units.yml +98 -63
  124. data/resources/locales/fi/units.yml +112 -70
  125. data/resources/locales/fil/units.yml +98 -56
  126. data/resources/locales/fr/units.yml +112 -70
  127. data/resources/locales/he/units.yml +98 -56
  128. data/resources/locales/hi/units.yml +98 -56
  129. data/resources/locales/hu/units.yml +84 -49
  130. data/resources/locales/id/units.yml +84 -49
  131. data/resources/locales/it/units.yml +98 -56
  132. data/resources/locales/ja/units.yml +84 -49
  133. data/resources/locales/ko/units.yml +84 -49
  134. data/resources/locales/ms/units.yml +112 -63
  135. data/resources/locales/nb/units.yml +106 -64
  136. data/resources/locales/nl/units.yml +98 -56
  137. data/resources/locales/pl/units.yml +181 -112
  138. data/resources/locales/pt/units.yml +112 -70
  139. data/resources/locales/ru/units.yml +168 -112
  140. data/resources/locales/sv/units.yml +112 -70
  141. data/resources/locales/th/units.yml +84 -49
  142. data/resources/locales/tr/units.yml +84 -49
  143. data/resources/locales/uk/units.yml +168 -112
  144. data/resources/locales/ur/units.yml +112 -63
  145. data/resources/locales/zh-Hant/units.yml +84 -49
  146. data/resources/locales/zh/units.yml +84 -49
  147. data/spec/collation/collation_spec.rb +1 -1
  148. data/spec/collation/collator_spec.rb +120 -48
  149. data/spec/collation/sort_key_builder_spec.rb +80 -0
  150. data/spec/collation/tailoring_spec.rb +137 -0
  151. data/spec/collation/tailoring_tests/af.txt +321 -0
  152. data/spec/collation/tailoring_tests/ar.txt +188 -0
  153. data/spec/collation/tailoring_tests/ca.txt +446 -0
  154. data/spec/collation/tailoring_tests/cs.txt +273 -0
  155. data/spec/collation/tailoring_tests/da.txt +293 -0
  156. data/spec/collation/tailoring_tests/de.txt +414 -0
  157. data/spec/collation/tailoring_tests/el.txt +228 -0
  158. data/spec/collation/tailoring_tests/en.txt +399 -0
  159. data/spec/collation/tailoring_tests/es.txt +402 -0
  160. data/spec/collation/tailoring_tests/eu.txt +183 -0
  161. data/spec/collation/tailoring_tests/fa.txt +263 -0
  162. data/spec/collation/tailoring_tests/fi.txt +389 -0
  163. data/spec/collation/tailoring_tests/fil.txt +279 -0
  164. data/spec/collation/tailoring_tests/fr.txt +363 -0
  165. data/spec/collation/tailoring_tests/he.txt +167 -0
  166. data/spec/collation/tailoring_tests/hi.txt +230 -0
  167. data/spec/collation/tailoring_tests/hu.txt +773 -0
  168. data/spec/collation/tailoring_tests/id.txt +171 -0
  169. data/spec/collation/tailoring_tests/it.txt +231 -0
  170. data/spec/collation/tailoring_tests/ja.txt +4287 -0
  171. data/spec/collation/tailoring_tests/ko.txt +1761 -0
  172. data/spec/collation/tailoring_tests/ms.txt +531 -0
  173. data/spec/collation/tailoring_tests/nb.txt +375 -0
  174. data/spec/collation/tailoring_tests/nl.txt +273 -0
  175. data/spec/collation/tailoring_tests/pl.txt +225 -0
  176. data/spec/collation/tailoring_tests/pt.txt +405 -0
  177. data/spec/collation/tailoring_tests/ru.txt +213 -0
  178. data/spec/collation/tailoring_tests/sv.txt +353 -0
  179. data/spec/collation/tailoring_tests/th.txt +239 -0
  180. data/spec/collation/tailoring_tests/tr.txt +414 -0
  181. data/spec/collation/tailoring_tests/uk.txt +218 -0
  182. data/spec/collation/tailoring_tests/ur.txt +284 -0
  183. data/spec/collation/tailoring_tests/zh-Hant.txt +626 -0
  184. data/spec/collation/tailoring_tests/zh.txt +717 -0
  185. data/spec/collation/trie_builder_spec.rb +131 -51
  186. data/spec/collation/trie_spec.rb +301 -26
  187. data/spec/collation/trie_with_fallback_spec.rb +41 -0
  188. data/spec/core_ext/array_spec.rb +46 -3
  189. data/spec/core_ext/calendars/date_spec.rb +24 -24
  190. data/spec/core_ext/calendars/datetime_spec.rb +7 -0
  191. data/spec/core_ext/calendars/time_spec.rb +2 -2
  192. data/spec/formatters/calendars/timespan_formatter_spec.rb +47 -18
  193. data/spec/formatters/plurals/rules_spec.rb +3 -11
  194. data/spec/readme_spec.rb +15 -15
  195. data/spec/resources/loader_spec.rb +94 -0
  196. data/spec/spec_helper.rb +6 -0
  197. data/spec/tokenizers/calendars/timespan_tokenizer_spec.rb +1 -1
  198. data/spec/twitter_cldr_spec.rb +3 -3
  199. data/spec/utils_spec.rb +38 -0
  200. data/twitter_cldr.gemspec +25 -0
  201. metadata +156 -110
  202. data/spec/collation/sort_key_spec.rb +0 -56
  203. data/spec/shared/resources_spec.rb +0 -75
@@ -6,48 +6,74 @@
6
6
  module TwitterCldr
7
7
  module Collation
8
8
 
9
- # Builds a collation elements Trie from the file containing a fractional collation elements table.
9
+ # Builds a fractional collation elements Trie from the file containing a fractional collation elements table.
10
10
  #
11
- class TrieBuilder
11
+ module TrieBuilder
12
12
 
13
- FRACTIONAL_UCA_REGEXP = /^((?:[0-9A-F]+)(?:\s[0-9A-F]+)*);\s((?:\[.*?\])(?:\[.*?\])*)/
13
+ # Fractional collation element regexp
14
+ FCE_REGEXP = /^((?:[0-9A-F]+)(?:\s[0-9A-F]+)*);\s((?:\[.*?\])(?:\[.*?\])*)/
14
15
 
15
- def self.load_trie(file_path)
16
- new(file_path).build
17
- end
16
+ class << self
18
17
 
19
- def initialize(resource)
20
- @file_path = File.join(TwitterCldr::RESOURCES_DIR, resource)
21
- end
18
+ def load_trie(resource)
19
+ parse_trie(load_resource(resource))
20
+ end
22
21
 
23
- def build
24
- parse_trie(load_collation_elements_table)
25
- end
22
+ def load_tailored_trie(locale, fallback)
23
+ build_tailored_trie(TwitterCldr.get_resource(:collation, :tailoring, locale), fallback)
24
+ end
26
25
 
27
- private
26
+ def parse_trie(table, trie = TwitterCldr::Collation::Trie.new)
27
+ table.lines.each do |line|
28
+ trie.set(parse_code_points($1), parse_collation_element($2)) if FCE_REGEXP =~ line
29
+ end
28
30
 
29
- def parse_trie(table)
30
- trie = TwitterCldr::Collation::Trie.new
31
+ trie
32
+ end
33
+
34
+ private
31
35
 
32
- table.lines.each do |line|
33
- trie.add(parse_code_points($1), parse_collation_element($2)) if FRACTIONAL_UCA_REGEXP =~ line
36
+ def load_resource(resource)
37
+ open(File.join(TwitterCldr::RESOURCES_DIR, resource), 'r')
34
38
  end
35
39
 
36
- trie
37
- end
40
+ def parse_code_points(string)
41
+ string.split.map { |cp| cp.to_i(16) }
42
+ end
38
43
 
39
- def load_collation_elements_table
40
- open(@file_path, 'r')
41
- end
44
+ def parse_collation_element(string)
45
+ string.scan(/\[.*?\]/).map do |match|
46
+ match[1..-2].gsub(/\s/, '').split(',', -1).map { |bytes| bytes.to_i(16) }
47
+ end
48
+ end
42
49
 
43
- def parse_code_points(string)
44
- string.split.map { |cp| cp.to_i(16) }
45
- end
50
+ def build_tailored_trie(tailoring_data, fallback)
51
+ trie = TwitterCldr::Collation::TrieWithFallback.new(fallback)
52
+
53
+ parse_trie(tailoring_data[:tailored_table], trie)
54
+ copy_expansions(trie, fallback, parse_suppressed_starters(tailoring_data[:suppressed_contractions]))
46
55
 
47
- def parse_collation_element(string)
48
- string.scan(/\[.*?\]/).map do |match|
49
- match[1..-2].gsub(/\s/, '').split(',', -1).map { |bytes| bytes.to_i(16) }
56
+ trie
50
57
  end
58
+
59
+ def copy_expansions(trie, source_trie, suppressed_starters)
60
+ suppressed_starters.each do |starter|
61
+ trie.add([starter], source_trie.get([starter]))
62
+ end
63
+
64
+ (trie.starters - suppressed_starters).each do |starter|
65
+ source_trie.each_starting_with(starter) do |key, value|
66
+ trie.add(key, value)
67
+ end
68
+ end
69
+ end
70
+
71
+ def parse_suppressed_starters(suppressed_contractions)
72
+ suppressed_contractions.chars.map do |starter|
73
+ TwitterCldr::Utils::CodePoints.from_string(starter).first.to_i(16)
74
+ end
75
+ end
76
+
51
77
  end
52
78
 
53
79
  end
@@ -0,0 +1,55 @@
1
+ # encoding: UTF-8
2
+
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
6
+ module TwitterCldr
7
+ module Collation
8
+
9
+ # Trie that delegates all not found keys to the fallback.
10
+ #
11
+ # Note: methods #get and #find_prefix behave slightly differently. The first one, #get, delegates to the fallback
12
+ # any key that was not found. On the other hand, #find_prefix delegates the key only if none of its prefixes was
13
+ # found.
14
+ #
15
+ # E.g., if the fallback contains key [1, 2] with value '12' and the trie itself contains only key [1] with value '1'
16
+ # results will be the following:
17
+ #
18
+ # trie.get([1, 2]) #=> '12' - key [1, 2] wasn't found in the trie, so it was delegated to the fallback where the
19
+ # value '12' was found.
20
+ #
21
+ # trie.find_prefix([1, 2]) #=> ['1', 1, suffixes] - key [1, 2] is not present in the trie, but its prefix [1] was
22
+ # found, so the fallback wasn't used.
23
+ #
24
+ # trie.find_prefix([3, 2]) - the trie itself includes neither key [3, 2] nor its prefix [3], so this call is
25
+ # delegated to the fallback.
26
+ #
27
+ # This special behavior of the #find_prefix method allows 'hiding' fallback keys that contain more than one element
28
+ # by adding their one element prefixes to the trie itself. This feature is useful for some applications, e.g., for
29
+ # suppressing contractions in a tailored FCE trie.
30
+ #
31
+ class TrieWithFallback < TwitterCldr::Collation::Trie
32
+
33
+ def initialize(fallback)
34
+ super()
35
+ @fallback = fallback
36
+ end
37
+
38
+ def get(key)
39
+ super || @fallback.get(key)
40
+ end
41
+
42
+ def find_prefix(key)
43
+ value, prefix_size, suffixes = super
44
+
45
+ if prefix_size > 0
46
+ [value, prefix_size, suffixes]
47
+ else
48
+ @fallback.find_prefix(key)
49
+ end
50
+ end
51
+
52
+ end
53
+
54
+ end
55
+ end
@@ -12,11 +12,24 @@ end
12
12
  module TwitterCldr
13
13
  class LocalizedArray < LocalizedObject
14
14
  def code_points_to_string
15
- TwitterCldr::Utils::CodePoints.to_string(self.base_obj)
15
+ TwitterCldr::Utils::CodePoints.to_string(base_obj)
16
+ end
17
+
18
+ def sort
19
+ TwitterCldr::Collation::Collator.new(locale).sort(base_obj).localize
20
+ end
21
+
22
+ def sort!
23
+ TwitterCldr::Collation::Collator.new(locale).sort!(base_obj)
24
+ self
16
25
  end
17
26
 
18
27
  def formatter_const
19
28
  nil
20
29
  end
30
+
31
+ def to_a
32
+ @base_obj.dup
33
+ end
21
34
  end
22
35
  end
@@ -24,18 +24,24 @@ module TwitterCldr
24
24
  end
25
25
  end
26
26
 
27
+ def to_timespan(options = {})
28
+ base_time = options[:base_time] || Time.now
29
+ seconds = (self.to_time.base_obj.to_i - base_time.to_i).abs
30
+ TwitterCldr::LocalizedTimespan.new(seconds, options.merge(:locale => @locale, :direction => :none))
31
+ end
32
+
27
33
  def ago(options = {})
28
34
  base_time = options[:base_time] || Time.now
29
35
  seconds = self.to_time.base_obj.to_i - base_time.to_i
30
36
  raise ArgumentError.new('Start date is after end date. Consider using "until" function.') if seconds > 0
31
- TwitterCldr::LocalizedTimespan.new(seconds, @locale).to_s(options[:unit])
37
+ TwitterCldr::LocalizedTimespan.new(seconds, options.merge(:locale => @locale))
32
38
  end
33
39
 
34
40
  def until(options = {})
35
41
  base_time = options[:base_time] || Time.now
36
42
  seconds = self.to_time.base_obj.to_i - base_time.to_i
37
43
  raise ArgumentError.new('End date is before start date. Consider using "ago" function.') if seconds < 0
38
- TwitterCldr::LocalizedTimespan.new(seconds, @locale).to_s(options[:unit])
44
+ TwitterCldr::LocalizedTimespan.new(seconds, options.merge(:locale => @locale))
39
45
  end
40
46
 
41
47
  def to_s
@@ -6,13 +6,13 @@
6
6
  module TwitterCldr
7
7
  class LocalizedTimespan < LocalizedObject
8
8
 
9
- def initialize(seconds, locale)
10
- @formatter = TwitterCldr::Formatters::TimespanFormatter.new(:locale => locale)
11
- @seconds = seconds
9
+ def initialize(seconds, options = {})
10
+ super(seconds, options[:locale], options)
11
+ @formatter = TwitterCldr::Formatters::TimespanFormatter.new(options)
12
12
  end
13
13
 
14
- def to_s(unit = :default)
15
- @formatter.format(@seconds, unit)
14
+ def to_s(options = {})
15
+ @formatter.format(@base_obj, options)
16
16
  end
17
17
 
18
18
  protected
@@ -6,6 +6,8 @@
6
6
  module TwitterCldr
7
7
  module Formatters
8
8
  class TimespanFormatter < Base
9
+ DEFAULT_TYPE = :default
10
+
9
11
  TIME_IN_SECONDS = {
10
12
  :second => 1,
11
13
  :minute => 60,
@@ -17,21 +19,19 @@ module TwitterCldr
17
19
  }
18
20
 
19
21
  def initialize(options = {})
22
+ @direction = options[:direction]
20
23
  @tokenizer = TwitterCldr::Tokenizers::TimespanTokenizer.new(:locale => extract_locale(options))
21
24
  end
22
25
 
23
- def format(seconds, unit)
24
- direction = seconds < 0 ? :ago : :until
25
-
26
- if unit.nil? || unit == :default
27
- unit = self.calculate_unit(seconds.abs)
28
- end
29
-
30
- number = calculate_time(seconds.abs, unit)
26
+ def format(seconds, options = {})
27
+ options[:direction] ||= @direction || (seconds < 0 ? :ago : :until)
28
+ options[:unit] ||= self.calculate_unit(seconds.abs)
29
+ options[:number] = calculate_time(seconds.abs, options[:unit])
30
+ options[:type] ||= DEFAULT_TYPE
31
31
 
32
- tokens = @tokenizer.tokens(:direction => direction, :unit => unit, :number => number)
32
+ tokens = @tokenizer.tokens(options)
33
33
  strings = tokens.map { |token| token[:value]}
34
- strings.join.gsub(/\{[0-9]\}/, number.to_s)
34
+ strings.join.gsub(/\{[0-9]\}/, options[:number].to_s)
35
35
  end
36
36
 
37
37
  def calculate_unit(seconds)
@@ -15,15 +15,13 @@ module TwitterCldr
15
15
  end
16
16
 
17
17
  def all_for(locale)
18
- locale = TwitterCldr.convert_locale(locale.to_sym)
19
- get_resource(locale)[locale][:i18n][:plural][:keys]
18
+ get_resource(locale)[:keys]
20
19
  rescue
21
20
  nil
22
21
  end
23
22
 
24
23
  def rule_for(number, locale = TwitterCldr.get_locale)
25
- locale = TwitterCldr.convert_locale(locale.to_sym)
26
- get_resource(locale)[locale][:i18n][:plural][:rule].call(number)
24
+ get_resource(locale)[:rule].call(number)
27
25
  rescue
28
26
  :other
29
27
  end
@@ -32,7 +30,7 @@ module TwitterCldr
32
30
 
33
31
  def get_resource(locale)
34
32
  locale = TwitterCldr.convert_locale(locale)
35
- eval(TwitterCldr.get_locale_resource(locale, :plurals)[locale])
33
+ eval(TwitterCldr.get_locale_resource(locale, :plurals)[locale])[locale][:i18n][:plural]
36
34
  end
37
35
 
38
36
  end
@@ -0,0 +1,11 @@
1
+ # encoding: UTF-8
2
+
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
6
+ module TwitterCldr
7
+ module Resources
8
+ autoload :Loader, 'twitter_cldr/resources/loader'
9
+ autoload :Import, 'twitter_cldr/resources/import'
10
+ end
11
+ end
@@ -0,0 +1,12 @@
1
+ # encoding: UTF-8
2
+
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
6
+ module TwitterCldr
7
+ module Resources
8
+ module Import
9
+ autoload :Tailoring, 'twitter_cldr/resources/import/tailoring'
10
+ end
11
+ end
12
+ end
@@ -0,0 +1,193 @@
1
+ # encoding: UTF-8
2
+
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
6
+ require 'nokogiri'
7
+ require 'yaml'
8
+ require 'java'
9
+
10
+ module TwitterCldr
11
+ module Resources
12
+ module Import
13
+
14
+ # This class should be used with JRuby 1.7 in 1.9 mode and ICU4J version 49.1 (available at
15
+ # http://download.icu-project.org/files/icu4j/49.1/icu4j-49_1.jar).
16
+ #
17
+ class Tailoring
18
+
19
+ SUPPORTED_RULES = %w[p s t i pc sc tc ic x]
20
+ SIMPLE_RULES = %w[p s t i]
21
+ LEVEL_RULE_REGEXP = /^(p|s|t|i)(c?)$/
22
+
23
+ IGNORED_TAGS = %w[reset text #comment]
24
+
25
+ LAST_BYTE_MASK = 0xFF
26
+
27
+ LOCALES_MAP = {
28
+ :'zh-Hant' => :'zh_Hant',
29
+ :id => :root,
30
+ :it => :root,
31
+ :ms => :root,
32
+ :nl => :root,
33
+ :pt => :root
34
+ }
35
+
36
+ EMPTY_TAILORING_DATA = { 'tailored_table' => '', 'suppressed_contractions' => '' }
37
+
38
+ class ImportError < RuntimeError; end
39
+
40
+ # Arguments:
41
+ #
42
+ # input_path - path to a directory containing CLDR tailoring data (available at
43
+ # http://unicode.org/cldr/trac/browser/tags/release-21/common/collation/
44
+ # or as a part of CLDR release at http://cldr.unicode.org/index/downloads)
45
+ #
46
+ # output_path - output directory for imported YAML files
47
+ #
48
+ # icu4j_path - path to ICU4J jar file
49
+ #
50
+ def initialize(input_path, output_path, icu4j_path)
51
+ require icu4j_path
52
+
53
+ @input_path = input_path
54
+ @output_path = output_path
55
+ end
56
+
57
+ def import(locale)
58
+ print "Importing %8s\t--\t" % locale
59
+
60
+ if tailoring_present?(locale)
61
+ YAML.dump(tailoring_data(locale), open(resource_file_path(locale), 'w'))
62
+ puts "Done."
63
+ else
64
+ YAML.dump(EMPTY_TAILORING_DATA, open(resource_file_path(locale), 'w'))
65
+ puts "Missing (generated empty tailoring resource)."
66
+ end
67
+ rescue ImportError => e
68
+ puts "Error: #{e.message}"
69
+ end
70
+
71
+ private
72
+
73
+ def tailoring_present?(locale)
74
+ File.file?(locale_file_path(locale))
75
+ end
76
+
77
+ def translated_locale(locale)
78
+ LOCALES_MAP.fetch(locale, locale)
79
+ end
80
+
81
+ def locale_file_path(locale)
82
+ File.join(@input_path, "#{translated_locale(locale)}.xml")
83
+ end
84
+
85
+ def resource_file_path(locale)
86
+ File.join(@output_path, "#{locale}.yml")
87
+ end
88
+
89
+ def tailoring_data(locale)
90
+ doc = Nokogiri::XML(open(locale_file_path(locale)))
91
+ collations = doc.at_xpath('//collations')
92
+
93
+ collation_alias = collations.at_xpath('alias[@path="//ldml/collations"]')
94
+ aliased_locale = collation_alias && collation_alias.attr('source')
95
+
96
+ return tailoring_data(aliased_locale) if aliased_locale
97
+
98
+ standard_tailoring = collations.at_xpath('collation[@type="standard"]')
99
+
100
+ {
101
+ 'tailored_table' => parse_tailorings(standard_tailoring, locale),
102
+ 'suppressed_contractions' => parse_suppressed_contractions(standard_tailoring)
103
+ }
104
+ end
105
+
106
+ def parse_tailorings(data, locale)
107
+ rules = data && data.at_xpath('rules')
108
+
109
+ return '' unless rules
110
+
111
+ collator = Java::ComIbmIcuText::Collator.get_instance(Java::JavaUtil::Locale.new(locale.to_s))
112
+
113
+ rules.children.map do |child|
114
+ validate_tailoring_rule(child)
115
+
116
+ if child.name =~ LEVEL_RULE_REGEXP
117
+ if $2.empty?
118
+ table_entry_for_rule(collator, child.text)
119
+ else
120
+ child.text.chars.map { |char| table_entry_for_rule(collator, char) }
121
+ end
122
+ elsif child.name == 'x'
123
+ context = ''
124
+ child.children.each_with_object([]) do |c, memo|
125
+ if SIMPLE_RULES.include?(c.name)
126
+ memo << table_entry_for_rule(collator, context + c.text)
127
+ elsif c.name == 'context'
128
+ context = c.text
129
+ elsif c.name != 'extend'
130
+ raise ImportError, "Rule '#{c.name}' inside <x></x> is not supported."
131
+ end
132
+ end
133
+ else
134
+ raise ImportError, "Tag '#{child.name}' is not supported." unless IGNORED_TAGS.include?(child.name)
135
+ end
136
+ end.flatten.compact.join("\n")
137
+ end
138
+
139
+ def table_entry_for_rule(collator, tailored_value)
140
+ code_points = get_code_points(tailored_value)
141
+
142
+ collation_elements = get_collation_elements(collator, tailored_value).map do |ce|
143
+ ce.map { |l| l.to_s(16).upcase }.join(', ')
144
+ end
145
+
146
+ "#{code_points.join(' ')}; [#{collation_elements.join('][')}]"
147
+ end
148
+
149
+ def parse_suppressed_contractions(data)
150
+ return '' unless data
151
+
152
+ Array(data.xpath('suppress_contractions')).map do |contractions|
153
+ Java::ComIbmIcuText::UnicodeSet.to_array(Java::ComIbmIcuText::UnicodeSet.new(contractions.text)).to_a
154
+ end.flatten.join
155
+ end
156
+
157
+ def validate_tailoring_rule(rule)
158
+ return if IGNORED_TAGS.include?(rule.name)
159
+
160
+ raise ImportError, "Rule '#{rule.name}' is not supported." unless SUPPORTED_RULES.include?(rule.name)
161
+ end
162
+
163
+ def get_collation_elements(collator, string)
164
+ iter = collator.get_collation_element_iterator(string)
165
+
166
+ collation_elements = []
167
+ ce = iter.next
168
+
169
+ while ce != Java::ComIbmIcuText::CollationElementIterator::NULLORDER
170
+ p1 = (ce >> 24) & LAST_BYTE_MASK
171
+ p2 = (ce >> 16) & LAST_BYTE_MASK
172
+
173
+ primary = p2.zero? ? p1 : (p1 << 8) + p2
174
+ secondary = (ce >> 8) & LAST_BYTE_MASK
175
+ tertiarly = ce & LAST_BYTE_MASK
176
+
177
+ collation_elements << [primary, secondary, tertiarly]
178
+
179
+ ce = iter.next
180
+ end
181
+
182
+ collation_elements
183
+ end
184
+
185
+ def get_code_points(string)
186
+ TwitterCldr::Normalization::NFD.normalize_code_points(TwitterCldr::Utils::CodePoints.from_string(string))
187
+ end
188
+
189
+ end
190
+
191
+ end
192
+ end
193
+ end