twitter_cldr 1.0.1 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (303) hide show
  1. data/NOTICE +95 -1
  2. data/README.md +4 -4
  3. data/Rakefile +18 -28
  4. data/lib/ext/calendars/date.rb +3 -0
  5. data/lib/ext/calendars/datetime.rb +3 -0
  6. data/lib/ext/calendars/time.rb +3 -0
  7. data/lib/ext/localized_object.rb +3 -0
  8. data/lib/ext/numbers/bignum.rb +3 -0
  9. data/lib/ext/numbers/fixnum.rb +3 -0
  10. data/lib/ext/numbers/float.rb +3 -0
  11. data/lib/ext/numbers/localized_number.rb +3 -0
  12. data/lib/ext/strings/string.rb +31 -0
  13. data/lib/ext/strings/symbol.rb +3 -0
  14. data/lib/formatters/base.rb +3 -0
  15. data/lib/formatters/calendars/date_formatter.rb +3 -0
  16. data/lib/formatters/calendars/datetime_formatter.rb +3 -0
  17. data/lib/formatters/calendars/time_formatter.rb +3 -0
  18. data/lib/formatters/numbers/currency_formatter.rb +3 -0
  19. data/lib/formatters/numbers/decimal_formatter.rb +3 -0
  20. data/lib/formatters/numbers/helpers/base.rb +3 -0
  21. data/lib/formatters/numbers/helpers/fraction.rb +3 -0
  22. data/lib/formatters/numbers/helpers/integer.rb +3 -0
  23. data/lib/formatters/numbers/number_formatter.rb +3 -0
  24. data/lib/formatters/numbers/percent_formatter.rb +3 -0
  25. data/lib/formatters/plurals/plural_formatter.rb +141 -0
  26. data/lib/formatters/plurals/rules.rb +4 -1
  27. data/lib/normalizers/base.rb +17 -0
  28. data/lib/normalizers/canonical/nfd.rb +81 -0
  29. data/lib/shared/currencies.rb +4 -1
  30. data/lib/shared/languages.rb +4 -1
  31. data/lib/shared/resources.rb +8 -28
  32. data/lib/shared/timezones.rb +3 -0
  33. data/lib/shared/unicode_data.rb +44 -0
  34. data/lib/tokenizers/base.rb +3 -0
  35. data/lib/tokenizers/calendars/date_tokenizer.rb +3 -0
  36. data/lib/tokenizers/calendars/datetime_tokenizer.rb +4 -1
  37. data/lib/tokenizers/calendars/time_tokenizer.rb +3 -0
  38. data/lib/tokenizers/key_path.rb +3 -0
  39. data/lib/tokenizers/numbers/number_tokenizer.rb +4 -1
  40. data/lib/tokenizers/token.rb +3 -0
  41. data/lib/twitter_cldr.rb +52 -29
  42. data/lib/utils/interpolation.rb +105 -0
  43. data/lib/utils.rb +28 -0
  44. data/lib/version.rb +6 -1
  45. data/resources/unicode_data/aegean_numbers.yml +913 -0
  46. data/resources/unicode_data/alchemical_symbols.yml +1857 -0
  47. data/resources/unicode_data/alphabetic_presentation_forms.yml +929 -0
  48. data/resources/unicode_data/ancient_greek_musical_notation.yml +1121 -0
  49. data/resources/unicode_data/ancient_greek_numbers.yml +1201 -0
  50. data/resources/unicode_data/ancient_symbols.yml +193 -0
  51. data/resources/unicode_data/arabic.yml +4049 -0
  52. data/resources/unicode_data/arabic_extended_a.yml +625 -0
  53. data/resources/unicode_data/arabic_mathematical_alphabetic_symbols.yml +2289 -0
  54. data/resources/unicode_data/arabic_presentation_forms_a.yml +9777 -0
  55. data/resources/unicode_data/arabic_presentation_forms_b.yml +2257 -0
  56. data/resources/unicode_data/arabic_supplement.yml +769 -0
  57. data/resources/unicode_data/armenian.yml +1393 -0
  58. data/resources/unicode_data/arrows.yml +1793 -0
  59. data/resources/unicode_data/avestan.yml +977 -0
  60. data/resources/unicode_data/balinese.yml +1937 -0
  61. data/resources/unicode_data/bamum.yml +1409 -0
  62. data/resources/unicode_data/bamum_supplement.yml +9105 -0
  63. data/resources/unicode_data/basic_latin.yml +2049 -0
  64. data/resources/unicode_data/batak.yml +897 -0
  65. data/resources/unicode_data/bengali.yml +1473 -0
  66. data/resources/unicode_data/block_elements.yml +513 -0
  67. data/resources/unicode_data/blocks.yml +881 -0
  68. data/resources/unicode_data/bopomofo.yml +657 -0
  69. data/resources/unicode_data/bopomofo_extended.yml +433 -0
  70. data/resources/unicode_data/box_drawing.yml +2049 -0
  71. data/resources/unicode_data/brahmi.yml +1729 -0
  72. data/resources/unicode_data/braille_patterns.yml +4097 -0
  73. data/resources/unicode_data/buginese.yml +481 -0
  74. data/resources/unicode_data/buhid.yml +321 -0
  75. data/resources/unicode_data/byzantine_musical_symbols.yml +3937 -0
  76. data/resources/unicode_data/carian.yml +785 -0
  77. data/resources/unicode_data/chakma.yml +1073 -0
  78. data/resources/unicode_data/cham.yml +1329 -0
  79. data/resources/unicode_data/cherokee.yml +1361 -0
  80. data/resources/unicode_data/cjk_compatibility.yml +4097 -0
  81. data/resources/unicode_data/cjk_compatibility_forms.yml +513 -0
  82. data/resources/unicode_data/cjk_compatibility_ideographs.yml +7553 -0
  83. data/resources/unicode_data/cjk_compatibility_ideographs_supplement.yml +8673 -0
  84. data/resources/unicode_data/cjk_radicals_supplement.yml +1841 -0
  85. data/resources/unicode_data/cjk_strokes.yml +577 -0
  86. data/resources/unicode_data/cjk_symbols_and_punctuation.yml +1025 -0
  87. data/resources/unicode_data/cjk_unified_ideographs.yml +33 -0
  88. data/resources/unicode_data/cjk_unified_ideographs_extension_a.yml +33 -0
  89. data/resources/unicode_data/cjk_unified_ideographs_extension_b.yml +33 -0
  90. data/resources/unicode_data/cjk_unified_ideographs_extension_c.yml +33 -0
  91. data/resources/unicode_data/cjk_unified_ideographs_extension_d.yml +33 -0
  92. data/resources/unicode_data/combining_diacritical_marks.yml +1793 -0
  93. data/resources/unicode_data/combining_diacritical_marks_for_symbols.yml +529 -0
  94. data/resources/unicode_data/combining_diacritical_marks_supplement.yml +689 -0
  95. data/resources/unicode_data/combining_half_marks.yml +113 -0
  96. data/resources/unicode_data/common_indic_number_forms.yml +161 -0
  97. data/resources/unicode_data/control_pictures.yml +625 -0
  98. data/resources/unicode_data/coptic.yml +1969 -0
  99. data/resources/unicode_data/counting_rod_numerals.yml +289 -0
  100. data/resources/unicode_data/cuneiform.yml +14065 -0
  101. data/resources/unicode_data/cuneiform_numbers_and_punctuation.yml +1649 -0
  102. data/resources/unicode_data/currency_symbols.yml +417 -0
  103. data/resources/unicode_data/cypriot_syllabary.yml +881 -0
  104. data/resources/unicode_data/cyrillic.yml +4097 -0
  105. data/resources/unicode_data/cyrillic_extended_a.yml +513 -0
  106. data/resources/unicode_data/cyrillic_extended_b.yml +1425 -0
  107. data/resources/unicode_data/cyrillic_supplement.yml +641 -0
  108. data/resources/unicode_data/deseret.yml +1281 -0
  109. data/resources/unicode_data/devanagari.yml +2033 -0
  110. data/resources/unicode_data/devanagari_extended.yml +449 -0
  111. data/resources/unicode_data/dingbats.yml +3057 -0
  112. data/resources/unicode_data/domino_tiles.yml +1601 -0
  113. data/resources/unicode_data/egyptian_hieroglyphs.yml +17137 -0
  114. data/resources/unicode_data/emoticons.yml +1217 -0
  115. data/resources/unicode_data/enclosed_alphanumeric_supplement.yml +2737 -0
  116. data/resources/unicode_data/enclosed_alphanumerics.yml +2561 -0
  117. data/resources/unicode_data/enclosed_cjk_letters_and_months.yml +4065 -0
  118. data/resources/unicode_data/enclosed_ideographic_supplement.yml +913 -0
  119. data/resources/unicode_data/ethiopic.yml +5729 -0
  120. data/resources/unicode_data/ethiopic_extended.yml +1265 -0
  121. data/resources/unicode_data/ethiopic_extended_a.yml +513 -0
  122. data/resources/unicode_data/ethiopic_supplement.yml +417 -0
  123. data/resources/unicode_data/general_punctuation.yml +1713 -0
  124. data/resources/unicode_data/geometric_shapes.yml +1537 -0
  125. data/resources/unicode_data/georgian.yml +1409 -0
  126. data/resources/unicode_data/georgian_supplement.yml +641 -0
  127. data/resources/unicode_data/glagolitic.yml +1505 -0
  128. data/resources/unicode_data/gothic.yml +433 -0
  129. data/resources/unicode_data/greek_and_coptic.yml +2145 -0
  130. data/resources/unicode_data/greek_extended.yml +3729 -0
  131. data/resources/unicode_data/gujarati.yml +1345 -0
  132. data/resources/unicode_data/gurmukhi.yml +1265 -0
  133. data/resources/unicode_data/halfwidth_and_fullwidth_forms.yml +3601 -0
  134. data/resources/unicode_data/hangul_compatibility_jamo.yml +1505 -0
  135. data/resources/unicode_data/hangul_jamo.yml +4097 -0
  136. data/resources/unicode_data/hangul_jamo_extended_a.yml +465 -0
  137. data/resources/unicode_data/hangul_jamo_extended_b.yml +1153 -0
  138. data/resources/unicode_data/hangul_syllables.yml +33 -0
  139. data/resources/unicode_data/hanunoo.yml +369 -0
  140. data/resources/unicode_data/hebrew.yml +1393 -0
  141. data/resources/unicode_data/high_private_use_surrogates.yml +33 -0
  142. data/resources/unicode_data/high_surrogates.yml +33 -0
  143. data/resources/unicode_data/hiragana.yml +1489 -0
  144. data/resources/unicode_data/ideographic_description_characters.yml +193 -0
  145. data/resources/unicode_data/imperial_aramaic.yml +497 -0
  146. data/resources/unicode_data/inscriptional_pahlavi.yml +433 -0
  147. data/resources/unicode_data/inscriptional_parthian.yml +481 -0
  148. data/resources/unicode_data/ipa_extensions.yml +1537 -0
  149. data/resources/unicode_data/javanese.yml +1457 -0
  150. data/resources/unicode_data/kaithi.yml +1057 -0
  151. data/resources/unicode_data/kana_supplement.yml +33 -0
  152. data/resources/unicode_data/kanbun.yml +257 -0
  153. data/resources/unicode_data/kangxi_radicals.yml +3425 -0
  154. data/resources/unicode_data/kannada.yml +1377 -0
  155. data/resources/unicode_data/katakana.yml +1537 -0
  156. data/resources/unicode_data/katakana_phonetic_extensions.yml +257 -0
  157. data/resources/unicode_data/kayah_li.yml +769 -0
  158. data/resources/unicode_data/kharoshthi.yml +1041 -0
  159. data/resources/unicode_data/khmer.yml +1825 -0
  160. data/resources/unicode_data/khmer_symbols.yml +513 -0
  161. data/resources/unicode_data/lao.yml +1073 -0
  162. data/resources/unicode_data/latin_1_supplement.yml +2049 -0
  163. data/resources/unicode_data/latin_extended_a.yml +2049 -0
  164. data/resources/unicode_data/latin_extended_additional.yml +4097 -0
  165. data/resources/unicode_data/latin_extended_b.yml +3329 -0
  166. data/resources/unicode_data/latin_extended_c.yml +513 -0
  167. data/resources/unicode_data/latin_extended_d.yml +2145 -0
  168. data/resources/unicode_data/lepcha.yml +1185 -0
  169. data/resources/unicode_data/letterlike_symbols.yml +1281 -0
  170. data/resources/unicode_data/limbu.yml +1057 -0
  171. data/resources/unicode_data/linear_b_ideograms.yml +1969 -0
  172. data/resources/unicode_data/linear_b_syllabary.yml +1409 -0
  173. data/resources/unicode_data/lisu.yml +769 -0
  174. data/resources/unicode_data/low_surrogates.yml +33 -0
  175. data/resources/unicode_data/lycian.yml +465 -0
  176. data/resources/unicode_data/lydian.yml +433 -0
  177. data/resources/unicode_data/mahjong_tiles.yml +705 -0
  178. data/resources/unicode_data/malayalam.yml +1569 -0
  179. data/resources/unicode_data/mandaic.yml +465 -0
  180. data/resources/unicode_data/mathematical_alphanumeric_symbols.yml +15937 -0
  181. data/resources/unicode_data/mathematical_operators.yml +4097 -0
  182. data/resources/unicode_data/meetei_mayek.yml +897 -0
  183. data/resources/unicode_data/meetei_mayek_extensions.yml +369 -0
  184. data/resources/unicode_data/meroitic_cursive.yml +417 -0
  185. data/resources/unicode_data/meroitic_hieroglyphs.yml +513 -0
  186. data/resources/unicode_data/miao.yml +2129 -0
  187. data/resources/unicode_data/miscellaneous_mathematical_symbols_a.yml +769 -0
  188. data/resources/unicode_data/miscellaneous_mathematical_symbols_b.yml +2049 -0
  189. data/resources/unicode_data/miscellaneous_symbols.yml +4097 -0
  190. data/resources/unicode_data/miscellaneous_symbols_and_arrows.yml +1393 -0
  191. data/resources/unicode_data/miscellaneous_symbols_and_pictographs.yml +8529 -0
  192. data/resources/unicode_data/miscellaneous_technical.yml +3905 -0
  193. data/resources/unicode_data/modifier_tone_letters.yml +513 -0
  194. data/resources/unicode_data/mongolian.yml +2497 -0
  195. data/resources/unicode_data/musical_symbols.yml +3521 -0
  196. data/resources/unicode_data/myanmar.yml +2561 -0
  197. data/resources/unicode_data/myanmar_extended_a.yml +449 -0
  198. data/resources/unicode_data/new_tai_lue.yml +1329 -0
  199. data/resources/unicode_data/nko.yml +945 -0
  200. data/resources/unicode_data/number_forms.yml +929 -0
  201. data/resources/unicode_data/ogham.yml +465 -0
  202. data/resources/unicode_data/ol_chiki.yml +769 -0
  203. data/resources/unicode_data/old_italic.yml +561 -0
  204. data/resources/unicode_data/old_persian.yml +801 -0
  205. data/resources/unicode_data/old_south_arabian.yml +513 -0
  206. data/resources/unicode_data/old_turkic.yml +1169 -0
  207. data/resources/unicode_data/optical_character_recognition.yml +177 -0
  208. data/resources/unicode_data/oriya.yml +1441 -0
  209. data/resources/unicode_data/osmanya.yml +641 -0
  210. data/resources/unicode_data/phags_pa.yml +897 -0
  211. data/resources/unicode_data/phaistos_disc.yml +737 -0
  212. data/resources/unicode_data/phoenician.yml +465 -0
  213. data/resources/unicode_data/phonetic_extensions.yml +2049 -0
  214. data/resources/unicode_data/phonetic_extensions_supplement.yml +1025 -0
  215. data/resources/unicode_data/playing_cards.yml +945 -0
  216. data/resources/unicode_data/private_use_area.yml +33 -0
  217. data/resources/unicode_data/rejang.yml +593 -0
  218. data/resources/unicode_data/rumi_numeral_symbols.yml +497 -0
  219. data/resources/unicode_data/runic.yml +1297 -0
  220. data/resources/unicode_data/samaritan.yml +977 -0
  221. data/resources/unicode_data/saurashtra.yml +1297 -0
  222. data/resources/unicode_data/sharada.yml +1329 -0
  223. data/resources/unicode_data/shavian.yml +769 -0
  224. data/resources/unicode_data/sinhala.yml +1281 -0
  225. data/resources/unicode_data/small_form_variants.yml +417 -0
  226. data/resources/unicode_data/sora_sompeng.yml +561 -0
  227. data/resources/unicode_data/spacing_modifier_letters.yml +1281 -0
  228. data/resources/unicode_data/specials.yml +81 -0
  229. data/resources/unicode_data/sundanese.yml +1025 -0
  230. data/resources/unicode_data/sundanese_supplement.yml +129 -0
  231. data/resources/unicode_data/superscripts_and_subscripts.yml +673 -0
  232. data/resources/unicode_data/supplemental_arrows_a.yml +257 -0
  233. data/resources/unicode_data/supplemental_arrows_b.yml +2049 -0
  234. data/resources/unicode_data/supplemental_mathematical_operators.yml +4097 -0
  235. data/resources/unicode_data/supplemental_punctuation.yml +961 -0
  236. data/resources/unicode_data/supplementary_private_use_area_a.yml +33 -0
  237. data/resources/unicode_data/supplementary_private_use_area_b.yml +33 -0
  238. data/resources/unicode_data/syloti_nagri.yml +705 -0
  239. data/resources/unicode_data/syriac.yml +1233 -0
  240. data/resources/unicode_data/tagalog.yml +321 -0
  241. data/resources/unicode_data/tagbanwa.yml +289 -0
  242. data/resources/unicode_data/tags.yml +1553 -0
  243. data/resources/unicode_data/tai_le.yml +561 -0
  244. data/resources/unicode_data/tai_tham.yml +2033 -0
  245. data/resources/unicode_data/tai_viet.yml +1153 -0
  246. data/resources/unicode_data/tai_xuan_jing_symbols.yml +1393 -0
  247. data/resources/unicode_data/takri.yml +1057 -0
  248. data/resources/unicode_data/tamil.yml +1153 -0
  249. data/resources/unicode_data/telugu.yml +1489 -0
  250. data/resources/unicode_data/thaana.yml +801 -0
  251. data/resources/unicode_data/thai.yml +1393 -0
  252. data/resources/unicode_data/tibetan.yml +3377 -0
  253. data/resources/unicode_data/tifinagh.yml +945 -0
  254. data/resources/unicode_data/transport_and_map_symbols.yml +1121 -0
  255. data/resources/unicode_data/ugaritic.yml +497 -0
  256. data/resources/unicode_data/unified_canadian_aboriginal_syllabics.yml +10241 -0
  257. data/resources/unicode_data/unified_canadian_aboriginal_syllabics_extended.yml +1121 -0
  258. data/resources/unicode_data/vai.yml +4801 -0
  259. data/resources/unicode_data/variation_selectors.yml +257 -0
  260. data/resources/unicode_data/variation_selectors_supplement.yml +3841 -0
  261. data/resources/unicode_data/vedic_extensions.yml +625 -0
  262. data/resources/unicode_data/vertical_forms.yml +161 -0
  263. data/resources/unicode_data/yi_radicals.yml +881 -0
  264. data/resources/unicode_data/yi_syllables.yml +18641 -0
  265. data/resources/unicode_data/yijing_hexagram_symbols.yml +1025 -0
  266. data/spec/ext/calendars/date_spec.rb +5 -1
  267. data/spec/ext/calendars/datetime_spec.rb +5 -1
  268. data/spec/ext/calendars/time_spec.rb +5 -1
  269. data/spec/ext/numbers/bignum_spec.rb +5 -1
  270. data/spec/ext/numbers/fixnum_spec.rb +5 -1
  271. data/spec/ext/numbers/float_spec.rb +5 -1
  272. data/spec/ext/numbers/localized_number_spec.rb +5 -1
  273. data/spec/ext/strings/string_spec.rb +102 -0
  274. data/spec/ext/strings/symbol_spec.rb +5 -1
  275. data/spec/formatters/base_spec.rb +5 -1
  276. data/spec/formatters/calendars/datetime_formatter_spec.rb +5 -1
  277. data/spec/formatters/numbers/currency_formatter_spec.rb +5 -1
  278. data/spec/formatters/numbers/decimal_formatter_spec.rb +5 -1
  279. data/spec/formatters/numbers/helpers/fraction_spec.rb +5 -1
  280. data/spec/formatters/numbers/helpers/integer_spec.rb +5 -1
  281. data/spec/formatters/numbers/number_formatter_spec.rb +6 -2
  282. data/spec/formatters/numbers/percent_formatter_spec.rb +5 -1
  283. data/spec/formatters/plurals/plural_formatter_spec.rb +205 -0
  284. data/spec/formatters/plurals/rules_spec.rb +28 -28
  285. data/spec/normalizers/NormalizationTest.txt +602 -0
  286. data/spec/normalizers/base_spec.rb +16 -0
  287. data/spec/normalizers/canonical/nfd_spec.rb +50 -0
  288. data/spec/shared/currencies_spec.rb +5 -1
  289. data/spec/shared/languages_spec.rb +5 -1
  290. data/spec/shared/resources_spec.rb +5 -18
  291. data/spec/shared/unicode_data_spec.rb +51 -0
  292. data/spec/spec_helper.rb +6 -3
  293. data/spec/tokenizers/base_spec.rb +3 -0
  294. data/spec/tokenizers/calendars/date_tokenizer_spec.rb +5 -1
  295. data/spec/tokenizers/calendars/datetime_tokenizer_spec.rb +5 -1
  296. data/spec/tokenizers/calendars/time_tokenizer_spec.rb +5 -1
  297. data/spec/tokenizers/key_path_spec.rb +3 -0
  298. data/spec/tokenizers/numbers/number_tokenizer_spec.rb +5 -1
  299. data/spec/tokenizers/token_spec.rb +5 -1
  300. data/spec/twitter_cldr_spec.rb +23 -1
  301. data/spec/utils/interpolation_spec.rb +124 -0
  302. data/spec/utils_spec.rb +32 -0
  303. metadata +285 -21
@@ -0,0 +1,81 @@
1
+ # encoding: UTF-8
2
+
3
module TwitterCldr
  module Normalizers
    # Normalizer producing the canonical decomposition (NFD) of a string.
    #
    # Code points are handled throughout as upper-case hexadecimal strings
    # (e.g. "AC00"); the conversion between characters and code points is done
    # by char_to_code_point / code_point_to_char, which are not defined in this
    # class (presumably inherited from Base -- confirm).
    class NFD < Base
      # Constants of the arithmetic Hangul syllable decomposition.
      # Documented in Section 3.12 at http://www.unicode.org/versions/Unicode6.1.0/ch03.pdf
      # (The hash previously listed the misspelled key :Scount twice, so :SCount
      # did not exist and the surviving value was the wrong 1172.)
      @@hangul_constants = {
        :SBase  => "AC00".hex,
        :LBase  => "1100".hex,
        :VBase  => "1161".hex,
        :TBase  => "11A7".hex,
        :LCount => 19,
        :VCount => 21,
        :TCount => 28,
        :NCount => 588,   # VCount * TCount
        :SCount => 11172  # LCount * NCount
      }

      class << self
        # Returns the NFD form of +string+.
        def normalize(string)
          # Convert string to code points
          code_points = string.split('').map { |char| char_to_code_point(char) }

          # Normalize, then convert the normalized code points back to a string
          normalize_code_points(code_points).map { |code_point| code_point_to_char(code_point) }.join
        end

        # Decomposes every code point and puts combining marks into canonical
        # order. Returns a new array; the argument itself is not modified.
        def normalize_code_points(code_points)
          decomposed = code_points.map { |code_point| decompose(code_point) }.flatten
          reorder(decomposed)
          decomposed
        end

        # Recursively replaces the given code point with the values in its
        # Decomposition_Mapping property.
        #
        # Returns the code point itself when no data is known for it, when it
        # has no mapping, or when its mapping is a compatibility mapping
        # (tagged with <...>); otherwise returns an array of code points.
        def decompose(code_point)
          unicode_data = TwitterCldr::Shared::UnicodeData.for_code_point(code_point)
          return code_point unless unicode_data

          # Hangul syllables are decomposed arithmetically. They are recognised
          # by their position relative to SBase rather than by character name,
          # so only the SCount code points starting at SBase qualify.
          s_index = code_point.hex - @@hangul_constants[:SBase]
          if s_index >= 0 && s_index < @@hangul_constants[:SCount]
            return decompose_hangul(s_index)
          end

          decomposition_mapping = unicode_data.decomposition.to_s.split

          # Return the code point if no mapping exists or if it is a compatibility mapping
          if decomposition_mapping.empty? || decomposition_mapping.first =~ /<.*>/
            code_point
          else
            decomposition_mapping.map { |mapped_code_point| decompose(mapped_code_point) }.flatten
          end
        end

        # Swaps any two adjacent code points A & B if ccc(A) > ccc(B) > 0,
        # repeating until no such pair is left.
        #
        # Sorts +code_points+ in place and returns it. Combining classes are
        # looked up once per code point and moved together with the code points.
        def reorder(code_points)
          classes = code_points.map { |code_point| combining_class_for(code_point) }
          last    = code_points.size - 1
          swapped = true

          while swapped
            swapped = false
            0.upto(last - 1) do |i|
              if classes[i] > classes[i + 1] && classes[i + 1] > 0
                code_points[i], code_points[i + 1] = code_points[i + 1], code_points[i]
                classes[i], classes[i + 1] = classes[i + 1], classes[i]
                swapped = true
              end
            end
          end

          code_points
        end

        # Returns the combining class of +code_point+ as an integer,
        # or 0 when no data is known for the code point.
        def combining_class_for(code_point)
          unicode_data = TwitterCldr::Shared::UnicodeData.for_code_point(code_point)
          unicode_data ? unicode_data.combining_class.to_i : 0
        end

        private

        # Splits the Hangul syllable at offset +s_index+ from SBase into its
        # leading consonant, vowel and (when present) trailing consonant,
        # returned as upper-case hexadecimal code points.
        def decompose_hangul(s_index)
          l_index = s_index / @@hangul_constants[:NCount]
          v_index = (s_index % @@hangul_constants[:NCount]) / @@hangul_constants[:TCount]
          t_index = s_index % @@hangul_constants[:TCount]

          parts = [@@hangul_constants[:LBase] + l_index, @@hangul_constants[:VBase] + v_index]
          # A trailing index of 0 means the syllable has no trailing consonant.
          parts << (@@hangul_constants[:TBase] + t_index) if t_index > 0
          parts.map { |part| part.to_s(16).upcase }
        end
      end
    end
  end
end
@@ -1,9 +1,12 @@
1
1
  # encoding: UTF-8
2
2
 
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
3
6
  module TwitterCldr
4
7
  module Shared
5
8
  class Currencies
6
- @@resource = TwitterCldr.resources.resource_for("shared", "currencies")[:shared][:currencies]
9
+ @@resource = TwitterCldr.get_resource("shared", "currencies")[:shared][:currencies]
7
10
 
8
11
  class << self
9
12
  def countries
@@ -1,5 +1,8 @@
1
1
  # encoding: UTF-8
2
2
 
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
3
6
  module TwitterCldr
4
7
  module Shared
5
8
  class Languages
@@ -42,7 +45,7 @@ module TwitterCldr
42
45
 
43
46
  def get_resource(locale)
44
47
  locale = TwitterCldr.convert_locale(locale)
45
- TwitterCldr.resources.resource_for(locale, "languages")[locale]
48
+ TwitterCldr.get_resource(locale, "languages")[locale]
46
49
  end
47
50
  end
48
51
  end
@@ -1,47 +1,27 @@
1
1
  # encoding: UTF-8
2
2
 
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
3
6
  module TwitterCldr
4
7
  module Shared
5
8
  class Resources
6
9
  def initialize
7
- @resources_by_locale = {}
10
+ @resources_by_locale = Hash.new do |hash, locale|
11
+ hash[locale] = Hash.new { |h, resource| h[resource] = data_for(locale, resource) }
12
+ end
8
13
  end
9
14
 
10
15
  def resource_for(locale, resource)
11
- locale = locale.to_sym
12
- unless @resources_by_locale.include?(locale)
13
- @resources_by_locale[locale] = {}
14
- end
15
-
16
- unless @resources_by_locale[locale].include?(resource)
17
- @resources_by_locale[locale][resource] = data_for(locale, resource)
18
- end
19
-
20
- @resources_by_locale[locale][resource]
16
+ @resources_by_locale[locale.to_sym][resource]
21
17
  end
22
18
 
23
19
  protected
24
20
 
25
21
  def data_for(locale, resource)
26
- deep_symbolize_keys(YAML.load(File.read(TwitterCldr.get_resource_file(locale, resource))))
22
+ TwitterCldr::Utils.deep_symbolize_keys(YAML.load(File.read(TwitterCldr.get_resource_file(locale, resource))))
27
23
  end
28
24
 
29
- # adapted from: http://snippets.dzone.com/posts/show/11121 (first comment)
30
- def deep_symbolize_keys(arg)
31
- case arg
32
- when Array then
33
- arg.map { |elem| deep_symbolize_keys(elem) }
34
- when Hash then
35
- Hash[
36
- arg.map do |key, value|
37
- k = key.is_a?(String) ? key.to_sym : key
38
- v = deep_symbolize_keys(value)
39
- [k, v]
40
- end]
41
- else
42
- arg
43
- end
44
- end
45
25
  end
46
26
  end
47
27
  end
@@ -1,3 +1,6 @@
1
1
  # encoding: UTF-8
2
2
 
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
3
6
  # not yet implemented
@@ -0,0 +1,44 @@
1
+ # encoding: UTF-8
2
+
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
6
module TwitterCldr
  module Shared
    # Looks up per-code-point Unicode data in the "unicode_data" resources.
    class UnicodeData
      # One record of Unicode data; fields are filled positionally from the
      # array stored for a code point in its block resource.
      Attributes = Struct.new(:code_point, :name, :category, :combining_class, :bidi_class, :decomposition,
                              :digit_value, :non_decimal_digit_value, :numeric_value, :bidi_mirrored, :unicode1_name,
                              :iso_comment, :simple_uppercase_map, :simple_lowercase_map, :simple_titlecase_map)

      class << self
        # Returns the Attributes for +code_point+ (a hexadecimal string such as
        # "1F3E9"), or nil when no block contains the code point or the block
        # holds no data for it.
        def for_code_point(code_point)
          # Parse once instead of once per block tested.
          numeric_code_point = code_point.to_i(16)
          blocks = TwitterCldr.get_resource("unicode_data", "blocks")

          # Find the target block
          target = blocks.find { |block_name, range| range.include?(numeric_code_point) }
          return nil unless target

          block_data = TwitterCldr.get_resource("unicode_data", target.first)
          code_point_data = block_data.fetch(code_point.to_sym) do |code_point_sym|
            get_range_start(code_point_sym, block_data)
          end

          Attributes.new(*code_point_data) if code_point_data
        end

        private

        # Check if block constitutes a range. The code point beginning a range will have a name enclosed in <>, ending with 'First'
        # eg: <CJK Ideograph Extension A, First>
        # http://unicode.org/reports/tr44/#Code_Point_Ranges
        #
        # Returns a copy of the range-start record with the code point replaced
        # by +code_point+ and ", First" removed from the name, or nil when the
        # block is empty or its first entry does not start a range. The stored
        # record is left untouched.
        def get_range_start(code_point, block_data)
          start_code_point = block_data.keys.min_by { |key| key.to_s.to_i(16) }
          start_data = block_data[start_code_point]
          return nil unless start_data && start_data[1] =~ /<.*, First>/

          range_data = start_data.clone
          range_data[0] = code_point.to_s
          range_data[1] = range_data[1].sub(', First', '')
          range_data
        end
      end
    end
  end
end
@@ -1,5 +1,8 @@
1
1
  # encoding: UTF-8
2
2
 
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
3
6
  module TwitterCldr
4
7
  module Tokenizers
5
8
  class Base
@@ -1,5 +1,8 @@
1
1
  # encoding: UTF-8
2
2
 
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
3
6
  module TwitterCldr
4
7
  module Tokenizers
5
8
  class DateTokenizer < TwitterCldr::Tokenizers::DateTimeTokenizer
@@ -1,5 +1,8 @@
1
1
  # encoding: UTF-8
2
2
 
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
3
6
  module TwitterCldr
4
7
  module Tokenizers
5
8
  class DateTimeTokenizer < Base
@@ -32,7 +35,7 @@ module TwitterCldr
32
35
  protected
33
36
 
34
37
  def init_resources
35
- @resource = TwitterCldr.resources.resource_for(@locale, "calendars")[TwitterCldr.convert_locale(@locale)]
38
+ @resource = TwitterCldr.get_resource(@locale, "calendars")[TwitterCldr.convert_locale(@locale)]
36
39
  end
37
40
 
38
41
  def init_placeholders
@@ -1,5 +1,8 @@
1
1
  # encoding: UTF-8
2
2
 
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
3
6
  module TwitterCldr
4
7
  module Tokenizers
5
8
  class TimeTokenizer < TwitterCldr::Tokenizers::DateTimeTokenizer
@@ -1,5 +1,8 @@
1
1
  # encoding: UTF-8
2
2
 
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
3
6
  module TwitterCldr
4
7
  module Tokenizers
5
8
  class KeyPath
@@ -1,5 +1,8 @@
1
1
  # encoding: UTF-8
2
2
 
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
3
6
  module TwitterCldr
4
7
  module Tokenizers
5
8
  class NumberTokenizer < Base
@@ -40,7 +43,7 @@ module TwitterCldr
40
43
  end
41
44
 
42
45
  def init_resources
43
- @resource = TwitterCldr.resources.resource_for(@locale, "numbers")[TwitterCldr.convert_locale(@locale)]
46
+ @resource = TwitterCldr.get_resource(@locale, "numbers")[TwitterCldr.convert_locale(@locale)]
44
47
  end
45
48
 
46
49
  def pattern_for(resource)
@@ -1,5 +1,8 @@
1
1
  # encoding: UTF-8
2
2
 
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
3
6
  module TwitterCldr
4
7
  module Tokenizers
5
8
  class Token
data/lib/twitter_cldr.rb CHANGED
@@ -1,5 +1,8 @@
1
1
  # encoding: UTF-8
2
2
 
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
3
6
  $:.push(File.dirname(__FILE__))
4
7
 
5
8
  $KCODE = 'UTF-8' unless RUBY_VERSION >= '1.9.0'
@@ -7,6 +10,7 @@ $KCODE = 'UTF-8' unless RUBY_VERSION >= '1.9.0'
7
10
  require 'yaml'
8
11
  require 'date'
9
12
  require 'time'
13
+ require 'forwardable'
10
14
 
11
15
  require 'version'
12
16
 
@@ -20,12 +24,18 @@ require 'ext/numbers/bignum'
20
24
  require 'ext/numbers/fixnum'
21
25
  require 'ext/numbers/float'
22
26
  require 'ext/strings/symbol'
27
+ require 'ext/strings/string'
28
+
29
+ require 'utils'
23
30
 
24
31
  # manages access to CLDR resources (yaml files in resources dir)
25
32
  require 'shared/resources'
26
33
 
27
34
 
28
35
  module TwitterCldr
36
+
37
+ extend SingleForwardable
38
+
29
39
  DEFAULT_LOCALE = :en
30
40
  RESOURCE_DIR = File.join(File.dirname(File.dirname(File.expand_path(__FILE__))), "resources")
31
41
 
@@ -36,51 +46,59 @@ module TwitterCldr
36
46
 
37
47
  @@resources = TwitterCldr::Shared::Resources.new
38
48
 
39
- def self.get_resource_file(locale, resource)
40
- File.join(RESOURCE_DIR, self.convert_locale(locale).to_s, "#{resource}.yml")
41
- end
49
+ def_delegator :resources, :resource_for, :get_resource
42
50
 
43
- def self.resources
44
- @@resources
45
- end
51
+ class << self
46
52
 
47
- def self.get_locale
48
- if defined?(FastGettext)
49
- locale = FastGettext.locale
50
- locale = DEFAULT_LOCALE if locale.to_s.empty?
51
- else
52
- locale = DEFAULT_LOCALE
53
+ def get_resource_file(locale, resource)
54
+ File.join(RESOURCE_DIR, convert_locale(locale).to_s, "#{resource}.yml")
53
55
  end
54
56
 
55
- (self.supported_locale?(locale) ? locale : DEFAULT_LOCALE).to_sym
56
- end
57
+ def resources
58
+ @@resources
59
+ end
57
60
 
58
- def self.convert_locale(locale)
59
- locale = locale.to_sym
60
- TWITTER_LOCALE_MAP.include?(locale) ? TWITTER_LOCALE_MAP[locale] : locale
61
- end
61
+ def get_locale
62
+ if defined?(FastGettext)
63
+ locale = FastGettext.locale
64
+ locale = DEFAULT_LOCALE if locale.to_s.empty?
65
+ else
66
+ locale = DEFAULT_LOCALE
67
+ end
62
68
 
63
- def self.supported_locales
64
- unless defined?(@@supported_locales)
65
- rejectable = [:shared]
66
- @@supported_locales = Dir.glob(File.join(File.dirname(File.dirname(__FILE__)), "resources/*")).map do |file|
67
- File.basename(file).to_sym
68
- end.reject { |file| rejectable.include?(file) }
69
+ (supported_locale?(locale) ? locale : DEFAULT_LOCALE).to_sym
69
70
  end
70
71
 
71
- @@supported_locales
72
- end
72
+ def convert_locale(locale)
73
+ locale = locale.to_sym
74
+ TWITTER_LOCALE_MAP.include?(locale) ? TWITTER_LOCALE_MAP[locale] : locale
75
+ end
76
+
77
+ def supported_locales
78
+ unless defined?(@@supported_locales)
79
+ rejectable = [:shared]
80
+ @@supported_locales = Dir.glob(File.join(File.dirname(File.dirname(__FILE__)), "resources/*")).map do |file|
81
+ File.basename(file).to_sym
82
+ end.reject { |file| rejectable.include?(file) }
83
+ end
84
+
85
+ @@supported_locales
86
+ end
87
+
88
+ def supported_locale?(locale)
89
+ locale = locale.to_sym
90
+ supported_locales.include?(locale) || supported_locales.include?(convert_locale(locale))
91
+ end
73
92
 
74
- def self.supported_locale?(locale)
75
- locale = locale.to_sym
76
- self.supported_locales.include?(locale) || self.supported_locales.include?(self.convert_locale(locale))
77
93
  end
94
+
78
95
  end
79
96
 
80
97
 
81
98
  # other shared libraries (most access shared resource data in resources/shared)
82
99
  require 'shared/currencies'
83
100
  require 'shared/languages'
101
+ require 'shared/unicode_data'
84
102
 
85
103
  # all tokenizers
86
104
  require 'tokenizers/base'
@@ -100,9 +118,14 @@ require 'formatters/numbers/number_formatter'
100
118
  require 'formatters/numbers/decimal_formatter'
101
119
  require 'formatters/numbers/currency_formatter'
102
120
  require 'formatters/numbers/percent_formatter'
121
+ require 'formatters/plurals/plural_formatter'
103
122
  require 'formatters/plurals/rules'
104
123
 
105
124
  # formatter helpers
106
125
  require 'formatters/numbers/helpers/base'
107
126
  require 'formatters/numbers/helpers/fraction'
108
127
  require 'formatters/numbers/helpers/integer'
128
+
129
+ # all normalizers
130
+ require 'normalizers/base'
131
+ require 'normalizers/canonical/nfd'
@@ -0,0 +1,105 @@
1
+ # encoding: UTF-8
2
+
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
6
+ # The implementation of the TwitterCldr.interpolate method that backports String interpolation capabilities
7
+ # (originally implemented in String#% method) from Ruby 1.9 to Ruby 1.8 is heavily influenced by the
8
+ # implementation of the same feature in i18n (https://github.com/svenfuchs/i18n/blob/89ea337f48562370988421e50caa7c2fe89452c7/lib/i18n/core_ext/string/interpolate.rb)
9
+ # and gettext (https://github.com/mutoh/gettext/blob/11b8c1525ba9f00afb1942f7ebf34bec12f7558b/lib/gettext/core_ext/string.rb) gems.
10
+ #
11
+ # See NOTICE file for corresponding license agreements.
12
+
13
+
14
+ # KeyError is raised during interpolation when there is a placeholder that doesn't have a corresponding key in the
15
+ # interpolation hash. KeyError is defined in 1.9. We define it for prior versions of Ruby to have the same behavior.
16
+ #
17
+ class KeyError < IndexError
18
+ def initialize(message = nil)
19
+ super(message || 'key not found')
20
+ end
21
+ end unless defined?(KeyError)
22
+
23
+
24
+ module TwitterCldr
25
+ module Utils
26
+
27
+ HASH_INTERPOLATION_REGEXP = Regexp.union(
28
+ /%\{(\w+)\}/,
29
+ /%<(\w+)>(.*?\d*\.?\d*[bBdiouxXeEfgGcps])/
30
+ )
31
+
32
+ HASH_INTERPOLATION_WITH_ESCAPE_REGEXP = Regexp.union(
33
+ /%%/,
34
+ HASH_INTERPOLATION_REGEXP
35
+ )
36
+
37
+ class << self
38
+
39
+ # Uses +string+ as a format specification and returns the result of applying it to +args+.
40
+ #
41
+ # There are three ways to use it:
42
+ #
43
+ # * Using a single argument or Array of arguments.
44
+ #
45
+ # This is the default behaviour of the String#% method. See Kernel#sprintf for more details about the format
46
+ # specification.
47
+ #
48
+ # Example:
49
+ #
50
+ # TwitterCldr::Utils.interpolate('%d %s', [1, 'message'])
51
+ # # => "1 message"
52
+ #
53
+ # * Using a Hash as an argument and unformatted, named placeholders (Ruby 1.9 syntax).
54
+ #
55
+ # When you pass a Hash as an argument and specify placeholders with %{foo} it will interpret the hash values as
56
+ # named arguments.
57
+ #
58
+ # Example:
59
+ #
60
+ # TwitterCldr::Utils.interpolate('%{firstname}, %{lastname}', :firstname => 'Masao', :lastname => 'Mutoh')
61
+ # # => "Masao, Mutoh"
62
+ #
63
+ # * Using a Hash as an argument and formatted, named placeholders (Ruby 1.9 syntax).
64
+ #
65
+ # When you pass a Hash as an argument and specify placeholders with %<foo>d it will interpret the hash values
66
+ # as named arguments and format the value according to the formatting instruction appended to the closing >.
67
+ #
68
+ # Example:
69
+ #
70
+ # TwitterCldr::Utils.interpolate('%<integer>d, %<float>.1f', :integer => 10, :float => 43.4)
71
+ # # => "10, 43.4"
72
+ #
73
+ # An exception can be thrown in two cases when Ruby 1.9 interpolation syntax is used:
74
+ #
75
+ # * ArgumentError is thrown if Ruby 1.9 interpolation syntax is used in +string+, but +args+ is not a Hash;
76
+ # * KeyError is thrown if the value for one of the placeholders in +string+ is missing in +args+ hash.
77
+ #
78
+ def interpolate(string, args)
79
+ string =~ HASH_INTERPOLATION_REGEXP ? interpolate_hash(string, args) : interpolate_value_or_array(string, args)
80
+ end
81
+
82
+ private
83
+
84
+ def interpolate_hash(string, args)
85
+ raise ArgumentError.new('expected a Hash') unless args.is_a?(Hash)
86
+
87
+ string.gsub(HASH_INTERPOLATION_WITH_ESCAPE_REGEXP) do |match|
88
+ if match == '%%'
89
+ '%'
90
+ else
91
+ key = ($1 || $2).to_sym
92
+ raise KeyError unless args.has_key?(key)
93
+ $3 ? sprintf("%#{$3}", args[key]) : args[key]
94
+ end
95
+ end
96
+ end
97
+
98
+ def interpolate_value_or_array(string, args)
99
+ string.gsub(/%([{<])/, '%%\1') % args
100
+ end
101
+
102
+ end
103
+
104
+ end
105
+ end
data/lib/utils.rb ADDED
@@ -0,0 +1,28 @@
1
+ # encoding: UTF-8
2
+
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
6
+ require 'utils/interpolation'
7
+
8
+ module TwitterCldr
9
+ module Utils
10
+
11
+ class << self
12
+
13
+ # adapted from: http://snippets.dzone.com/posts/show/11121 (first comment)
14
+ def deep_symbolize_keys(arg)
15
+ case arg
16
+ when Array
17
+ arg.map { |elem| deep_symbolize_keys(elem) }
18
+ when Hash
19
+ Hash[arg.map { |k, v| [k.is_a?(String) ? k.to_sym : k, deep_symbolize_keys(v)] }]
20
+ else
21
+ arg
22
+ end
23
+ end
24
+
25
+ end
26
+
27
+ end
28
+ end
data/lib/version.rb CHANGED
@@ -1,3 +1,8 @@
1
+ # encoding: UTF-8
2
+
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
1
6
  module TwitterCldr
2
- VERSION = "1.0.1"
7
+ VERSION = "1.1.0"
3
8
  end