twitter_cldr 1.6.2 → 1.7.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (733) hide show
  1. data/Gemfile +3 -1
  2. data/History.txt +8 -0
  3. data/README.md +64 -14
  4. data/Rakefile +57 -7
  5. data/js/lib/compiler.rb +3 -1
  6. data/js/lib/mustache/bundle.coffee +5 -5
  7. data/js/lib/mustache/numbers/numbers.coffee +179 -0
  8. data/js/lib/mustache/shared/currencies.coffee +27 -0
  9. data/js/lib/renderers/numbers/numbers_renderer.rb +31 -0
  10. data/js/lib/renderers/plurals/rules/plural_rules_compiler.rb +5 -1
  11. data/js/lib/renderers/shared/currencies_renderer.rb +23 -0
  12. data/js/lib/twitter_cldr_js.rb +2 -0
  13. data/js/spec/js/calendars/timespan_spec.js +2 -2
  14. data/js/spec/js/numbers/currency_spec.js +34 -0
  15. data/js/spec/js/numbers/decimal_spec.js +24 -0
  16. data/js/spec/js/numbers/helpers/fraction_spec.js +23 -0
  17. data/js/spec/js/numbers/helpers/integer_spec.js +100 -0
  18. data/js/spec/js/numbers/number_spec.js +70 -0
  19. data/js/spec/js/numbers/percent_spec.js +22 -0
  20. data/js/spec/rb/renderers/plurals/plural_rules_compiler_spec.rb +4 -0
  21. data/lib/twitter_cldr/collation/collator.rb +14 -20
  22. data/lib/twitter_cldr/collation/trie_builder.rb +3 -3
  23. data/lib/twitter_cldr/formatters/numbers/decimal_formatter.rb +0 -6
  24. data/lib/twitter_cldr/formatters/numbers/helpers/base.rb +1 -1
  25. data/lib/twitter_cldr/formatters/numbers/number_formatter.rb +6 -2
  26. data/lib/twitter_cldr/normalization/base.rb +7 -1
  27. data/lib/twitter_cldr/normalization/nfd.rb +2 -6
  28. data/lib/twitter_cldr/normalization/nfkc.rb +4 -10
  29. data/lib/twitter_cldr/normalization/nfkd.rb +12 -37
  30. data/lib/twitter_cldr/resources.rb +10 -3
  31. data/lib/twitter_cldr/resources/canonical_compositions_updater.rb +51 -0
  32. data/lib/twitter_cldr/resources/composition_exclusions_importer.rb +62 -0
  33. data/lib/twitter_cldr/resources/custom_locales_resources_importer.rb +80 -0
  34. data/lib/twitter_cldr/resources/download.rb +41 -0
  35. data/lib/twitter_cldr/resources/loader.rb +1 -1
  36. data/lib/twitter_cldr/resources/locales_resources_importer.rb +105 -0
  37. data/lib/twitter_cldr/resources/phone_codes_importer.rb +48 -0
  38. data/lib/twitter_cldr/resources/postal_codes_importer.rb +44 -0
  39. data/lib/twitter_cldr/resources/tailoring_importer.rb +18 -16
  40. data/lib/twitter_cldr/resources/unicode_data_importer.rb +90 -0
  41. data/lib/twitter_cldr/shared.rb +3 -1
  42. data/lib/twitter_cldr/shared/code_point.rb +47 -36
  43. data/lib/twitter_cldr/shared/currencies.rb +12 -15
  44. data/lib/twitter_cldr/shared/phone_codes.rb +30 -0
  45. data/lib/twitter_cldr/shared/postal_codes.rb +35 -0
  46. data/lib/twitter_cldr/tokenizers/calendars/timespan_tokenizer.rb +11 -3
  47. data/lib/twitter_cldr/tokenizers/numbers/number_tokenizer.rb +8 -1
  48. data/lib/twitter_cldr/utils/code_points.rb +2 -2
  49. data/lib/twitter_cldr/version.rb +1 -1
  50. data/resources/collation/tailoring/af.yml +3 -3
  51. data/resources/collation/tailoring/ar.yml +3 -3
  52. data/resources/collation/tailoring/ca.yml +3 -3
  53. data/resources/collation/tailoring/cs.yml +3 -3
  54. data/resources/collation/tailoring/da.yml +4 -4
  55. data/resources/collation/tailoring/de.yml +3 -3
  56. data/resources/collation/tailoring/el.yml +3 -3
  57. data/resources/collation/tailoring/en.yml +3 -3
  58. data/resources/collation/tailoring/es.yml +3 -3
  59. data/resources/collation/tailoring/eu.yml +3 -3
  60. data/resources/collation/tailoring/fa.yml +3 -3
  61. data/resources/collation/tailoring/fi.yml +3 -3
  62. data/resources/collation/tailoring/fil.yml +3 -3
  63. data/resources/collation/tailoring/fr.yml +3 -3
  64. data/resources/collation/tailoring/he.yml +3 -3
  65. data/resources/collation/tailoring/hi.yml +3 -3
  66. data/resources/collation/tailoring/hu.yml +3 -3
  67. data/resources/collation/tailoring/id.yml +3 -3
  68. data/resources/collation/tailoring/it.yml +3 -3
  69. data/resources/collation/tailoring/ja.yml +3 -3
  70. data/resources/collation/tailoring/ko.yml +3 -3
  71. data/resources/collation/tailoring/ms.yml +3 -3
  72. data/resources/collation/tailoring/nb.yml +3 -3
  73. data/resources/collation/tailoring/nl.yml +3 -3
  74. data/resources/collation/tailoring/pl.yml +3 -3
  75. data/resources/collation/tailoring/pt.yml +3 -3
  76. data/resources/collation/tailoring/ru.yml +3 -3
  77. data/resources/collation/tailoring/sv.yml +3 -3
  78. data/resources/collation/tailoring/th.yml +3 -3
  79. data/resources/collation/tailoring/tr.yml +3 -3
  80. data/resources/collation/tailoring/uk.yml +3 -3
  81. data/resources/collation/tailoring/ur.yml +3 -3
  82. data/resources/collation/tailoring/zh-Hant.yml +3 -3
  83. data/resources/collation/tailoring/zh.yml +3 -3
  84. data/resources/custom/locales/af/units.yml +19 -19
  85. data/resources/custom/locales/ar/units.yml +35 -35
  86. data/resources/custom/locales/ca/units.yml +19 -19
  87. data/resources/custom/locales/cs/units.yml +23 -23
  88. data/resources/custom/locales/da/units.yml +19 -19
  89. data/resources/custom/locales/de/units.yml +19 -19
  90. data/resources/custom/locales/el/units.yml +19 -19
  91. data/resources/custom/locales/en/units.yml +10 -10
  92. data/resources/custom/locales/es/units.yml +19 -19
  93. data/resources/custom/locales/eu/units.yml +19 -19
  94. data/resources/custom/locales/fa/units.yml +15 -15
  95. data/resources/custom/locales/fi/units.yml +19 -19
  96. data/resources/custom/locales/fil/units.yml +19 -19
  97. data/resources/custom/locales/fr/units.yml +19 -19
  98. data/resources/custom/locales/he/units.yml +19 -19
  99. data/resources/custom/locales/hi/units.yml +19 -19
  100. data/resources/custom/locales/hu/units.yml +15 -15
  101. data/resources/custom/locales/id/units.yml +15 -15
  102. data/resources/custom/locales/it/units.yml +19 -19
  103. data/resources/custom/locales/ja/units.yml +15 -15
  104. data/resources/custom/locales/ko/units.yml +15 -15
  105. data/resources/custom/locales/ms/units.yml +15 -15
  106. data/resources/custom/locales/nb/units.yml +19 -19
  107. data/resources/custom/locales/nl/units.yml +19 -19
  108. data/resources/custom/locales/pl/units.yml +27 -23
  109. data/resources/custom/locales/pt/units.yml +19 -19
  110. data/resources/custom/locales/ru/units.yml +27 -27
  111. data/resources/custom/locales/sv/units.yml +19 -19
  112. data/resources/custom/locales/th/units.yml +15 -15
  113. data/resources/custom/locales/tr/units.yml +15 -15
  114. data/resources/custom/locales/uk/units.yml +27 -27
  115. data/resources/custom/locales/ur/units.yml +19 -19
  116. data/resources/custom/locales/zh-Hant/units.yml +15 -15
  117. data/resources/custom/locales/zh/units.yml +15 -15
  118. data/resources/locales/af/calendars.yml +114 -113
  119. data/resources/locales/af/languages.yml +174 -173
  120. data/resources/locales/af/numbers.yml +43 -42
  121. data/resources/locales/af/plurals.yml +3 -2
  122. data/resources/locales/af/units.yml +136 -135
  123. data/resources/locales/ar/calendars.yml +121 -120
  124. data/resources/locales/ar/languages.yml +501 -500
  125. data/resources/locales/ar/numbers.yml +36 -35
  126. data/resources/locales/ar/plurals.yml +9 -1
  127. data/resources/locales/ar/units.yml +220 -219
  128. data/resources/locales/ca/calendars.yml +157 -156
  129. data/resources/locales/ca/languages.yml +511 -510
  130. data/resources/locales/ca/numbers.yml +44 -43
  131. data/resources/locales/ca/plurals.yml +3 -2
  132. data/resources/locales/ca/units.yml +136 -135
  133. data/resources/locales/cs/calendars.yml +153 -152
  134. data/resources/locales/cs/languages.yml +472 -471
  135. data/resources/locales/cs/numbers.yml +45 -44
  136. data/resources/locales/cs/plurals.yml +3 -2
  137. data/resources/locales/cs/units.yml +164 -163
  138. data/resources/locales/da/calendars.yml +117 -116
  139. data/resources/locales/da/languages.yml +515 -514
  140. data/resources/locales/da/numbers.yml +44 -43
  141. data/resources/locales/da/plurals.yml +3 -1
  142. data/resources/locales/da/units.yml +122 -121
  143. data/resources/locales/de/calendars.yml +136 -135
  144. data/resources/locales/de/languages.yml +514 -513
  145. data/resources/locales/de/numbers.yml +44 -43
  146. data/resources/locales/de/plurals.yml +3 -1
  147. data/resources/locales/de/units.yml +136 -135
  148. data/resources/locales/el/calendars.yml +138 -137
  149. data/resources/locales/el/languages.yml +520 -519
  150. data/resources/locales/el/numbers.yml +43 -42
  151. data/resources/locales/el/plurals.yml +3 -2
  152. data/resources/locales/el/units.yml +143 -142
  153. data/resources/locales/en/calendars.yml +117 -116
  154. data/resources/locales/en/languages.yml +559 -558
  155. data/resources/locales/en/numbers.yml +32 -31
  156. data/resources/locales/en/plurals.yml +3 -1
  157. data/resources/locales/en/units.yml +108 -107
  158. data/resources/locales/es/calendars.yml +118 -117
  159. data/resources/locales/es/languages.yml +511 -510
  160. data/resources/locales/es/numbers.yml +42 -41
  161. data/resources/locales/es/plurals.yml +3 -1
  162. data/resources/locales/es/units.yml +136 -135
  163. data/resources/locales/eu/calendars.yml +124 -123
  164. data/resources/locales/eu/languages.yml +162 -161
  165. data/resources/locales/eu/numbers.yml +44 -43
  166. data/resources/locales/eu/plurals.yml +3 -2
  167. data/resources/locales/eu/units.yml +129 -128
  168. data/resources/locales/fa/calendars.yml +137 -136
  169. data/resources/locales/fa/languages.yml +489 -488
  170. data/resources/locales/fa/numbers.yml +31 -30
  171. data/resources/locales/fa/plurals.yml +3 -1
  172. data/resources/locales/fa/units.yml +122 -121
  173. data/resources/locales/fi/calendars.yml +153 -152
  174. data/resources/locales/fi/languages.yml +520 -519
  175. data/resources/locales/fi/numbers.yml +44 -43
  176. data/resources/locales/fi/plurals.yml +3 -1
  177. data/resources/locales/fi/units.yml +136 -135
  178. data/resources/locales/fil/calendars.yml +123 -122
  179. data/resources/locales/fil/languages.yml +169 -168
  180. data/resources/locales/fil/numbers.yml +32 -31
  181. data/resources/locales/fil/plurals.yml +3 -1
  182. data/resources/locales/fil/units.yml +122 -121
  183. data/resources/locales/fr/calendars.yml +144 -143
  184. data/resources/locales/fr/languages.yml +512 -511
  185. data/resources/locales/fr/numbers.yml +44 -43
  186. data/resources/locales/fr/plurals.yml +3 -1
  187. data/resources/locales/fr/units.yml +136 -135
  188. data/resources/locales/he/calendars.yml +120 -119
  189. data/resources/locales/he/languages.yml +282 -281
  190. data/resources/locales/he/numbers.yml +32 -31
  191. data/resources/locales/he/plurals.yml +3 -1
  192. data/resources/locales/he/units.yml +122 -121
  193. data/resources/locales/hi/calendars.yml +112 -111
  194. data/resources/locales/hi/languages.yml +511 -510
  195. data/resources/locales/hi/numbers.yml +32 -31
  196. data/resources/locales/hi/plurals.yml +3 -1
  197. data/resources/locales/hi/units.yml +122 -121
  198. data/resources/locales/hu/calendars.yml +142 -141
  199. data/resources/locales/hu/languages.yml +520 -519
  200. data/resources/locales/hu/numbers.yml +43 -42
  201. data/resources/locales/hu/plurals.yml +3 -1
  202. data/resources/locales/hu/units.yml +108 -107
  203. data/resources/locales/id/calendars.yml +117 -116
  204. data/resources/locales/id/languages.yml +514 -513
  205. data/resources/locales/id/numbers.yml +43 -42
  206. data/resources/locales/id/plurals.yml +3 -1
  207. data/resources/locales/id/units.yml +108 -107
  208. data/resources/locales/it/calendars.yml +123 -122
  209. data/resources/locales/it/languages.yml +504 -503
  210. data/resources/locales/it/numbers.yml +44 -43
  211. data/resources/locales/it/plurals.yml +3 -1
  212. data/resources/locales/it/units.yml +122 -121
  213. data/resources/locales/ja/calendars.yml +109 -108
  214. data/resources/locales/ja/languages.yml +516 -515
  215. data/resources/locales/ja/numbers.yml +35 -34
  216. data/resources/locales/ja/plurals.yml +3 -1
  217. data/resources/locales/ja/units.yml +108 -107
  218. data/resources/locales/ko/calendars.yml +112 -111
  219. data/resources/locales/ko/languages.yml +509 -508
  220. data/resources/locales/ko/numbers.yml +32 -31
  221. data/resources/locales/ko/plurals.yml +3 -1
  222. data/resources/locales/ko/units.yml +108 -107
  223. data/resources/locales/ms/calendars.yml +134 -133
  224. data/resources/locales/ms/languages.yml +158 -157
  225. data/resources/locales/ms/numbers.yml +31 -30
  226. data/resources/locales/ms/plurals.yml +3 -1
  227. data/resources/locales/ms/units.yml +136 -135
  228. data/resources/locales/nb/calendars.yml +143 -142
  229. data/resources/locales/nb/languages.yml +530 -529
  230. data/resources/locales/nb/numbers.yml +44 -43
  231. data/resources/locales/nb/plurals.yml +3 -2
  232. data/resources/locales/nb/units.yml +130 -129
  233. data/resources/locales/nl/calendars.yml +124 -123
  234. data/resources/locales/nl/languages.yml +516 -515
  235. data/resources/locales/nl/numbers.yml +44 -43
  236. data/resources/locales/nl/plurals.yml +3 -1
  237. data/resources/locales/nl/units.yml +122 -121
  238. data/resources/locales/pl/calendars.yml +148 -147
  239. data/resources/locales/pl/languages.yml +505 -504
  240. data/resources/locales/pl/numbers.yml +46 -45
  241. data/resources/locales/pl/plurals.yml +5 -1
  242. data/resources/locales/pl/units.yml +205 -204
  243. data/resources/locales/pt/calendars.yml +131 -130
  244. data/resources/locales/pt/languages.yml +517 -516
  245. data/resources/locales/pt/numbers.yml +44 -43
  246. data/resources/locales/pt/plurals.yml +3 -1
  247. data/resources/locales/pt/units.yml +136 -135
  248. data/resources/locales/ru/calendars.yml +143 -142
  249. data/resources/locales/ru/languages.yml +511 -510
  250. data/resources/locales/ru/numbers.yml +43 -42
  251. data/resources/locales/ru/plurals.yml +5 -1
  252. data/resources/locales/ru/units.yml +192 -191
  253. data/resources/locales/sv/calendars.yml +151 -150
  254. data/resources/locales/sv/languages.yml +531 -530
  255. data/resources/locales/sv/numbers.yml +44 -43
  256. data/resources/locales/sv/plurals.yml +3 -1
  257. data/resources/locales/sv/units.yml +136 -135
  258. data/resources/locales/th/calendars.yml +125 -124
  259. data/resources/locales/th/languages.yml +510 -509
  260. data/resources/locales/th/numbers.yml +41 -40
  261. data/resources/locales/th/plurals.yml +3 -1
  262. data/resources/locales/th/units.yml +108 -107
  263. data/resources/locales/tr/calendars.yml +139 -138
  264. data/resources/locales/tr/languages.yml +511 -510
  265. data/resources/locales/tr/numbers.yml +43 -42
  266. data/resources/locales/tr/plurals.yml +3 -1
  267. data/resources/locales/tr/units.yml +108 -107
  268. data/resources/locales/uk/calendars.yml +131 -130
  269. data/resources/locales/uk/languages.yml +520 -519
  270. data/resources/locales/uk/numbers.yml +46 -45
  271. data/resources/locales/uk/plurals.yml +5 -2
  272. data/resources/locales/uk/units.yml +192 -191
  273. data/resources/locales/ur/calendars.yml +111 -110
  274. data/resources/locales/ur/languages.yml +164 -163
  275. data/resources/locales/ur/numbers.yml +32 -31
  276. data/resources/locales/ur/plurals.yml +3 -1
  277. data/resources/locales/ur/units.yml +136 -135
  278. data/resources/locales/zh-Hant/calendars.yml +116 -115
  279. data/resources/locales/zh-Hant/languages.yml +510 -509
  280. data/resources/locales/zh-Hant/numbers.yml +35 -34
  281. data/resources/locales/zh-Hant/plurals.yml +3 -2
  282. data/resources/locales/zh-Hant/units.yml +108 -107
  283. data/resources/locales/zh/calendars.yml +138 -137
  284. data/resources/locales/zh/languages.yml +506 -505
  285. data/resources/locales/zh/numbers.yml +35 -34
  286. data/resources/locales/zh/plurals.yml +3 -1
  287. data/resources/locales/zh/units.yml +80 -79
  288. data/resources/shared/currencies.yml +448 -451
  289. data/resources/shared/phone_codes.yml +241 -0
  290. data/resources/shared/postal_codes.yml +160 -0
  291. data/resources/unicode_data/blocks.yml +221 -221
  292. data/resources/unicode_data/blocks/aegean_numbers.yml +913 -0
  293. data/resources/unicode_data/blocks/alchemical_symbols.yml +1857 -0
  294. data/resources/unicode_data/blocks/alphabetic_presentation_forms.yml +929 -0
  295. data/resources/unicode_data/blocks/ancient_greek_musical_notation.yml +1121 -0
  296. data/resources/unicode_data/blocks/ancient_greek_numbers.yml +1201 -0
  297. data/resources/unicode_data/blocks/ancient_symbols.yml +193 -0
  298. data/resources/unicode_data/blocks/arabic.yml +4049 -0
  299. data/resources/unicode_data/blocks/arabic_extended_a.yml +625 -0
  300. data/resources/unicode_data/blocks/arabic_mathematical_alphabetic_symbols.yml +2289 -0
  301. data/resources/unicode_data/{arabic_presentation_forms_a.yml → blocks/arabic_presentation_forms_a.yml} +7369 -7368
  302. data/resources/unicode_data/{arabic_presentation_forms_b.yml → blocks/arabic_presentation_forms_b.yml} +1414 -1414
  303. data/resources/unicode_data/blocks/arabic_supplement.yml +769 -0
  304. data/resources/unicode_data/blocks/armenian.yml +1393 -0
  305. data/resources/unicode_data/blocks/arrows.yml +1793 -0
  306. data/resources/unicode_data/blocks/avestan.yml +977 -0
  307. data/resources/unicode_data/blocks/balinese.yml +1937 -0
  308. data/resources/unicode_data/blocks/bamum.yml +1409 -0
  309. data/resources/unicode_data/blocks/bamum_supplement.yml +9105 -0
  310. data/resources/unicode_data/blocks/basic_latin.yml +2049 -0
  311. data/resources/unicode_data/blocks/batak.yml +897 -0
  312. data/resources/unicode_data/blocks/bengali.yml +1473 -0
  313. data/resources/unicode_data/blocks/block_elements.yml +513 -0
  314. data/resources/unicode_data/blocks/bopomofo.yml +657 -0
  315. data/resources/unicode_data/blocks/bopomofo_extended.yml +433 -0
  316. data/resources/unicode_data/{box_drawing.yml → blocks/box_drawing.yml} +1537 -1537
  317. data/resources/unicode_data/blocks/brahmi.yml +1729 -0
  318. data/resources/unicode_data/blocks/braille_patterns.yml +4097 -0
  319. data/resources/unicode_data/blocks/buginese.yml +481 -0
  320. data/resources/unicode_data/blocks/buhid.yml +321 -0
  321. data/resources/unicode_data/blocks/byzantine_musical_symbols.yml +3937 -0
  322. data/resources/unicode_data/blocks/carian.yml +785 -0
  323. data/resources/unicode_data/blocks/chakma.yml +1073 -0
  324. data/resources/unicode_data/blocks/cham.yml +1329 -0
  325. data/resources/unicode_data/blocks/cherokee.yml +1361 -0
  326. data/resources/unicode_data/{cjk_compatibility.yml → blocks/cjk_compatibility.yml} +2706 -2706
  327. data/resources/unicode_data/{cjk_compatibility_forms.yml → blocks/cjk_compatibility_forms.yml} +363 -363
  328. data/resources/unicode_data/blocks/cjk_compatibility_ideographs.yml +7553 -0
  329. data/resources/unicode_data/blocks/cjk_compatibility_ideographs_supplement.yml +8673 -0
  330. data/resources/unicode_data/blocks/cjk_radicals_supplement.yml +1841 -0
  331. data/resources/unicode_data/blocks/cjk_strokes.yml +577 -0
  332. data/resources/unicode_data/blocks/cjk_symbols_and_punctuation.yml +1025 -0
  333. data/resources/unicode_data/blocks/cjk_unified_ideographs.yml +33 -0
  334. data/resources/unicode_data/blocks/cjk_unified_ideographs_extension_a.yml +33 -0
  335. data/resources/unicode_data/blocks/cjk_unified_ideographs_extension_b.yml +33 -0
  336. data/resources/unicode_data/blocks/cjk_unified_ideographs_extension_c.yml +33 -0
  337. data/resources/unicode_data/blocks/cjk_unified_ideographs_extension_d.yml +33 -0
  338. data/resources/unicode_data/blocks/combining_diacritical_marks.yml +1793 -0
  339. data/resources/unicode_data/{combining_diacritical_marks_for_symbols.yml → blocks/combining_diacritical_marks_for_symbols.yml} +409 -409
  340. data/resources/unicode_data/blocks/combining_diacritical_marks_supplement.yml +689 -0
  341. data/resources/unicode_data/blocks/combining_half_marks.yml +113 -0
  342. data/resources/unicode_data/blocks/common_indic_number_forms.yml +161 -0
  343. data/resources/unicode_data/blocks/control_pictures.yml +625 -0
  344. data/resources/unicode_data/blocks/coptic.yml +1969 -0
  345. data/resources/unicode_data/blocks/counting_rod_numerals.yml +289 -0
  346. data/resources/unicode_data/blocks/cuneiform.yml +14065 -0
  347. data/resources/unicode_data/blocks/cuneiform_numbers_and_punctuation.yml +1649 -0
  348. data/resources/unicode_data/blocks/currency_symbols.yml +417 -0
  349. data/resources/unicode_data/blocks/cypriot_syllabary.yml +881 -0
  350. data/resources/unicode_data/{cyrillic.yml → blocks/cyrillic.yml} +2765 -2765
  351. data/resources/unicode_data/blocks/cyrillic_extended_a.yml +513 -0
  352. data/resources/unicode_data/blocks/cyrillic_extended_b.yml +1425 -0
  353. data/resources/unicode_data/blocks/cyrillic_supplement.yml +641 -0
  354. data/resources/unicode_data/blocks/deseret.yml +1281 -0
  355. data/resources/unicode_data/blocks/devanagari.yml +2033 -0
  356. data/resources/unicode_data/blocks/devanagari_extended.yml +449 -0
  357. data/resources/unicode_data/blocks/dingbats.yml +3057 -0
  358. data/resources/unicode_data/blocks/domino_tiles.yml +1601 -0
  359. data/resources/unicode_data/blocks/egyptian_hieroglyphs.yml +17137 -0
  360. data/resources/unicode_data/blocks/emoticons.yml +1217 -0
  361. data/resources/unicode_data/blocks/enclosed_alphanumeric_supplement.yml +2737 -0
  362. data/resources/unicode_data/blocks/enclosed_alphanumerics.yml +2561 -0
  363. data/resources/unicode_data/{enclosed_cjk_letters_and_months.yml → blocks/enclosed_cjk_letters_and_months.yml} +3067 -3067
  364. data/resources/unicode_data/{enclosed_ideographic_supplement.yml → blocks/enclosed_ideographic_supplement.yml} +685 -685
  365. data/resources/unicode_data/blocks/ethiopic.yml +5729 -0
  366. data/resources/unicode_data/blocks/ethiopic_extended.yml +1265 -0
  367. data/resources/unicode_data/blocks/ethiopic_extended_a.yml +513 -0
  368. data/resources/unicode_data/blocks/ethiopic_supplement.yml +417 -0
  369. data/resources/unicode_data/blocks/general_punctuation.yml +1713 -0
  370. data/resources/unicode_data/blocks/geometric_shapes.yml +1537 -0
  371. data/resources/unicode_data/blocks/georgian.yml +1409 -0
  372. data/resources/unicode_data/blocks/georgian_supplement.yml +641 -0
  373. data/resources/unicode_data/blocks/glagolitic.yml +1505 -0
  374. data/resources/unicode_data/blocks/gothic.yml +433 -0
  375. data/resources/unicode_data/{greek_and_coptic.yml → blocks/greek_and_coptic.yml} +1360 -1360
  376. data/resources/unicode_data/{greek_extended.yml → blocks/greek_extended.yml} +2330 -2330
  377. data/resources/unicode_data/blocks/gujarati.yml +1345 -0
  378. data/resources/unicode_data/blocks/gurmukhi.yml +1265 -0
  379. data/resources/unicode_data/{halfwidth_and_fullwidth_forms.yml → blocks/halfwidth_and_fullwidth_forms.yml} +2517 -2517
  380. data/resources/unicode_data/{hangul_compatibility_jamo.yml → blocks/hangul_compatibility_jamo.yml} +993 -993
  381. data/resources/unicode_data/blocks/hangul_jamo.yml +4097 -0
  382. data/resources/unicode_data/blocks/hangul_jamo_extended_a.yml +465 -0
  383. data/resources/unicode_data/blocks/hangul_jamo_extended_b.yml +1153 -0
  384. data/resources/unicode_data/blocks/hangul_syllables.yml +33 -0
  385. data/resources/unicode_data/blocks/hanunoo.yml +369 -0
  386. data/resources/unicode_data/blocks/hebrew.yml +1393 -0
  387. data/resources/unicode_data/blocks/high_private_use_surrogates.yml +33 -0
  388. data/resources/unicode_data/blocks/high_surrogates.yml +33 -0
  389. data/resources/unicode_data/blocks/hiragana.yml +1489 -0
  390. data/resources/unicode_data/blocks/ideographic_description_characters.yml +193 -0
  391. data/resources/unicode_data/blocks/imperial_aramaic.yml +497 -0
  392. data/resources/unicode_data/blocks/inscriptional_pahlavi.yml +433 -0
  393. data/resources/unicode_data/blocks/inscriptional_parthian.yml +481 -0
  394. data/resources/unicode_data/{ipa_extensions.yml → blocks/ipa_extensions.yml} +1050 -1050
  395. data/resources/unicode_data/blocks/javanese.yml +1457 -0
  396. data/resources/unicode_data/blocks/kaithi.yml +1057 -0
  397. data/resources/unicode_data/blocks/kana_supplement.yml +33 -0
  398. data/resources/unicode_data/{kanbun.yml → blocks/kanbun.yml} +167 -167
  399. data/resources/unicode_data/blocks/kangxi_radicals.yml +3425 -0
  400. data/resources/unicode_data/blocks/kannada.yml +1377 -0
  401. data/resources/unicode_data/blocks/katakana.yml +1537 -0
  402. data/resources/unicode_data/blocks/katakana_phonetic_extensions.yml +257 -0
  403. data/resources/unicode_data/blocks/kayah_li.yml +769 -0
  404. data/resources/unicode_data/blocks/kharoshthi.yml +1041 -0
  405. data/resources/unicode_data/blocks/khmer.yml +1825 -0
  406. data/resources/unicode_data/blocks/khmer_symbols.yml +513 -0
  407. data/resources/unicode_data/blocks/lao.yml +1073 -0
  408. data/resources/unicode_data/{latin_1_supplement.yml → blocks/latin_1_supplement.yml} +1319 -1319
  409. data/resources/unicode_data/{latin_extended_a.yml → blocks/latin_extended_a.yml} +1210 -1210
  410. data/resources/unicode_data/{latin_extended_additional.yml → blocks/latin_extended_additional.yml} +2460 -2460
  411. data/resources/unicode_data/{latin_extended_b.yml → blocks/latin_extended_b.yml} +2096 -2096
  412. data/resources/unicode_data/blocks/latin_extended_c.yml +513 -0
  413. data/resources/unicode_data/blocks/latin_extended_d.yml +2145 -0
  414. data/resources/unicode_data/blocks/lepcha.yml +1185 -0
  415. data/resources/unicode_data/blocks/letterlike_symbols.yml +1281 -0
  416. data/resources/unicode_data/blocks/limbu.yml +1057 -0
  417. data/resources/unicode_data/blocks/linear_b_ideograms.yml +1969 -0
  418. data/resources/unicode_data/blocks/linear_b_syllabary.yml +1409 -0
  419. data/resources/unicode_data/blocks/lisu.yml +769 -0
  420. data/resources/unicode_data/blocks/low_surrogates.yml +33 -0
  421. data/resources/unicode_data/blocks/lycian.yml +465 -0
  422. data/resources/unicode_data/blocks/lydian.yml +433 -0
  423. data/resources/unicode_data/blocks/mahjong_tiles.yml +705 -0
  424. data/resources/unicode_data/blocks/malayalam.yml +1569 -0
  425. data/resources/unicode_data/blocks/mandaic.yml +465 -0
  426. data/resources/unicode_data/{mathematical_alphanumeric_symbols.yml → blocks/mathematical_alphanumeric_symbols.yml} +11953 -11953
  427. data/resources/unicode_data/blocks/mathematical_operators.yml +4097 -0
  428. data/resources/unicode_data/blocks/meetei_mayek.yml +897 -0
  429. data/resources/unicode_data/blocks/meetei_mayek_extensions.yml +369 -0
  430. data/resources/unicode_data/blocks/meroitic_cursive.yml +417 -0
  431. data/resources/unicode_data/blocks/meroitic_hieroglyphs.yml +513 -0
  432. data/resources/unicode_data/blocks/miao.yml +2129 -0
  433. data/resources/unicode_data/blocks/miscellaneous_mathematical_symbols_a.yml +769 -0
  434. data/resources/unicode_data/blocks/miscellaneous_mathematical_symbols_b.yml +2049 -0
  435. data/resources/unicode_data/blocks/miscellaneous_symbols.yml +4097 -0
  436. data/resources/unicode_data/blocks/miscellaneous_symbols_and_arrows.yml +1393 -0
  437. data/resources/unicode_data/blocks/miscellaneous_symbols_and_pictographs.yml +8529 -0
  438. data/resources/unicode_data/blocks/miscellaneous_technical.yml +3905 -0
  439. data/resources/unicode_data/blocks/modifier_tone_letters.yml +513 -0
  440. data/resources/unicode_data/blocks/mongolian.yml +2497 -0
  441. data/resources/unicode_data/blocks/musical_symbols.yml +3521 -0
  442. data/resources/unicode_data/blocks/myanmar.yml +2561 -0
  443. data/resources/unicode_data/blocks/myanmar_extended_a.yml +449 -0
  444. data/resources/unicode_data/blocks/new_tai_lue.yml +1329 -0
  445. data/resources/unicode_data/blocks/nko.yml +945 -0
  446. data/resources/unicode_data/blocks/number_forms.yml +929 -0
  447. data/resources/unicode_data/blocks/ogham.yml +465 -0
  448. data/resources/unicode_data/blocks/ol_chiki.yml +769 -0
  449. data/resources/unicode_data/blocks/old_italic.yml +561 -0
  450. data/resources/unicode_data/blocks/old_persian.yml +801 -0
  451. data/resources/unicode_data/blocks/old_south_arabian.yml +513 -0
  452. data/resources/unicode_data/blocks/old_turkic.yml +1169 -0
  453. data/resources/unicode_data/blocks/optical_character_recognition.yml +177 -0
  454. data/resources/unicode_data/blocks/oriya.yml +1441 -0
  455. data/resources/unicode_data/blocks/osmanya.yml +641 -0
  456. data/resources/unicode_data/blocks/phags_pa.yml +897 -0
  457. data/resources/unicode_data/blocks/phaistos_disc.yml +737 -0
  458. data/resources/unicode_data/blocks/phoenician.yml +465 -0
  459. data/resources/unicode_data/blocks/phonetic_extensions.yml +2049 -0
  460. data/resources/unicode_data/blocks/phonetic_extensions_supplement.yml +1025 -0
  461. data/resources/unicode_data/blocks/playing_cards.yml +945 -0
  462. data/resources/unicode_data/blocks/private_use_area.yml +33 -0
  463. data/resources/unicode_data/blocks/rejang.yml +593 -0
  464. data/resources/unicode_data/blocks/rumi_numeral_symbols.yml +497 -0
  465. data/resources/unicode_data/blocks/runic.yml +1297 -0
  466. data/resources/unicode_data/blocks/samaritan.yml +977 -0
  467. data/resources/unicode_data/blocks/saurashtra.yml +1297 -0
  468. data/resources/unicode_data/blocks/sharada.yml +1329 -0
  469. data/resources/unicode_data/blocks/shavian.yml +769 -0
  470. data/resources/unicode_data/blocks/sinhala.yml +1281 -0
  471. data/resources/unicode_data/blocks/small_form_variants.yml +417 -0
  472. data/resources/unicode_data/blocks/sora_sompeng.yml +561 -0
  473. data/resources/unicode_data/blocks/spacing_modifier_letters.yml +1281 -0
  474. data/resources/unicode_data/blocks/specials.yml +81 -0
  475. data/resources/unicode_data/blocks/sundanese.yml +1025 -0
  476. data/resources/unicode_data/blocks/sundanese_supplement.yml +129 -0
  477. data/resources/unicode_data/blocks/superscripts_and_subscripts.yml +673 -0
  478. data/resources/unicode_data/blocks/supplemental_arrows_a.yml +257 -0
  479. data/resources/unicode_data/blocks/supplemental_arrows_b.yml +2049 -0
  480. data/resources/unicode_data/blocks/supplemental_mathematical_operators.yml +4097 -0
  481. data/resources/unicode_data/blocks/supplemental_punctuation.yml +961 -0
  482. data/resources/unicode_data/blocks/supplementary_private_use_area_a.yml +33 -0
  483. data/resources/unicode_data/blocks/supplementary_private_use_area_b.yml +33 -0
  484. data/resources/unicode_data/blocks/syloti_nagri.yml +705 -0
  485. data/resources/unicode_data/blocks/syriac.yml +1233 -0
  486. data/resources/unicode_data/blocks/tagalog.yml +321 -0
  487. data/resources/unicode_data/blocks/tagbanwa.yml +289 -0
  488. data/resources/unicode_data/blocks/tags.yml +1553 -0
  489. data/resources/unicode_data/blocks/tai_le.yml +561 -0
  490. data/resources/unicode_data/blocks/tai_tham.yml +2033 -0
  491. data/resources/unicode_data/blocks/tai_viet.yml +1153 -0
  492. data/resources/unicode_data/blocks/tai_xuan_jing_symbols.yml +1393 -0
  493. data/resources/unicode_data/blocks/takri.yml +1057 -0
  494. data/resources/unicode_data/blocks/tamil.yml +1153 -0
  495. data/resources/unicode_data/blocks/telugu.yml +1489 -0
  496. data/resources/unicode_data/blocks/thaana.yml +801 -0
  497. data/resources/unicode_data/blocks/thai.yml +1393 -0
  498. data/resources/unicode_data/blocks/tibetan.yml +3377 -0
  499. data/resources/unicode_data/blocks/tifinagh.yml +945 -0
  500. data/resources/unicode_data/blocks/transport_and_map_symbols.yml +1121 -0
  501. data/resources/unicode_data/blocks/ugaritic.yml +497 -0
  502. data/resources/unicode_data/blocks/unified_canadian_aboriginal_syllabics.yml +10241 -0
  503. data/resources/unicode_data/blocks/unified_canadian_aboriginal_syllabics_extended.yml +1121 -0
  504. data/resources/unicode_data/blocks/vai.yml +4801 -0
  505. data/resources/unicode_data/blocks/variation_selectors.yml +257 -0
  506. data/resources/unicode_data/blocks/variation_selectors_supplement.yml +3841 -0
  507. data/resources/unicode_data/blocks/vedic_extensions.yml +625 -0
  508. data/resources/unicode_data/{vertical_forms.yml → blocks/vertical_forms.yml} +121 -121
  509. data/resources/unicode_data/blocks/yi_radicals.yml +881 -0
  510. data/resources/unicode_data/blocks/yi_syllables.yml +18641 -0
  511. data/resources/unicode_data/blocks/yijing_hexagram_symbols.yml +1025 -0
  512. data/resources/unicode_data/canonical_compositions.yml +4925 -0
  513. data/resources/unicode_data/composition_exclusions.yml +78 -74
  514. data/resources/unicode_data/hangul_blocks.yml +9 -9
  515. data/spec/collation/collation_spec.rb +6 -6
  516. data/spec/collation/collator_spec.rb +18 -19
  517. data/spec/collation/trie_builder_spec.rb +6 -9
  518. data/spec/core_ext/array_spec.rb +1 -1
  519. data/spec/core_ext/string_spec.rb +1 -1
  520. data/spec/formatters/numbers/currency_formatter_spec.rb +5 -0
  521. data/spec/formatters/numbers/decimal_formatter_spec.rb +4 -0
  522. data/spec/formatters/numbers/number_formatter_spec.rb +4 -4
  523. data/spec/formatters/numbers/percent_formatter_spec.rb +8 -0
  524. data/spec/normalization/base_spec.rb +2 -2
  525. data/spec/normalization/normalization_spec.rb +7 -3
  526. data/spec/readme_spec.rb +9 -9
  527. data/spec/resources/loader_spec.rb +4 -4
  528. data/spec/shared/code_point_spec.rb +102 -62
  529. data/spec/shared/currencies_spec.rb +17 -19
  530. data/spec/shared/phone_codes_spec.rb +49 -0
  531. data/spec/shared/postal_codes_spec.rb +68 -0
  532. data/spec/utils/code_points_spec.rb +6 -6
  533. metadata +264 -224
  534. data/resources/unicode_data/aegean_numbers.yml +0 -913
  535. data/resources/unicode_data/alchemical_symbols.yml +0 -1857
  536. data/resources/unicode_data/alphabetic_presentation_forms.yml +0 -929
  537. data/resources/unicode_data/ancient_greek_musical_notation.yml +0 -1121
  538. data/resources/unicode_data/ancient_greek_numbers.yml +0 -1201
  539. data/resources/unicode_data/ancient_symbols.yml +0 -193
  540. data/resources/unicode_data/arabic.yml +0 -4049
  541. data/resources/unicode_data/arabic_extended_a.yml +0 -625
  542. data/resources/unicode_data/arabic_mathematical_alphabetic_symbols.yml +0 -2289
  543. data/resources/unicode_data/arabic_supplement.yml +0 -769
  544. data/resources/unicode_data/armenian.yml +0 -1393
  545. data/resources/unicode_data/arrows.yml +0 -1793
  546. data/resources/unicode_data/avestan.yml +0 -977
  547. data/resources/unicode_data/balinese.yml +0 -1937
  548. data/resources/unicode_data/bamum.yml +0 -1409
  549. data/resources/unicode_data/bamum_supplement.yml +0 -9105
  550. data/resources/unicode_data/basic_latin.yml +0 -2049
  551. data/resources/unicode_data/batak.yml +0 -897
  552. data/resources/unicode_data/bengali.yml +0 -1473
  553. data/resources/unicode_data/block_elements.yml +0 -513
  554. data/resources/unicode_data/bopomofo.yml +0 -657
  555. data/resources/unicode_data/bopomofo_extended.yml +0 -433
  556. data/resources/unicode_data/brahmi.yml +0 -1729
  557. data/resources/unicode_data/braille_patterns.yml +0 -4097
  558. data/resources/unicode_data/buginese.yml +0 -481
  559. data/resources/unicode_data/buhid.yml +0 -321
  560. data/resources/unicode_data/byzantine_musical_symbols.yml +0 -3937
  561. data/resources/unicode_data/carian.yml +0 -785
  562. data/resources/unicode_data/chakma.yml +0 -1073
  563. data/resources/unicode_data/cham.yml +0 -1329
  564. data/resources/unicode_data/cherokee.yml +0 -1361
  565. data/resources/unicode_data/cjk_compatibility_ideographs.yml +0 -7553
  566. data/resources/unicode_data/cjk_compatibility_ideographs_supplement.yml +0 -8673
  567. data/resources/unicode_data/cjk_radicals_supplement.yml +0 -1841
  568. data/resources/unicode_data/cjk_strokes.yml +0 -577
  569. data/resources/unicode_data/cjk_symbols_and_punctuation.yml +0 -1025
  570. data/resources/unicode_data/cjk_unified_ideographs.yml +0 -33
  571. data/resources/unicode_data/cjk_unified_ideographs_extension_a.yml +0 -33
  572. data/resources/unicode_data/cjk_unified_ideographs_extension_b.yml +0 -33
  573. data/resources/unicode_data/cjk_unified_ideographs_extension_c.yml +0 -33
  574. data/resources/unicode_data/cjk_unified_ideographs_extension_d.yml +0 -33
  575. data/resources/unicode_data/combining_diacritical_marks.yml +0 -1793
  576. data/resources/unicode_data/combining_diacritical_marks_supplement.yml +0 -689
  577. data/resources/unicode_data/combining_half_marks.yml +0 -113
  578. data/resources/unicode_data/common_indic_number_forms.yml +0 -161
  579. data/resources/unicode_data/control_pictures.yml +0 -625
  580. data/resources/unicode_data/coptic.yml +0 -1969
  581. data/resources/unicode_data/counting_rod_numerals.yml +0 -289
  582. data/resources/unicode_data/cuneiform.yml +0 -14065
  583. data/resources/unicode_data/cuneiform_numbers_and_punctuation.yml +0 -1649
  584. data/resources/unicode_data/currency_symbols.yml +0 -417
  585. data/resources/unicode_data/cypriot_syllabary.yml +0 -881
  586. data/resources/unicode_data/cyrillic_extended_a.yml +0 -513
  587. data/resources/unicode_data/cyrillic_extended_b.yml +0 -1425
  588. data/resources/unicode_data/cyrillic_supplement.yml +0 -641
  589. data/resources/unicode_data/decomposition_map.yml +0 -4565
  590. data/resources/unicode_data/deseret.yml +0 -1281
  591. data/resources/unicode_data/devanagari.yml +0 -2033
  592. data/resources/unicode_data/devanagari_extended.yml +0 -449
  593. data/resources/unicode_data/dingbats.yml +0 -3057
  594. data/resources/unicode_data/domino_tiles.yml +0 -1601
  595. data/resources/unicode_data/egyptian_hieroglyphs.yml +0 -17137
  596. data/resources/unicode_data/emoticons.yml +0 -1217
  597. data/resources/unicode_data/enclosed_alphanumeric_supplement.yml +0 -2737
  598. data/resources/unicode_data/enclosed_alphanumerics.yml +0 -2561
  599. data/resources/unicode_data/ethiopic.yml +0 -5729
  600. data/resources/unicode_data/ethiopic_extended.yml +0 -1265
  601. data/resources/unicode_data/ethiopic_extended_a.yml +0 -513
  602. data/resources/unicode_data/ethiopic_supplement.yml +0 -417
  603. data/resources/unicode_data/general_punctuation.yml +0 -1713
  604. data/resources/unicode_data/geometric_shapes.yml +0 -1537
  605. data/resources/unicode_data/georgian.yml +0 -1409
  606. data/resources/unicode_data/georgian_supplement.yml +0 -641
  607. data/resources/unicode_data/glagolitic.yml +0 -1505
  608. data/resources/unicode_data/gothic.yml +0 -433
  609. data/resources/unicode_data/gujarati.yml +0 -1345
  610. data/resources/unicode_data/gurmukhi.yml +0 -1265
  611. data/resources/unicode_data/hangul_jamo.yml +0 -4097
  612. data/resources/unicode_data/hangul_jamo_extended_a.yml +0 -465
  613. data/resources/unicode_data/hangul_jamo_extended_b.yml +0 -1153
  614. data/resources/unicode_data/hangul_syllables.yml +0 -33
  615. data/resources/unicode_data/hanunoo.yml +0 -369
  616. data/resources/unicode_data/hebrew.yml +0 -1393
  617. data/resources/unicode_data/high_private_use_surrogates.yml +0 -33
  618. data/resources/unicode_data/high_surrogates.yml +0 -33
  619. data/resources/unicode_data/hiragana.yml +0 -1489
  620. data/resources/unicode_data/ideographic_description_characters.yml +0 -193
  621. data/resources/unicode_data/imperial_aramaic.yml +0 -497
  622. data/resources/unicode_data/inscriptional_pahlavi.yml +0 -433
  623. data/resources/unicode_data/inscriptional_parthian.yml +0 -481
  624. data/resources/unicode_data/javanese.yml +0 -1457
  625. data/resources/unicode_data/kaithi.yml +0 -1057
  626. data/resources/unicode_data/kana_supplement.yml +0 -33
  627. data/resources/unicode_data/kangxi_radicals.yml +0 -3425
  628. data/resources/unicode_data/kannada.yml +0 -1377
  629. data/resources/unicode_data/katakana.yml +0 -1537
  630. data/resources/unicode_data/katakana_phonetic_extensions.yml +0 -257
  631. data/resources/unicode_data/kayah_li.yml +0 -769
  632. data/resources/unicode_data/kharoshthi.yml +0 -1041
  633. data/resources/unicode_data/khmer.yml +0 -1825
  634. data/resources/unicode_data/khmer_symbols.yml +0 -513
  635. data/resources/unicode_data/lao.yml +0 -1073
  636. data/resources/unicode_data/latin_extended_c.yml +0 -513
  637. data/resources/unicode_data/latin_extended_d.yml +0 -2145
  638. data/resources/unicode_data/lepcha.yml +0 -1185
  639. data/resources/unicode_data/letterlike_symbols.yml +0 -1281
  640. data/resources/unicode_data/limbu.yml +0 -1057
  641. data/resources/unicode_data/linear_b_ideograms.yml +0 -1969
  642. data/resources/unicode_data/linear_b_syllabary.yml +0 -1409
  643. data/resources/unicode_data/lisu.yml +0 -769
  644. data/resources/unicode_data/low_surrogates.yml +0 -33
  645. data/resources/unicode_data/lycian.yml +0 -465
  646. data/resources/unicode_data/lydian.yml +0 -433
  647. data/resources/unicode_data/mahjong_tiles.yml +0 -705
  648. data/resources/unicode_data/malayalam.yml +0 -1569
  649. data/resources/unicode_data/mandaic.yml +0 -465
  650. data/resources/unicode_data/mathematical_operators.yml +0 -4097
  651. data/resources/unicode_data/meetei_mayek.yml +0 -897
  652. data/resources/unicode_data/meetei_mayek_extensions.yml +0 -369
  653. data/resources/unicode_data/meroitic_cursive.yml +0 -417
  654. data/resources/unicode_data/meroitic_hieroglyphs.yml +0 -513
  655. data/resources/unicode_data/miao.yml +0 -2129
  656. data/resources/unicode_data/miscellaneous_mathematical_symbols_a.yml +0 -769
  657. data/resources/unicode_data/miscellaneous_mathematical_symbols_b.yml +0 -2049
  658. data/resources/unicode_data/miscellaneous_symbols.yml +0 -4097
  659. data/resources/unicode_data/miscellaneous_symbols_and_arrows.yml +0 -1393
  660. data/resources/unicode_data/miscellaneous_symbols_and_pictographs.yml +0 -8529
  661. data/resources/unicode_data/miscellaneous_technical.yml +0 -3905
  662. data/resources/unicode_data/modifier_tone_letters.yml +0 -513
  663. data/resources/unicode_data/mongolian.yml +0 -2497
  664. data/resources/unicode_data/musical_symbols.yml +0 -3521
  665. data/resources/unicode_data/myanmar.yml +0 -2561
  666. data/resources/unicode_data/myanmar_extended_a.yml +0 -449
  667. data/resources/unicode_data/new_tai_lue.yml +0 -1329
  668. data/resources/unicode_data/nko.yml +0 -945
  669. data/resources/unicode_data/number_forms.yml +0 -929
  670. data/resources/unicode_data/ogham.yml +0 -465
  671. data/resources/unicode_data/ol_chiki.yml +0 -769
  672. data/resources/unicode_data/old_italic.yml +0 -561
  673. data/resources/unicode_data/old_persian.yml +0 -801
  674. data/resources/unicode_data/old_south_arabian.yml +0 -513
  675. data/resources/unicode_data/old_turkic.yml +0 -1169
  676. data/resources/unicode_data/optical_character_recognition.yml +0 -177
  677. data/resources/unicode_data/oriya.yml +0 -1441
  678. data/resources/unicode_data/osmanya.yml +0 -641
  679. data/resources/unicode_data/phags_pa.yml +0 -897
  680. data/resources/unicode_data/phaistos_disc.yml +0 -737
  681. data/resources/unicode_data/phoenician.yml +0 -465
  682. data/resources/unicode_data/phonetic_extensions.yml +0 -2049
  683. data/resources/unicode_data/phonetic_extensions_supplement.yml +0 -1025
  684. data/resources/unicode_data/playing_cards.yml +0 -945
  685. data/resources/unicode_data/private_use_area.yml +0 -33
  686. data/resources/unicode_data/rejang.yml +0 -593
  687. data/resources/unicode_data/rumi_numeral_symbols.yml +0 -497
  688. data/resources/unicode_data/runic.yml +0 -1297
  689. data/resources/unicode_data/samaritan.yml +0 -977
  690. data/resources/unicode_data/saurashtra.yml +0 -1297
  691. data/resources/unicode_data/sharada.yml +0 -1329
  692. data/resources/unicode_data/shavian.yml +0 -769
  693. data/resources/unicode_data/sinhala.yml +0 -1281
  694. data/resources/unicode_data/small_form_variants.yml +0 -417
  695. data/resources/unicode_data/sora_sompeng.yml +0 -561
  696. data/resources/unicode_data/spacing_modifier_letters.yml +0 -1281
  697. data/resources/unicode_data/specials.yml +0 -81
  698. data/resources/unicode_data/sundanese.yml +0 -1025
  699. data/resources/unicode_data/sundanese_supplement.yml +0 -129
  700. data/resources/unicode_data/superscripts_and_subscripts.yml +0 -673
  701. data/resources/unicode_data/supplemental_arrows_a.yml +0 -257
  702. data/resources/unicode_data/supplemental_arrows_b.yml +0 -2049
  703. data/resources/unicode_data/supplemental_mathematical_operators.yml +0 -4097
  704. data/resources/unicode_data/supplemental_punctuation.yml +0 -961
  705. data/resources/unicode_data/supplementary_private_use_area_a.yml +0 -33
  706. data/resources/unicode_data/supplementary_private_use_area_b.yml +0 -33
  707. data/resources/unicode_data/syloti_nagri.yml +0 -705
  708. data/resources/unicode_data/syriac.yml +0 -1233
  709. data/resources/unicode_data/tagalog.yml +0 -321
  710. data/resources/unicode_data/tagbanwa.yml +0 -289
  711. data/resources/unicode_data/tags.yml +0 -1553
  712. data/resources/unicode_data/tai_le.yml +0 -561
  713. data/resources/unicode_data/tai_tham.yml +0 -2033
  714. data/resources/unicode_data/tai_viet.yml +0 -1153
  715. data/resources/unicode_data/tai_xuan_jing_symbols.yml +0 -1393
  716. data/resources/unicode_data/takri.yml +0 -1057
  717. data/resources/unicode_data/tamil.yml +0 -1153
  718. data/resources/unicode_data/telugu.yml +0 -1489
  719. data/resources/unicode_data/thaana.yml +0 -801
  720. data/resources/unicode_data/thai.yml +0 -1393
  721. data/resources/unicode_data/tibetan.yml +0 -3377
  722. data/resources/unicode_data/tifinagh.yml +0 -945
  723. data/resources/unicode_data/transport_and_map_symbols.yml +0 -1121
  724. data/resources/unicode_data/ugaritic.yml +0 -497
  725. data/resources/unicode_data/unified_canadian_aboriginal_syllabics.yml +0 -10241
  726. data/resources/unicode_data/unified_canadian_aboriginal_syllabics_extended.yml +0 -1121
  727. data/resources/unicode_data/vai.yml +0 -4801
  728. data/resources/unicode_data/variation_selectors.yml +0 -257
  729. data/resources/unicode_data/variation_selectors_supplement.yml +0 -3841
  730. data/resources/unicode_data/vedic_extensions.yml +0 -625
  731. data/resources/unicode_data/yi_radicals.yml +0 -881
  732. data/resources/unicode_data/yi_syllables.yml +0 -18641
  733. data/resources/unicode_data/yijing_hexagram_symbols.yml +0 -1025
@@ -39,12 +39,12 @@ module TwitterCldr
39
39
  end
40
40
 
41
41
  def parse_code_points(string)
42
- string.split.map { |cp| cp.to_i(16) }
42
+ string.split.map(&:hex)
43
43
  end
44
44
 
45
45
  def parse_collation_element(string)
46
46
  string.scan(/\[.*?\]/).map do |match|
47
- match[1..-2].gsub(/\s/, '').split(',', -1).map { |bytes| bytes.to_i(16) }
47
+ match[1..-2].gsub(/\s/, '').split(',', -1).map { |bytes| bytes.hex }
48
48
  end
49
49
  end
50
50
 
@@ -71,7 +71,7 @@ module TwitterCldr
71
71
 
72
72
  def parse_suppressed_starters(suppressed_contractions)
73
73
  suppressed_contractions.chars.map do |starter|
74
- TwitterCldr::Utils::CodePoints.from_string(starter).first.to_i(16)
74
+ TwitterCldr::Utils::CodePoints.from_string(starter).first
75
75
  end
76
76
  end
77
77
 
@@ -20,12 +20,6 @@ module TwitterCldr
20
20
  def default_format_options_for(number)
21
21
  { :precision => precision_from(number) }
22
22
  end
23
-
24
- protected
25
-
26
- def get_tokens(obj, options = {})
27
- obj.abs == obj ? @tokenizer.tokens(:sign => :positive) : @tokenizer.tokens(:sign => :negative)
28
- end
29
23
  end
30
24
  end
31
25
  end
@@ -10,7 +10,7 @@ module TwitterCldr
10
10
  def interpolate(string, value, orientation = :right)
11
11
  value = value.to_s
12
12
  length = value.length
13
- start, pad = orientation == :left ? [0, :rjust] : [-length, :ljust]
13
+ start = orientation == :left ? 0 : -length
14
14
 
15
15
  string = string.dup
16
16
  string = string.ljust(length, '#') if string.length < length
@@ -35,8 +35,8 @@ module TwitterCldr
35
35
 
36
36
  def parse_number(number, options = {})
37
37
  precision = options[:precision] || self.precision_from(number)
38
- number = round_to(number, precision)
39
- number.abs.to_s.split(".")
38
+ number = "%.#{precision}f" % round_to(number, precision).abs
39
+ number.split(".")
40
40
  end
41
41
 
42
42
  def round_to(number, precision)
@@ -48,6 +48,10 @@ module TwitterCldr
48
48
  parts = num.to_s.split(".")
49
49
  parts.size == 2 ? parts[1].size : 0
50
50
  end
51
+
52
+ def get_tokens(obj, options = {})
53
+ obj.abs == obj ? @tokenizer.tokens(:sign => :positive) : @tokenizer.tokens(:sign => :negative)
54
+ end
51
55
  end
52
56
  end
53
57
  end
@@ -9,8 +9,14 @@ module TwitterCldr
9
9
 
10
10
  class << self
11
11
 
12
+ def normalize(string)
13
+ code_points = TwitterCldr::Utils::CodePoints.from_string(string)
14
+ normalized_code_points = normalize_code_points(code_points)
15
+ TwitterCldr::Utils::CodePoints.to_string(normalized_code_points)
16
+ end
17
+
12
18
  def combining_class_for(code_point)
13
- TwitterCldr::Shared::CodePoint.for_hex(code_point).combining_class.to_i
19
+ TwitterCldr::Shared::CodePoint.find(code_point).combining_class.to_i
14
20
  rescue NoMethodError
15
21
  0
16
22
  end
@@ -15,12 +15,8 @@ module TwitterCldr
15
15
 
16
16
  protected
17
17
 
18
- # Returns code point's Decomposition Mapping based on its Unicode data. Returns nil if the mapping has
19
- # compatibility type (it contains compatibility formatting tag).
20
- #
21
- def decomposition_mapping(unicode_data)
22
- mapping = parse_decomposition_mapping(unicode_data)
23
- mapping unless compatibility_decomposition?(mapping)
18
+ def decompose?(unicode_data)
19
+ super && !unicode_data.compatibility_decomposition? # skip compatibility decompositions
24
20
  end
25
21
 
26
22
  end
@@ -13,12 +13,6 @@ module TwitterCldr
13
13
 
14
14
  class << self
15
15
 
16
- def normalize(string)
17
- code_points = TwitterCldr::Utils::CodePoints.from_string(string)
18
- normalized_code_points = normalize_code_points(code_points)
19
- TwitterCldr::Utils::CodePoints.to_string(normalized_code_points)
20
- end
21
-
22
16
  def normalize_code_points(code_points)
23
17
  compose(TwitterCldr::Normalization::NFKD.normalize_code_points(code_points))
24
18
  end
@@ -59,7 +53,7 @@ module TwitterCldr
59
53
  end
60
54
 
61
55
  def compose_hangul(code_points)
62
- TwitterCldr::Normalization::Hangul.compose(code_points.map { |cp| cp.hex }).to_s(16).upcase.rjust(4, "0")
56
+ TwitterCldr::Normalization::Hangul.compose(code_points)
63
57
  end
64
58
 
65
59
  # Implements composition of Unicode code points following the guidelines here:
@@ -86,12 +80,12 @@ module TwitterCldr
86
80
 
87
81
  unless blocked
88
82
  # do a reverse-lookup for the decomposed code points
89
- decomp_data = TwitterCldr::Shared::CodePoint.for_decomposition([code_points[starter_index], code_point])
83
+ composite = TwitterCldr::Shared::CodePoint.for_canonical_decomposition([code_points[starter_index], code_point])
90
84
 
91
85
  # check if two code points are canonically equivalent
92
- if decomp_data && !decomp_data.excluded_from_composition?
86
+ if composite && !composite.excluded_from_composition?
93
87
  # combine the characters
94
- code_points[starter_index] = decomp_data.code_point
88
+ code_points[starter_index] = composite.code_point
95
89
  code_points.delete_at(index)
96
90
  index -= 1
97
91
  end
@@ -16,65 +16,48 @@ module TwitterCldr
16
16
  #
17
17
  class NFKD < Base
18
18
 
19
- class << self
20
19
 
21
- def normalize(string)
22
- code_points = TwitterCldr::Utils::CodePoints.from_string(string)
23
- normalized_code_points = normalize_code_points(code_points)
24
- TwitterCldr::Utils::CodePoints.to_string(normalized_code_points)
25
- end
20
+ class << self
26
21
 
27
22
  def normalize_code_points(code_points)
28
- canonical_ordering(decomposition(code_points))
23
+ canonical_ordering(decompose(code_points))
29
24
  end
30
25
 
31
26
  protected
32
27
 
33
- def decomposition(code_points)
28
+ def decompose(code_points)
34
29
  code_points.map { |code_point| decompose_recursively(code_point) }.flatten
35
30
  end
36
31
 
37
32
  # Recursively decomposes a given code point with the values in its Decomposition Mapping property.
38
33
  #
39
34
  def decompose_recursively(code_point)
40
- unicode_data = TwitterCldr::Shared::CodePoint.for_hex(code_point)
35
+ unicode_data = TwitterCldr::Shared::CodePoint.find(code_point)
41
36
  return code_point unless unicode_data
42
37
 
43
38
  if unicode_data.hangul_type == :compositions
44
39
  decompose_hangul(code_point)
45
40
  else
46
- decompose_regular(code_point, decomposition_mapping(unicode_data))
41
+ decompose_regular(unicode_data)
47
42
  end
48
43
  end
49
44
 
50
45
  # Decomposes regular (non-Hangul) code point.
51
46
  #
52
- def decompose_regular(code_point, mapping)
53
- if mapping && !mapping.empty?
54
- mapping.map{ |cp| decompose_recursively(cp) }.flatten
47
+ def decompose_regular(unicode_data)
48
+ if decompose?(unicode_data)
49
+ unicode_data.decomposition.map { |code_point| decompose_recursively(code_point) }.flatten
55
50
  else
56
- code_point
51
+ unicode_data.code_point
57
52
  end
58
53
  end
59
54
 
60
- # Returns code point's Decomposition Mapping based on its Unicode data.
61
- #
62
- def decomposition_mapping(unicode_data)
63
- mapping = parse_decomposition_mapping(unicode_data)
64
- mapping.shift if compatibility_decomposition?(mapping) # remove compatibility formatting tag
65
- mapping
66
- end
67
-
68
- def compatibility_decomposition?(mapping)
69
- !!(COMPATIBILITY_FORMATTING_TAG_REGEXP =~ mapping.first)
70
- end
71
-
72
- def parse_decomposition_mapping(unicode_data)
73
- unicode_data.decomposition.split
55
+ def decompose?(unicode_data)
56
+ !!unicode_data.decomposition
74
57
  end
75
58
 
76
59
  def decompose_hangul(code_point)
77
- TwitterCldr::Normalization::Hangul.decompose(code_point.hex).map { |e| e.to_s(16).upcase }
60
+ TwitterCldr::Normalization::Hangul.decompose(code_point)
78
61
  end
79
62
 
80
63
  # Performs the Canonical Ordering Algorithm by stable sorting of every subsequence of combining code points
@@ -127,16 +110,8 @@ module TwitterCldr
127
110
  code_points_with_cc
128
111
  end
129
112
 
130
- def combining_class_for(code_point)
131
- TwitterCldr::Shared::CodePoint.for_hex(code_point).combining_class.to_i
132
- rescue NoMethodError
133
- 0
134
- end
135
-
136
113
  end
137
114
 
138
- COMPATIBILITY_FORMATTING_TAG_REGEXP = /^<.*>$/
139
-
140
115
  end
141
116
  end
142
117
  end
@@ -5,8 +5,15 @@
5
5
 
6
6
  module TwitterCldr
7
7
  module Resources
8
- autoload :Loader, 'twitter_cldr/resources/loader'
9
- autoload :TailoringImporter, 'twitter_cldr/resources/tailoring_importer'
10
- autoload :TriesDumper, 'twitter_cldr/resources/tries_dumper'
8
+ autoload :CanonicalCompositionsUpdater, 'twitter_cldr/resources/canonical_compositions_updater'
9
+ autoload :CompositionExclusionsImporter, 'twitter_cldr/resources/composition_exclusions_importer'
10
+ autoload :CustomLocalesResourcesImporter, 'twitter_cldr/resources/custom_locales_resources_importer'
11
+ autoload :Loader, 'twitter_cldr/resources/loader'
12
+ autoload :LocalesResourcesImporter, 'twitter_cldr/resources/locales_resources_importer'
13
+ autoload :PhoneCodesImporter, 'twitter_cldr/resources/phone_codes_importer'
14
+ autoload :PostalCodesImporter, 'twitter_cldr/resources/postal_codes_importer'
15
+ autoload :TailoringImporter, 'twitter_cldr/resources/tailoring_importer'
16
+ autoload :TriesDumper, 'twitter_cldr/resources/tries_dumper'
17
+ autoload :UnicodeDataImporter, 'twitter_cldr/resources/unicode_data_importer'
11
18
  end
12
19
  end
@@ -0,0 +1,51 @@
1
+ # encoding: UTF-8
2
+
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
6
+ module TwitterCldr
7
+ module Resources
8
+
9
+ class CanonicalCompositionsUpdater
10
+
11
+ CODE_POINT_MAX = 0x10FFFF
12
+
13
+ # Arguments:
14
+ #
15
+ # output_path - output directory for generated YAML file
16
+ #
17
+ def initialize(output_path)
18
+ @output_path = output_path
19
+ end
20
+
21
+ def update
22
+ File.open(File.join(@output_path, 'canonical_compositions.yml'), 'w') do |output|
23
+ YAML.dump(generate_compositions, output)
24
+ end
25
+ end
26
+
27
+ private
28
+
29
+ def generate_compositions
30
+ (1..CODE_POINT_MAX).inject({}) do |memo, code_point|
31
+ code_point_data = TwitterCldr::Shared::CodePoint.find(code_point)
32
+
33
+ if code_point_data && !code_point_data.compatibility_decomposition? && code_point_data.decomposition && !code_point_data.decomposition.empty?
34
+ memo[code_point_data.decomposition] = code_point
35
+ end
36
+
37
+ log_progress(code_point, memo.size)
38
+
39
+ memo
40
+ end
41
+ end
42
+
43
+ def log_progress(code_point, compositions_count)
44
+ $stdout.write("\r#{(100.0 * code_point / CODE_POINT_MAX).round}% complete, found #{compositions_count} canonical compositions")
45
+ $stdout.write("\n") if code_point == CODE_POINT_MAX
46
+ end
47
+
48
+ end
49
+
50
+ end
51
+ end
@@ -0,0 +1,62 @@
1
+ # encoding: UTF-8
2
+
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
6
+ require 'lib/twitter_cldr/resources/download'
7
+
8
+ module TwitterCldr
9
+ module Resources
10
+
11
+ class CompositionExclusionsImporter
12
+
13
+ COMPOSITION_EXCLUSIONS_URL = 'http://www.unicode.org/Public/6.1.0/ucd/DerivedNormalizationProps.txt'
14
+ COMPOSITION_EXCLUSION_REGEXP = /^([0-9A-F]+)(?:\.\.([0-9A-F]+))?\s+; Full_Composition_Exclusion #.*$/
15
+ TOTAL_CODE_POINTS_REGEXP = /^# Total code points: (\d+)$/
16
+
17
+ # Arguments:
18
+ #
19
+ # input_path - path to DerivedNormalizationProps.txt file
20
+ # output_path - output directory for generated YAML file
21
+ #
22
+ def initialize(input_path, output_path)
23
+ @input_path = input_path
24
+ @output_path = output_path
25
+ end
26
+
27
+ def import
28
+ File.open(File.join(@output_path, 'composition_exclusions.yml'), 'w') do |output|
29
+ YAML.dump(generate_composition_exclusions, output)
30
+ end
31
+ end
32
+
33
+ private
34
+
35
+ def generate_composition_exclusions
36
+ data = File.open(composition_exclusions_file) { |file| file.read }
37
+ start_pos = data.index("# Derived Property: Full_Composition_Exclusion")
38
+ end_pos = data.index(/^#\s=*$/, start_pos)
39
+ data = data[start_pos..end_pos].split("\n")
40
+
41
+ expected_code_points_count = nil
42
+
43
+ result = data.inject([]) do |memo, line|
44
+ memo << ($1.hex..($2 || $1).hex) if line =~ COMPOSITION_EXCLUSION_REGEXP
45
+ expected_code_points_count = $1.to_i if line =~ TOTAL_CODE_POINTS_REGEXP
46
+ memo
47
+ end
48
+
49
+ raise "Expected number of code points was not found." unless expected_code_points_count
50
+ code_points_count = result.map(&:count).inject(:+)
51
+ raise "Unexpected number of code points: expected - #{expected_code_points_count}, got - #{code_points_count}." unless code_points_count == expected_code_points_count
52
+
53
+ result
54
+ end
55
+
56
+ def composition_exclusions_file
57
+ TwitterCldr::Resources.download_if_necessary(@input_path, COMPOSITION_EXCLUSIONS_URL)
58
+ end
59
+
60
+ end
61
+ end
62
+ end
@@ -0,0 +1,80 @@
1
+ # encoding: UTF-8
2
+
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
6
+ require 'json'
7
+ require 'open-uri'
8
+
9
+ module TwitterCldr
10
+ module Resources
11
+
12
+ class CustomLocalesResourcesImporter
13
+
14
+ API_ENDPOINT = "http://translate.twttr.com/api/2/twitter/phrase/%s/translations.json"
15
+
16
+ TIME_PERIODS = {
17
+ :day => 19636,
18
+ :hour => 19638,
19
+ :minute => 19634,
20
+ :second => 19639
21
+ }
22
+
23
+ # Arguments:
24
+ #
25
+ # output_path - output directory for imported YAML files
26
+ #
27
+ def initialize(output_path)
28
+ @output_path = output_path
29
+ end
30
+
31
+ def import
32
+ import_units
33
+ end
34
+
35
+ private
36
+
37
+ def import_units
38
+ fetch_units_data.each do |locale, data|
39
+ dir_path = File.join(@output_path, locale.to_s)
40
+
41
+ FileUtils.mkpath(dir_path)
42
+
43
+ File.open(File.join(dir_path, 'units.yml'), 'w:utf-8') do |output|
44
+ output.write(YAML.dump({ locale => data }))
45
+ end
46
+ end
47
+ end
48
+
49
+ def fetch_units_data
50
+ TIME_PERIODS.inject({}) do |result, (label, id)|
51
+ api_response = JSON.parse(open(API_ENDPOINT % id).read)
52
+
53
+ TwitterCldr.supported_locales.each do |locale|
54
+ twitter_locale = TwitterCldr.twitter_locale(locale).to_s
55
+
56
+ next unless api_response[twitter_locale]
57
+
58
+ patterns = TwitterCldr::Formatters::Plurals::Rules.all_for(locale).inject({}) do |memo, rule|
59
+ memo[rule] = api_response[twitter_locale].gsub("%{number}", "{0}"); memo
60
+ end
61
+
62
+ set_value(result, patterns, locale, :units, label, :abbreviated)
63
+ end
64
+
65
+ result
66
+ end
67
+ end
68
+
69
+ def set_value(hash, value, *path)
70
+ last = path[0..-2].inject(hash) do |current, level|
71
+ current[level] ||= {}
72
+ end
73
+
74
+ last[path.last] = value
75
+ end
76
+
77
+ end
78
+
79
+ end
80
+ end