twitter_cldr 3.0.0.beta1 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (698)
  1. checksums.yaml +4 -4
  2. data/Gemfile +16 -2
  3. data/History.txt +9 -1
  4. data/README.md +297 -113
  5. data/Rakefile +97 -17
  6. data/lib/twitter_cldr/collation/collator.rb +12 -3
  7. data/lib/twitter_cldr/collation/trie_builder.rb +1 -1
  8. data/lib/twitter_cldr/{tokenizers/calendars → data_readers}/additional_date_format_selector.rb +6 -4
  9. data/lib/twitter_cldr/data_readers/calendar_data_reader.rb +91 -0
  10. data/lib/twitter_cldr/data_readers/data_reader.rb +32 -0
  11. data/lib/twitter_cldr/data_readers/date_data_reader.rb +26 -0
  12. data/lib/twitter_cldr/data_readers/date_time_data_reader.rb +41 -0
  13. data/lib/twitter_cldr/data_readers/number_data_reader.rb +142 -0
  14. data/lib/twitter_cldr/data_readers/time_data_reader.rb +26 -0
  15. data/lib/twitter_cldr/data_readers/timespan_data_reader.rb +122 -0
  16. data/lib/twitter_cldr/data_readers.rb +17 -0
  17. data/lib/twitter_cldr/formatters/calendars/{datetime_formatter.rb → date_time_formatter.rb} +27 -42
  18. data/lib/twitter_cldr/formatters/calendars/timespan_formatter.rb +3 -64
  19. data/lib/twitter_cldr/formatters/formatter.rb +39 -0
  20. data/lib/twitter_cldr/formatters/list_formatter.rb +22 -12
  21. data/lib/twitter_cldr/formatters/numbers/abbreviated/abbreviated_number_formatter.rb +5 -26
  22. data/lib/twitter_cldr/formatters/numbers/currency_formatter.rb +2 -11
  23. data/lib/twitter_cldr/formatters/numbers/decimal_formatter.rb +4 -2
  24. data/lib/twitter_cldr/formatters/numbers/number_formatter.rb +45 -27
  25. data/lib/twitter_cldr/formatters/numbers/percent_formatter.rb +3 -13
  26. data/lib/twitter_cldr/formatters/numbers/rbnf/formatters.rb +224 -0
  27. data/lib/twitter_cldr/formatters/numbers/rbnf/post_processors/chinese.rb +122 -0
  28. data/lib/twitter_cldr/formatters/numbers/rbnf/rule.rb +93 -0
  29. data/lib/twitter_cldr/formatters/numbers/rbnf/rule_group.rb +20 -0
  30. data/lib/twitter_cldr/formatters/numbers/rbnf/rule_parser.rb +86 -0
  31. data/lib/twitter_cldr/formatters/numbers/rbnf/rule_set.rb +259 -0
  32. data/lib/twitter_cldr/formatters/numbers/rbnf/substitution.rb +30 -0
  33. data/lib/twitter_cldr/formatters/numbers/rbnf.rb +127 -0
  34. data/lib/twitter_cldr/formatters/plurals/plural_formatter.rb +18 -6
  35. data/lib/twitter_cldr/formatters.rb +4 -5
  36. data/lib/twitter_cldr/localized/localized_array.rb +1 -1
  37. data/lib/twitter_cldr/localized/localized_date.rb +6 -3
  38. data/lib/twitter_cldr/localized/localized_datetime.rb +38 -15
  39. data/lib/twitter_cldr/localized/localized_number.rb +40 -24
  40. data/lib/twitter_cldr/localized/localized_object.rb +4 -4
  41. data/lib/twitter_cldr/localized/localized_string.rb +40 -7
  42. data/lib/twitter_cldr/localized/localized_time.rb +9 -2
  43. data/lib/twitter_cldr/localized/localized_timespan.rb +50 -5
  44. data/lib/twitter_cldr/normalization.rb +8 -19
  45. data/lib/twitter_cldr/parsers/parser.rb +50 -0
  46. data/lib/twitter_cldr/parsers/segmentation_parser.rb +137 -0
  47. data/lib/twitter_cldr/parsers/symbol_table.rb +30 -0
  48. data/lib/twitter_cldr/parsers/unicode_regex/character_class.rb +91 -0
  49. data/lib/twitter_cldr/parsers/unicode_regex/character_range.rb +39 -0
  50. data/lib/twitter_cldr/parsers/unicode_regex/character_set.rb +65 -0
  51. data/lib/twitter_cldr/parsers/unicode_regex/component.rb +50 -0
  52. data/lib/twitter_cldr/parsers/unicode_regex/literal.rb +83 -0
  53. data/lib/twitter_cldr/parsers/unicode_regex/unicode_string.rb +41 -0
  54. data/lib/twitter_cldr/parsers/unicode_regex_parser.rb +262 -0
  55. data/lib/twitter_cldr/parsers.rb +5 -1
  56. data/lib/twitter_cldr/resources/casefolder.rb.erb +64 -0
  57. data/lib/twitter_cldr/resources/casefolder_class_generator.rb +75 -0
  58. data/lib/twitter_cldr/resources/download.rb +10 -4
  59. data/lib/twitter_cldr/resources/icu_based_importer.rb +18 -0
  60. data/lib/twitter_cldr/resources/locales_resources_importer.rb +24 -13
  61. data/lib/twitter_cldr/resources/normalization_quick_check_importer.rb +1 -14
  62. data/lib/twitter_cldr/resources/rbnf_test_importer.rb +107 -0
  63. data/lib/twitter_cldr/resources/readme_renderer.rb +115 -0
  64. data/lib/twitter_cldr/resources/tailoring_importer.rb +2 -8
  65. data/lib/twitter_cldr/resources/uli/segment_exceptions_importer.rb +62 -0
  66. data/lib/twitter_cldr/resources/uli.rb +12 -0
  67. data/lib/twitter_cldr/resources/unicode_data_importer.rb +84 -14
  68. data/lib/twitter_cldr/resources/unicode_importer.rb +37 -0
  69. data/lib/twitter_cldr/resources/unicode_properties_importer.rb +79 -0
  70. data/lib/twitter_cldr/resources.rb +8 -1
  71. data/lib/twitter_cldr/shared/break_iterator.rb +213 -0
  72. data/lib/twitter_cldr/shared/calendar.rb +38 -14
  73. data/lib/twitter_cldr/shared/casefolder.rb +210 -0
  74. data/lib/twitter_cldr/shared/code_point.rb +103 -16
  75. data/lib/twitter_cldr/shared/numbering_system.rb +58 -0
  76. data/lib/twitter_cldr/shared/territories.rb +43 -1
  77. data/lib/twitter_cldr/shared/unicode_regex.rb +81 -0
  78. data/lib/twitter_cldr/shared.rb +13 -9
  79. data/lib/twitter_cldr/tokenizers/calendars/date_time_tokenizer.rb +77 -0
  80. data/lib/twitter_cldr/tokenizers/calendars/date_tokenizer.rb +14 -29
  81. data/lib/twitter_cldr/tokenizers/calendars/time_tokenizer.rb +13 -28
  82. data/lib/twitter_cldr/tokenizers/calendars/timespan_tokenizer.rb +11 -87
  83. data/lib/twitter_cldr/tokenizers/numbers/number_tokenizer.rb +16 -71
  84. data/lib/twitter_cldr/tokenizers/numbers/rbnf_tokenizer.rb +53 -0
  85. data/lib/twitter_cldr/tokenizers/pattern_tokenizer.rb +42 -0
  86. data/lib/twitter_cldr/tokenizers/segmentation/segmentation_tokenizer.rb +39 -0
  87. data/lib/twitter_cldr/tokenizers/tokenizer.rb +116 -0
  88. data/lib/twitter_cldr/tokenizers/unicode_regex/unicode_regex_tokenizer.rb +52 -0
  89. data/lib/twitter_cldr/tokenizers.rb +8 -4
  90. data/lib/twitter_cldr/utils/code_points.rb +1 -1
  91. data/lib/twitter_cldr/utils/range_set.rb +242 -0
  92. data/lib/twitter_cldr/utils/yaml.rb +17 -12
  93. data/lib/twitter_cldr/utils.rb +1 -1
  94. data/lib/twitter_cldr/version.rb +1 -1
  95. data/lib/twitter_cldr.rb +2 -1
  96. data/resources/custom/locales/sv/units.yml +8 -0
  97. data/resources/locales/af/calendars.yml +278 -224
  98. data/resources/locales/af/currencies.yml +954 -916
  99. data/resources/locales/af/languages.yml +583 -580
  100. data/resources/locales/af/layout.yml +5 -5
  101. data/resources/locales/af/lists.yml +23 -7
  102. data/resources/locales/af/numbers.yml +59 -54
  103. data/resources/locales/af/plurals.yml +2 -2
  104. data/resources/locales/af/rbnf.yml +261 -0
  105. data/resources/locales/af/territories.yml +264 -263
  106. data/resources/locales/ar/calendars.yml +287 -259
  107. data/resources/locales/ar/currencies.yml +1730 -1692
  108. data/resources/locales/ar/languages.yml +583 -580
  109. data/resources/locales/ar/layout.yml +5 -5
  110. data/resources/locales/ar/lists.yml +23 -7
  111. data/resources/locales/ar/numbers.yml +66 -61
  112. data/resources/locales/ar/plurals.yml +5 -8
  113. data/resources/locales/ar/rbnf.yml +519 -0
  114. data/resources/locales/ar/territories.yml +264 -263
  115. data/resources/locales/be/calendars.yml +238 -237
  116. data/resources/locales/be/currencies.yml +954 -917
  117. data/resources/locales/be/languages.yml +583 -580
  118. data/resources/locales/be/layout.yml +5 -5
  119. data/resources/locales/be/lists.yml +23 -7
  120. data/resources/locales/be/numbers.yml +62 -57
  121. data/resources/locales/be/plurals.yml +7 -4
  122. data/resources/locales/be/rbnf.yml +1288 -0
  123. data/resources/locales/be/territories.yml +264 -263
  124. data/resources/locales/bg/calendars.yml +278 -218
  125. data/resources/locales/bg/currencies.yml +955 -917
  126. data/resources/locales/bg/languages.yml +583 -580
  127. data/resources/locales/bg/layout.yml +5 -5
  128. data/resources/locales/bg/lists.yml +23 -7
  129. data/resources/locales/bg/numbers.yml +62 -57
  130. data/resources/locales/bg/plurals.yml +2 -2
  131. data/resources/locales/bg/rbnf.yml +280 -0
  132. data/resources/locales/bg/territories.yml +264 -263
  133. data/resources/locales/bn/calendars.yml +287 -225
  134. data/resources/locales/bn/currencies.yml +953 -916
  135. data/resources/locales/bn/languages.yml +583 -580
  136. data/resources/locales/bn/layout.yml +5 -5
  137. data/resources/locales/bn/lists.yml +23 -7
  138. data/resources/locales/bn/numbers.yml +62 -57
  139. data/resources/locales/bn/plurals.yml +2 -2
  140. data/resources/locales/bn/rbnf.yml +4 -0
  141. data/resources/locales/bn/territories.yml +264 -263
  142. data/resources/locales/ca/calendars.yml +278 -278
  143. data/resources/locales/ca/currencies.yml +953 -916
  144. data/resources/locales/ca/languages.yml +583 -580
  145. data/resources/locales/ca/layout.yml +5 -5
  146. data/resources/locales/ca/lists.yml +23 -7
  147. data/resources/locales/ca/numbers.yml +62 -57
  148. data/resources/locales/ca/plurals.yml +3 -2
  149. data/resources/locales/ca/rbnf.yml +756 -0
  150. data/resources/locales/ca/territories.yml +264 -263
  151. data/resources/locales/cs/calendars.yml +269 -262
  152. data/resources/locales/cs/currencies.yml +1483 -1172
  153. data/resources/locales/cs/languages.yml +583 -580
  154. data/resources/locales/cs/layout.yml +5 -5
  155. data/resources/locales/cs/lists.yml +23 -7
  156. data/resources/locales/cs/numbers.yml +64 -58
  157. data/resources/locales/cs/plurals.yml +6 -2
  158. data/resources/locales/cs/rbnf.yml +367 -0
  159. data/resources/locales/cs/territories.yml +264 -263
  160. data/resources/locales/cy/calendars.yml +275 -274
  161. data/resources/locales/cy/currencies.yml +1585 -1548
  162. data/resources/locales/cy/languages.yml +583 -580
  163. data/resources/locales/cy/layout.yml +5 -5
  164. data/resources/locales/cy/lists.yml +23 -7
  165. data/resources/locales/cy/numbers.yml +66 -61
  166. data/resources/locales/cy/plurals.yml +4 -3
  167. data/resources/locales/cy/rbnf.yml +298 -0
  168. data/resources/locales/cy/territories.yml +264 -263
  169. data/resources/locales/da/calendars.yml +281 -280
  170. data/resources/locales/da/currencies.yml +954 -916
  171. data/resources/locales/da/languages.yml +583 -580
  172. data/resources/locales/da/layout.yml +5 -5
  173. data/resources/locales/da/lists.yml +23 -7
  174. data/resources/locales/da/numbers.yml +62 -57
  175. data/resources/locales/da/plurals.yml +3 -2
  176. data/resources/locales/da/rbnf.yml +194 -0
  177. data/resources/locales/da/territories.yml +264 -263
  178. data/resources/locales/de/calendars.yml +294 -293
  179. data/resources/locales/de/currencies.yml +954 -916
  180. data/resources/locales/de/languages.yml +583 -580
  181. data/resources/locales/de/layout.yml +5 -5
  182. data/resources/locales/de/lists.yml +23 -7
  183. data/resources/locales/de/numbers.yml +62 -57
  184. data/resources/locales/de/plurals.yml +3 -2
  185. data/resources/locales/de/rbnf.yml +346 -0
  186. data/resources/locales/de/territories.yml +264 -263
  187. data/resources/locales/el/calendars.yml +279 -282
  188. data/resources/locales/el/currencies.yml +954 -916
  189. data/resources/locales/el/languages.yml +583 -580
  190. data/resources/locales/el/layout.yml +5 -5
  191. data/resources/locales/el/lists.yml +23 -7
  192. data/resources/locales/el/numbers.yml +62 -57
  193. data/resources/locales/el/plurals.yml +2 -2
  194. data/resources/locales/el/rbnf.yml +880 -0
  195. data/resources/locales/el/territories.yml +264 -263
  196. data/resources/locales/en/calendars.yml +192 -191
  197. data/resources/locales/en/currencies.yml +953 -915
  198. data/resources/locales/en/languages.yml +583 -580
  199. data/resources/locales/en/layout.yml +5 -5
  200. data/resources/locales/en/lists.yml +23 -7
  201. data/resources/locales/en/numbers.yml +62 -57
  202. data/resources/locales/en/plurals.yml +3 -2
  203. data/resources/locales/en/rbnf.yml +542 -0
  204. data/resources/locales/en/territories.yml +264 -263
  205. data/resources/locales/en-GB/calendars.yml +195 -194
  206. data/resources/locales/en-GB/currencies.yml +953 -915
  207. data/resources/locales/en-GB/languages.yml +583 -580
  208. data/resources/locales/en-GB/layout.yml +5 -5
  209. data/resources/locales/en-GB/lists.yml +23 -7
  210. data/resources/locales/en-GB/numbers.yml +62 -57
  211. data/resources/locales/en-GB/plurals.yml +2 -1
  212. data/resources/locales/en-GB/rbnf.yml +4 -0
  213. data/resources/locales/en-GB/territories.yml +264 -263
  214. data/resources/locales/es/calendars.yml +288 -238
  215. data/resources/locales/es/currencies.yml +953 -922
  216. data/resources/locales/es/languages.yml +583 -580
  217. data/resources/locales/es/layout.yml +5 -5
  218. data/resources/locales/es/lists.yml +23 -7
  219. data/resources/locales/es/numbers.yml +62 -57
  220. data/resources/locales/es/plurals.yml +2 -2
  221. data/resources/locales/es/rbnf.yml +913 -0
  222. data/resources/locales/es/territories.yml +264 -263
  223. data/resources/locales/eu/calendars.yml +277 -218
  224. data/resources/locales/eu/currencies.yml +953 -916
  225. data/resources/locales/eu/languages.yml +583 -580
  226. data/resources/locales/eu/layout.yml +5 -5
  227. data/resources/locales/eu/lists.yml +23 -7
  228. data/resources/locales/eu/numbers.yml +56 -51
  229. data/resources/locales/eu/plurals.yml +2 -2
  230. data/resources/locales/eu/rbnf.yml +4 -0
  231. data/resources/locales/eu/territories.yml +264 -263
  232. data/resources/locales/fa/calendars.yml +294 -293
  233. data/resources/locales/fa/currencies.yml +955 -916
  234. data/resources/locales/fa/languages.yml +583 -580
  235. data/resources/locales/fa/layout.yml +5 -5
  236. data/resources/locales/fa/lists.yml +23 -7
  237. data/resources/locales/fa/numbers.yml +62 -57
  238. data/resources/locales/fa/plurals.yml +2 -2
  239. data/resources/locales/fa/rbnf.yml +157 -0
  240. data/resources/locales/fa/territories.yml +264 -263
  241. data/resources/locales/fi/calendars.yml +284 -283
  242. data/resources/locales/fi/currencies.yml +953 -915
  243. data/resources/locales/fi/languages.yml +583 -580
  244. data/resources/locales/fi/layout.yml +5 -5
  245. data/resources/locales/fi/lists.yml +23 -7
  246. data/resources/locales/fi/numbers.yml +62 -57
  247. data/resources/locales/fi/plurals.yml +3 -2
  248. data/resources/locales/fi/rbnf.yml +206 -0
  249. data/resources/locales/fi/territories.yml +264 -263
  250. data/resources/locales/fil/calendars.yml +281 -230
  251. data/resources/locales/fil/currencies.yml +953 -916
  252. data/resources/locales/fil/languages.yml +583 -580
  253. data/resources/locales/fil/layout.yml +5 -5
  254. data/resources/locales/fil/lists.yml +23 -7
  255. data/resources/locales/fil/numbers.yml +62 -57
  256. data/resources/locales/fil/plurals.yml +3 -2
  257. data/resources/locales/fil/rbnf.yml +158 -0
  258. data/resources/locales/fil/territories.yml +264 -263
  259. data/resources/locales/fr/calendars.yml +297 -296
  260. data/resources/locales/fr/currencies.yml +968 -949
  261. data/resources/locales/fr/languages.yml +583 -580
  262. data/resources/locales/fr/layout.yml +5 -5
  263. data/resources/locales/fr/lists.yml +23 -7
  264. data/resources/locales/fr/numbers.yml +62 -57
  265. data/resources/locales/fr/plurals.yml +2 -2
  266. data/resources/locales/fr/rbnf.yml +621 -0
  267. data/resources/locales/fr/territories.yml +264 -263
  268. data/resources/locales/ga/calendars.yml +192 -191
  269. data/resources/locales/ga/currencies.yml +954 -916
  270. data/resources/locales/ga/languages.yml +583 -580
  271. data/resources/locales/ga/layout.yml +5 -5
  272. data/resources/locales/ga/lists.yml +23 -7
  273. data/resources/locales/ga/numbers.yml +62 -57
  274. data/resources/locales/ga/plurals.yml +4 -3
  275. data/resources/locales/ga/rbnf.yml +615 -0
  276. data/resources/locales/ga/territories.yml +264 -263
  277. data/resources/locales/gl/calendars.yml +283 -217
  278. data/resources/locales/gl/currencies.yml +953 -916
  279. data/resources/locales/gl/languages.yml +583 -580
  280. data/resources/locales/gl/layout.yml +5 -5
  281. data/resources/locales/gl/lists.yml +23 -7
  282. data/resources/locales/gl/numbers.yml +62 -57
  283. data/resources/locales/gl/plurals.yml +3 -2
  284. data/resources/locales/gl/rbnf.yml +4 -0
  285. data/resources/locales/gl/territories.yml +264 -263
  286. data/resources/locales/he/calendars.yml +248 -220
  287. data/resources/locales/he/currencies.yml +992 -932
  288. data/resources/locales/he/languages.yml +583 -580
  289. data/resources/locales/he/layout.yml +5 -5
  290. data/resources/locales/he/lists.yml +23 -7
  291. data/resources/locales/he/numbers.yml +64 -59
  292. data/resources/locales/he/plurals.yml +6 -3
  293. data/resources/locales/he/rbnf.yml +1029 -0
  294. data/resources/locales/he/territories.yml +264 -263
  295. data/resources/locales/hi/calendars.yml +284 -216
  296. data/resources/locales/hi/currencies.yml +953 -915
  297. data/resources/locales/hi/languages.yml +583 -580
  298. data/resources/locales/hi/layout.yml +5 -5
  299. data/resources/locales/hi/lists.yml +23 -7
  300. data/resources/locales/hi/numbers.yml +60 -55
  301. data/resources/locales/hi/plurals.yml +2 -2
  302. data/resources/locales/hi/rbnf.yml +430 -0
  303. data/resources/locales/hi/territories.yml +264 -263
  304. data/resources/locales/hr/calendars.yml +308 -307
  305. data/resources/locales/hr/currencies.yml +1248 -1504
  306. data/resources/locales/hr/languages.yml +583 -580
  307. data/resources/locales/hr/layout.yml +5 -5
  308. data/resources/locales/hr/lists.yml +23 -7
  309. data/resources/locales/hr/numbers.yml +63 -59
  310. data/resources/locales/hr/plurals.yml +12 -4
  311. data/resources/locales/hr/rbnf.yml +599 -0
  312. data/resources/locales/hr/territories.yml +264 -263
  313. data/resources/locales/hu/calendars.yml +285 -284
  314. data/resources/locales/hu/currencies.yml +954 -916
  315. data/resources/locales/hu/languages.yml +583 -580
  316. data/resources/locales/hu/layout.yml +5 -5
  317. data/resources/locales/hu/lists.yml +23 -7
  318. data/resources/locales/hu/numbers.yml +62 -57
  319. data/resources/locales/hu/plurals.yml +2 -2
  320. data/resources/locales/hu/rbnf.yml +363 -0
  321. data/resources/locales/hu/territories.yml +264 -263
  322. data/resources/locales/id/calendars.yml +276 -275
  323. data/resources/locales/id/currencies.yml +954 -916
  324. data/resources/locales/id/languages.yml +583 -580
  325. data/resources/locales/id/layout.yml +5 -5
  326. data/resources/locales/id/lists.yml +23 -7
  327. data/resources/locales/id/numbers.yml +61 -56
  328. data/resources/locales/id/plurals.yml +2 -2
  329. data/resources/locales/id/rbnf.yml +121 -0
  330. data/resources/locales/id/territories.yml +264 -263
  331. data/resources/locales/is/calendars.yml +281 -242
  332. data/resources/locales/is/currencies.yml +954 -916
  333. data/resources/locales/is/languages.yml +583 -580
  334. data/resources/locales/is/layout.yml +5 -5
  335. data/resources/locales/is/lists.yml +23 -7
  336. data/resources/locales/is/numbers.yml +62 -57
  337. data/resources/locales/is/plurals.yml +5 -2
  338. data/resources/locales/is/rbnf.yml +326 -0
  339. data/resources/locales/is/territories.yml +264 -263
  340. data/resources/locales/it/calendars.yml +275 -260
  341. data/resources/locales/it/currencies.yml +953 -920
  342. data/resources/locales/it/languages.yml +583 -580
  343. data/resources/locales/it/layout.yml +5 -5
  344. data/resources/locales/it/lists.yml +23 -7
  345. data/resources/locales/it/numbers.yml +59 -54
  346. data/resources/locales/it/plurals.yml +3 -2
  347. data/resources/locales/it/rbnf.yml +1189 -0
  348. data/resources/locales/it/territories.yml +264 -263
  349. data/resources/locales/ja/calendars.yml +269 -207
  350. data/resources/locales/ja/currencies.yml +953 -915
  351. data/resources/locales/ja/languages.yml +583 -580
  352. data/resources/locales/ja/layout.yml +5 -5
  353. data/resources/locales/ja/lists.yml +23 -7
  354. data/resources/locales/ja/numbers.yml +62 -57
  355. data/resources/locales/ja/plurals.yml +2 -2
  356. data/resources/locales/ja/rbnf.yml +209 -0
  357. data/resources/locales/ja/territories.yml +264 -263
  358. data/resources/locales/ko/calendars.yml +246 -213
  359. data/resources/locales/ko/currencies.yml +953 -915
  360. data/resources/locales/ko/languages.yml +583 -580
  361. data/resources/locales/ko/layout.yml +5 -5
  362. data/resources/locales/ko/lists.yml +23 -7
  363. data/resources/locales/ko/numbers.yml +60 -55
  364. data/resources/locales/ko/plurals.yml +2 -2
  365. data/resources/locales/ko/rbnf.yml +722 -0
  366. data/resources/locales/ko/territories.yml +264 -263
  367. data/resources/locales/lv/calendars.yml +286 -285
  368. data/resources/locales/lv/currencies.yml +1122 -1084
  369. data/resources/locales/lv/languages.yml +583 -580
  370. data/resources/locales/lv/layout.yml +5 -5
  371. data/resources/locales/lv/lists.yml +23 -7
  372. data/resources/locales/lv/numbers.yml +63 -58
  373. data/resources/locales/lv/plurals.yml +11 -3
  374. data/resources/locales/lv/rbnf.yml +238 -0
  375. data/resources/locales/lv/territories.yml +264 -263
  376. data/resources/locales/ms/calendars.yml +280 -279
  377. data/resources/locales/ms/currencies.yml +954 -916
  378. data/resources/locales/ms/languages.yml +583 -580
  379. data/resources/locales/ms/layout.yml +5 -5
  380. data/resources/locales/ms/lists.yml +23 -7
  381. data/resources/locales/ms/numbers.yml +62 -57
  382. data/resources/locales/ms/plurals.yml +2 -2
  383. data/resources/locales/ms/rbnf.yml +130 -0
  384. data/resources/locales/ms/territories.yml +264 -263
  385. data/resources/locales/nb/calendars.yml +284 -283
  386. data/resources/locales/nb/currencies.yml +958 -916
  387. data/resources/locales/nb/languages.yml +583 -580
  388. data/resources/locales/nb/layout.yml +5 -5
  389. data/resources/locales/nb/lists.yml +23 -7
  390. data/resources/locales/nb/numbers.yml +62 -57
  391. data/resources/locales/nb/plurals.yml +2 -2
  392. data/resources/locales/nb/rbnf.yml +191 -0
  393. data/resources/locales/nb/territories.yml +264 -263
  394. data/resources/locales/nl/calendars.yml +285 -284
  395. data/resources/locales/nl/currencies.yml +953 -917
  396. data/resources/locales/nl/languages.yml +583 -580
  397. data/resources/locales/nl/layout.yml +5 -5
  398. data/resources/locales/nl/lists.yml +23 -7
  399. data/resources/locales/nl/numbers.yml +62 -57
  400. data/resources/locales/nl/plurals.yml +3 -2
  401. data/resources/locales/nl/rbnf.yml +320 -0
  402. data/resources/locales/nl/territories.yml +264 -263
  403. data/resources/locales/pl/calendars.yml +288 -287
  404. data/resources/locales/pl/currencies.yml +1326 -1284
  405. data/resources/locales/pl/languages.yml +583 -580
  406. data/resources/locales/pl/layout.yml +5 -5
  407. data/resources/locales/pl/lists.yml +23 -7
  408. data/resources/locales/pl/numbers.yml +64 -59
  409. data/resources/locales/pl/plurals.yml +11 -4
  410. data/resources/locales/pl/rbnf.yml +410 -0
  411. data/resources/locales/pl/territories.yml +264 -263
  412. data/resources/locales/pt/calendars.yml +290 -289
  413. data/resources/locales/pt/currencies.yml +954 -916
  414. data/resources/locales/pt/languages.yml +583 -580
  415. data/resources/locales/pt/layout.yml +5 -5
  416. data/resources/locales/pt/lists.yml +23 -7
  417. data/resources/locales/pt/numbers.yml +62 -57
  418. data/resources/locales/pt/plurals.yml +4 -2
  419. data/resources/locales/pt/rbnf.yml +586 -0
  420. data/resources/locales/pt/territories.yml +264 -263
  421. data/resources/locales/ro/calendars.yml +284 -283
  422. data/resources/locales/ro/currencies.yml +1170 -1132
  423. data/resources/locales/ro/languages.yml +583 -580
  424. data/resources/locales/ro/layout.yml +5 -5
  425. data/resources/locales/ro/lists.yml +23 -7
  426. data/resources/locales/ro/numbers.yml +63 -58
  427. data/resources/locales/ro/plurals.yml +5 -2
  428. data/resources/locales/ro/rbnf.yml +250 -0
  429. data/resources/locales/ro/territories.yml +264 -263
  430. data/resources/locales/ru/calendars.yml +282 -281
  431. data/resources/locales/ru/currencies.yml +1118 -1247
  432. data/resources/locales/ru/languages.yml +583 -580
  433. data/resources/locales/ru/layout.yml +5 -5
  434. data/resources/locales/ru/lists.yml +23 -7
  435. data/resources/locales/ru/numbers.yml +63 -59
  436. data/resources/locales/ru/plurals.yml +8 -4
  437. data/resources/locales/ru/rbnf.yml +385 -0
  438. data/resources/locales/ru/territories.yml +264 -263
  439. data/resources/locales/sk/calendars.yml +254 -251
  440. data/resources/locales/sk/currencies.yml +1174 -1008
  441. data/resources/locales/sk/languages.yml +583 -580
  442. data/resources/locales/sk/layout.yml +5 -5
  443. data/resources/locales/sk/lists.yml +23 -7
  444. data/resources/locales/sk/numbers.yml +64 -58
  445. data/resources/locales/sk/plurals.yml +6 -2
  446. data/resources/locales/sk/rbnf.yml +304 -0
  447. data/resources/locales/sk/territories.yml +264 -263
  448. data/resources/locales/sq/calendars.yml +283 -206
  449. data/resources/locales/sq/currencies.yml +954 -916
  450. data/resources/locales/sq/languages.yml +583 -580
  451. data/resources/locales/sq/layout.yml +5 -5
  452. data/resources/locales/sq/lists.yml +23 -7
  453. data/resources/locales/sq/numbers.yml +62 -57
  454. data/resources/locales/sq/plurals.yml +2 -2
  455. data/resources/locales/sq/rbnf.yml +181 -0
  456. data/resources/locales/sq/territories.yml +264 -263
  457. data/resources/locales/sr/calendars.yml +290 -289
  458. data/resources/locales/sr/currencies.yml +1251 -1508
  459. data/resources/locales/sr/languages.yml +583 -580
  460. data/resources/locales/sr/layout.yml +5 -5
  461. data/resources/locales/sr/lists.yml +23 -7
  462. data/resources/locales/sr/numbers.yml +62 -58
  463. data/resources/locales/sr/plurals.yml +12 -4
  464. data/resources/locales/sr/rbnf.yml +429 -0
  465. data/resources/locales/sr/territories.yml +264 -263
  466. data/resources/locales/sv/calendars.yml +290 -289
  467. data/resources/locales/sv/currencies.yml +960 -930
  468. data/resources/locales/sv/languages.yml +583 -580
  469. data/resources/locales/sv/layout.yml +5 -5
  470. data/resources/locales/sv/lists.yml +23 -7
  471. data/resources/locales/sv/numbers.yml +63 -58
  472. data/resources/locales/sv/plurals.yml +3 -2
  473. data/resources/locales/sv/rbnf.yml +692 -0
  474. data/resources/locales/sv/territories.yml +264 -263
  475. data/resources/locales/ta/calendars.yml +281 -266
  476. data/resources/locales/ta/currencies.yml +953 -915
  477. data/resources/locales/ta/languages.yml +583 -580
  478. data/resources/locales/ta/layout.yml +5 -5
  479. data/resources/locales/ta/lists.yml +23 -7
  480. data/resources/locales/ta/numbers.yml +62 -57
  481. data/resources/locales/ta/plurals.yml +2 -2
  482. data/resources/locales/ta/rbnf.yml +241 -0
  483. data/resources/locales/ta/territories.yml +264 -263
  484. data/resources/locales/th/calendars.yml +278 -289
  485. data/resources/locales/th/currencies.yml +953 -915
  486. data/resources/locales/th/languages.yml +583 -580
  487. data/resources/locales/th/layout.yml +5 -5
  488. data/resources/locales/th/lists.yml +23 -7
  489. data/resources/locales/th/numbers.yml +62 -57
  490. data/resources/locales/th/plurals.yml +2 -2
  491. data/resources/locales/th/rbnf.yml +119 -0
  492. data/resources/locales/th/territories.yml +264 -263
  493. data/resources/locales/tr/calendars.yml +287 -286
  494. data/resources/locales/tr/currencies.yml +953 -916
  495. data/resources/locales/tr/languages.yml +583 -580
  496. data/resources/locales/tr/layout.yml +5 -5
  497. data/resources/locales/tr/lists.yml +23 -7
  498. data/resources/locales/tr/numbers.yml +61 -56
  499. data/resources/locales/tr/plurals.yml +2 -2
  500. data/resources/locales/tr/rbnf.yml +277 -0
  501. data/resources/locales/tr/territories.yml +264 -263
  502. data/resources/locales/uk/calendars.yml +286 -252
  503. data/resources/locales/uk/currencies.yml +1311 -1070
  504. data/resources/locales/uk/languages.yml +583 -580
  505. data/resources/locales/uk/layout.yml +5 -5
  506. data/resources/locales/uk/lists.yml +23 -7
  507. data/resources/locales/uk/numbers.yml +64 -59
  508. data/resources/locales/uk/plurals.yml +10 -4
  509. data/resources/locales/uk/rbnf.yml +430 -0
  510. data/resources/locales/uk/territories.yml +264 -263
  511. data/resources/locales/ur/calendars.yml +267 -228
  512. data/resources/locales/ur/currencies.yml +954 -916
  513. data/resources/locales/ur/languages.yml +583 -580
  514. data/resources/locales/ur/layout.yml +5 -5
  515. data/resources/locales/ur/lists.yml +23 -7
  516. data/resources/locales/ur/numbers.yml +62 -57
  517. data/resources/locales/ur/plurals.yml +3 -2
  518. data/resources/locales/ur/rbnf.yml +4 -0
  519. data/resources/locales/ur/territories.yml +264 -263
  520. data/resources/locales/vi/calendars.yml +256 -236
  521. data/resources/locales/vi/currencies.yml +953 -915
  522. data/resources/locales/vi/languages.yml +583 -580
  523. data/resources/locales/vi/layout.yml +5 -5
  524. data/resources/locales/vi/lists.yml +23 -7
  525. data/resources/locales/vi/numbers.yml +62 -57
  526. data/resources/locales/vi/plurals.yml +2 -2
  527. data/resources/locales/vi/rbnf.yml +164 -0
  528. data/resources/locales/vi/territories.yml +264 -263
  529. data/resources/locales/zh/calendars.yml +266 -265
  530. data/resources/locales/zh/currencies.yml +953 -915
  531. data/resources/locales/zh/languages.yml +583 -580
  532. data/resources/locales/zh/layout.yml +5 -5
  533. data/resources/locales/zh/lists.yml +23 -7
  534. data/resources/locales/zh/numbers.yml +62 -57
  535. data/resources/locales/zh/plurals.yml +2 -2
  536. data/resources/locales/zh/rbnf.yml +689 -0
  537. data/resources/locales/zh/territories.yml +264 -263
  538. data/resources/locales/zh-Hant/calendars.yml +266 -265
  539. data/resources/locales/zh-Hant/currencies.yml +955 -915
  540. data/resources/locales/zh-Hant/languages.yml +583 -580
  541. data/resources/locales/zh-Hant/layout.yml +5 -5
  542. data/resources/locales/zh-Hant/lists.yml +23 -7
  543. data/resources/locales/zh-Hant/numbers.yml +62 -57
  544. data/resources/locales/zh-Hant/plurals.yml +2 -2
  545. data/resources/locales/zh-Hant/rbnf.yml +647 -0
  546. data/resources/locales/zh-Hant/territories.yml +264 -263
  547. data/resources/shared/currency_digits_and_rounding.yml +67 -64
  548. data/resources/shared/numbering_systems.yml +176 -0
  549. data/resources/shared/rbnf_root.yml +1573 -0
  550. data/resources/shared/segments/segments_root.yml +728 -0
  551. data/resources/shared/segments/tailorings/en.yml +8 -0
  552. data/resources/uli/segments/de.yml +128 -0
  553. data/resources/uli/segments/en.yml +154 -0
  554. data/resources/uli/segments/es.yml +112 -0
  555. data/resources/uli/segments/fr.yml +47 -0
  556. data/resources/uli/segments/it.yml +37 -0
  557. data/resources/uli/segments/pt.yml +173 -0
  558. data/resources/uli/segments/ru.yml +10 -0
  559. data/resources/unicode_data/casefolding.yml +4765 -0
  560. data/resources/unicode_data/indices/bidi_class.yml +4572 -0
  561. data/resources/unicode_data/indices/bidi_mirrored.yml +3087 -0
  562. data/resources/unicode_data/indices/category.yml +10918 -0
  563. data/resources/unicode_data/indices/keys.yml +101 -0
  564. data/resources/unicode_data/properties/line_break.yml +9269 -0
  565. data/resources/unicode_data/properties/sentence_break.yml +8067 -0
  566. data/resources/unicode_data/properties/word_break.yml +3001 -0
  567. data/spec/collation/collation_spec.rb +2 -1
  568. data/spec/collation/collator_spec.rb +4 -3
  569. data/spec/collation/tailoring_spec.rb +2 -2
  570. data/spec/collation/tailoring_tests/he.txt +5 -2
  571. data/spec/{tokenizers/calendars → data_readers}/additional_date_format_selector_spec.rb +13 -13
  572. data/spec/data_readers/date_time_data_reader_spec.rb +26 -0
  573. data/spec/data_readers/number_data_reader_spec.rb +18 -0
  574. data/spec/data_readers/timespan_data_reader.rb +22 -0
  575. data/spec/formatters/calendars/datetime_formatter_spec.rb +18 -22
  576. data/spec/formatters/list_formatter_spec.rb +16 -87
  577. data/spec/formatters/numbers/abbreviated/abbreviated_number_formatter_spec.rb +15 -59
  578. data/spec/formatters/numbers/abbreviated/long_decimal_formatter_spec.rb +32 -17
  579. data/spec/formatters/numbers/abbreviated/short_decimal_formatter_spec.rb +33 -17
  580. data/spec/formatters/numbers/currency_formatter_spec.rb +18 -13
  581. data/spec/formatters/numbers/decimal_formatter_spec.rb +16 -18
  582. data/spec/formatters/numbers/number_formatter_spec.rb +40 -31
  583. data/spec/formatters/numbers/percent_formatter_spec.rb +14 -6
  584. data/spec/formatters/numbers/rbnf/allowed_failures.yml +74 -0
  585. data/spec/formatters/numbers/rbnf/locales/af/rbnf_test.yml +706 -0
  586. data/spec/formatters/numbers/rbnf/locales/ar/rbnf_test.yml +706 -0
  587. data/spec/formatters/numbers/rbnf/locales/be/rbnf_test.yml +1174 -0
  588. data/spec/formatters/numbers/rbnf/locales/bg/rbnf_test.yml +706 -0
  589. data/spec/formatters/numbers/rbnf/locales/bn/rbnf_test.yml +1291 -0
  590. data/spec/formatters/numbers/rbnf/locales/ca/rbnf_test.yml +1174 -0
  591. data/spec/formatters/numbers/rbnf/locales/cs/rbnf_test.yml +823 -0
  592. data/spec/formatters/numbers/rbnf/locales/cy/rbnf_test.yml +940 -0
  593. data/spec/formatters/numbers/rbnf/locales/da/rbnf_test.yml +940 -0
  594. data/spec/formatters/numbers/rbnf/locales/de/rbnf_test.yml +940 -0
  595. data/spec/formatters/numbers/rbnf/locales/el/rbnf_test.yml +1174 -0
  596. data/spec/formatters/numbers/rbnf/locales/en/rbnf_test.yml +1291 -0
  597. data/spec/formatters/numbers/rbnf/locales/en-GB/rbnf_test.yml +1291 -0
  598. data/spec/formatters/numbers/rbnf/locales/es/rbnf_test.yml +1642 -0
  599. data/spec/formatters/numbers/rbnf/locales/eu/rbnf_test.yml +1291 -0
  600. data/spec/formatters/numbers/rbnf/locales/fa/rbnf_test.yml +589 -0
  601. data/spec/formatters/numbers/rbnf/locales/fi/rbnf_test.yml +706 -0
  602. data/spec/formatters/numbers/rbnf/locales/fil/rbnf_test.yml +706 -0
  603. data/spec/formatters/numbers/rbnf/locales/fr/rbnf_test.yml +1408 -0
  604. data/spec/formatters/numbers/rbnf/locales/ga/rbnf_test.yml +940 -0
  605. data/spec/formatters/numbers/rbnf/locales/gl/rbnf_test.yml +1291 -0
  606. data/spec/formatters/numbers/rbnf/locales/he/rbnf_test.yml +1057 -0
  607. data/spec/formatters/numbers/rbnf/locales/hi/rbnf_test.yml +823 -0
  608. data/spec/formatters/numbers/rbnf/locales/hr/rbnf_test.yml +1174 -0
  609. data/spec/formatters/numbers/rbnf/locales/hu/rbnf_test.yml +940 -0
  610. data/spec/formatters/numbers/rbnf/locales/id/rbnf_test.yml +706 -0
  611. data/spec/formatters/numbers/rbnf/locales/is/rbnf_test.yml +823 -0
  612. data/spec/formatters/numbers/rbnf/locales/it/rbnf_test.yml +1174 -0
  613. data/spec/formatters/numbers/rbnf/locales/ja/rbnf_test.yml +823 -0
  614. data/spec/formatters/numbers/rbnf/locales/ko/rbnf_test.yml +1408 -0
  615. data/spec/formatters/numbers/rbnf/locales/lv/rbnf_test.yml +706 -0
  616. data/spec/formatters/numbers/rbnf/locales/ms/rbnf_test.yml +706 -0
  617. data/spec/formatters/numbers/rbnf/locales/nb/rbnf_test.yml +940 -0
  618. data/spec/formatters/numbers/rbnf/locales/nl/rbnf_test.yml +706 -0
  619. data/spec/formatters/numbers/rbnf/locales/pl/rbnf_test.yml +823 -0
  620. data/spec/formatters/numbers/rbnf/locales/pt/rbnf_test.yml +1174 -0
  621. data/spec/formatters/numbers/rbnf/locales/ro/rbnf_test.yml +823 -0
  622. data/spec/formatters/numbers/rbnf/locales/ru/rbnf_test.yml +823 -0
  623. data/spec/formatters/numbers/rbnf/locales/sk/rbnf_test.yml +823 -0
  624. data/spec/formatters/numbers/rbnf/locales/sq/rbnf_test.yml +706 -0
  625. data/spec/formatters/numbers/rbnf/locales/sr/rbnf_test.yml +940 -0
  626. data/spec/formatters/numbers/rbnf/locales/sv/rbnf_test.yml +1876 -0
  627. data/spec/formatters/numbers/rbnf/locales/ta/rbnf_test.yml +706 -0
  628. data/spec/formatters/numbers/rbnf/locales/th/rbnf_test.yml +706 -0
  629. data/spec/formatters/numbers/rbnf/locales/tr/rbnf_test.yml +706 -0
  630. data/spec/formatters/numbers/rbnf/locales/uk/rbnf_test.yml +823 -0
  631. data/spec/formatters/numbers/rbnf/locales/ur/rbnf_test.yml +1291 -0
  632. data/spec/formatters/numbers/rbnf/locales/vi/rbnf_test.yml +706 -0
  633. data/spec/formatters/numbers/rbnf/locales/zh/rbnf_test.yml +940 -0
  634. data/spec/formatters/numbers/rbnf/locales/zh-Hant/rbnf_test.yml +940 -0
  635. data/spec/formatters/numbers/rbnf/rbnf_spec.rb +98 -0
  636. data/spec/formatters/plurals/plural_formatter_spec.rb +4 -4
  637. data/spec/formatters/plurals/rules_spec.rb +5 -5
  638. data/spec/localized/localized_date_spec.rb +1 -1
  639. data/spec/localized/localized_datetime_spec.rb +8 -13
  640. data/spec/localized/localized_number_spec.rb +17 -32
  641. data/spec/localized/localized_object_spec.rb +0 -5
  642. data/spec/localized/localized_string_spec.rb +40 -2
  643. data/spec/localized/localized_time_spec.rb +3 -6
  644. data/spec/localized/localized_timespan_spec.rb +144 -0
  645. data/spec/normalization_spec.rb +12 -12
  646. data/spec/parsers/number_parser_spec.rb +5 -5
  647. data/spec/parsers/parser_spec.rb +60 -0
  648. data/spec/parsers/segmentation_parser_spec.rb +96 -0
  649. data/spec/parsers/symbol_table_spec.rb +32 -0
  650. data/spec/parsers/unicode_regex/character_class_spec.rb +117 -0
  651. data/spec/parsers/unicode_regex/character_range_spec.rb +21 -0
  652. data/spec/parsers/unicode_regex/character_set_spec.rb +36 -0
  653. data/spec/parsers/unicode_regex/literal_spec.rb +34 -0
  654. data/spec/parsers/unicode_regex/unicode_string_spec.rb +22 -0
  655. data/spec/parsers/unicode_regex_parser_spec.rb +86 -0
  656. data/spec/readme_spec.rb +8 -269
  657. data/spec/shared/break_iterator_spec.rb +72 -0
  658. data/spec/shared/calendar_spec.rb +5 -4
  659. data/spec/shared/casefolder_spec.rb +30 -0
  660. data/spec/shared/casefolding.txt +251 -0
  661. data/spec/shared/casefolding_expected.txt +251 -0
  662. data/spec/shared/code_point_spec.rb +44 -14
  663. data/spec/shared/numbering_system_spec.rb +41 -0
  664. data/spec/shared/territories_spec.rb +14 -6
  665. data/spec/shared/unicode_regex_spec.rb +203 -0
  666. data/spec/spec_helper.rb +17 -0
  667. data/spec/tokenizers/calendars/date_tokenizer_spec.rb +26 -30
  668. data/spec/tokenizers/calendars/datetime_tokenizer_spec.rb +11 -90
  669. data/spec/tokenizers/calendars/time_tokenizer_spec.rb +5 -5
  670. data/spec/tokenizers/calendars/timespan_tokenizer_spec.rb +17 -7
  671. data/spec/tokenizers/numbers/number_tokenizer_spec.rb +28 -27
  672. data/spec/tokenizers/segmentation/segmentation_tokenizer_spec.rb +40 -0
  673. data/spec/tokenizers/unicode_regex/unicode_regex_tokenizer_spec.rb +190 -0
  674. data/spec/utils/range_set_spec.rb +171 -0
  675. data/spec/utils/yaml/yaml_spec.rb +62 -51
  676. data/twitter_cldr.gemspec +1 -1
  677. metadata +199 -30
  678. data/lib/twitter_cldr/formatters/base.rb +0 -47
  679. data/lib/twitter_cldr/formatters/calendars/date_formatter.rb +0 -19
  680. data/lib/twitter_cldr/formatters/calendars/time_formatter.rb +0 -19
  681. data/lib/twitter_cldr/normalization/base.rb +0 -37
  682. data/lib/twitter_cldr/normalization/hangul.rb +0 -79
  683. data/lib/twitter_cldr/normalization/nfc.rb +0 -24
  684. data/lib/twitter_cldr/normalization/nfd.rb +0 -26
  685. data/lib/twitter_cldr/normalization/nfkc.rb +0 -114
  686. data/lib/twitter_cldr/normalization/nfkd.rb +0 -120
  687. data/lib/twitter_cldr/normalization/quick_check.rb +0 -41
  688. data/lib/twitter_cldr/tokenizers/base.rb +0 -169
  689. data/lib/twitter_cldr/tokenizers/calendars/datetime_tokenizer.rb +0 -131
  690. data/lib/twitter_cldr/utils/territories.rb +0 -56
  691. data/spec/formatters/base_spec.rb +0 -18
  692. data/spec/formatters/calendars/timespan_formatter_spec.rb +0 -112
  693. data/spec/normalization/NormalizationTestShort.txt +0 -602
  694. data/spec/normalization/base_spec.rb +0 -16
  695. data/spec/normalization/hangul_spec.rb +0 -42
  696. data/spec/normalization/normalization_spec.rb +0 -113
  697. data/spec/tokenizers/base_spec.rb +0 -259
  698. data/spec/utils/territories_spec.rb +0 -16
@@ -0,0 +1,83 @@
1
+ # encoding: UTF-8
2
+
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
6
+ module TwitterCldr
7
+ module Parsers
8
+ class UnicodeRegexParser
9
+ class Literal < Component
10
+
11
+ attr_reader :text
12
+
13
# Returns the integer code for the first character of +char+.
#
# String#ord is used when the runtime provides it; otherwise the value
# falls back to char[0]. Plain ord (rather than unpack) suffices here
# because ASCII characters share their numbers with the matching
# Unicode code points.
def self.ordinalize(char)
  return char.ord if char.respond_to?(:ord)

  char[0]
end
22
+
23
+ SPECIAL_CHARACTERS = {
24
+ "s" => [32], # space
25
+ "t" => [9], # tab
26
+ "r" => [13], # carriage return
27
+ "n" => [10], # newline
28
+ "f" => [12], # form feed
29
+ "d" => ("0".."9").to_a.map { |c| ordinalize(c) },
30
+ "w" => (("a".."z").to_a + ("A".."Z").to_a + ("0".."9").to_a + ["_"]).map { |c| ordinalize(c) }
31
+ }
32
+
33
+ def initialize(text)
34
+ @text = text
35
+ end
36
+
37
+ def to_regexp_str
38
+ text
39
+ end
40
+
41
# Converts this literal into a RangeSet.
#
# Text that starts with a backslash has the backslash stripped. If the
# remainder names an entry in SPECIAL_CHARACTERS (compared
# case-insensitively) the set comes from set_for_special_char; every
# other literal becomes a one-element set holding its ordinal.
def to_set
  escaped = text =~ /^\\/
  symbol = escaped ? text[1..-1] : text

  if escaped && SPECIAL_CHARACTERS.include?(symbol.downcase)
    return set_for_special_char(symbol)
  end

  TwitterCldr::Utils::RangeSet.from_array([self.class.ordinalize(symbol)])
end
58
+
59
+ private
60
+
61
+ def set_for_special_char(char)
62
+ special_char_set_cache[char] ||= begin
63
+ chars = TwitterCldr::Utils::RangeSet.from_array(
64
+ SPECIAL_CHARACTERS[char.downcase]
65
+ )
66
+
67
+ if char.upcase == char
68
+ UnicodeRegex.valid_regexp_chars.subtract(chars)
69
+ else
70
+ chars
71
+ end
72
+ end
73
+ end
74
+
75
+ def special_char_set_cache
76
+ @@special_char_set_cache ||= {}
77
+ end
78
+
79
+ end
80
+
81
+ end
82
+ end
83
+ end
@@ -0,0 +1,41 @@
1
+ # encoding: UTF-8
2
+
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
6
+ module TwitterCldr
7
+ module Parsers
8
+ class UnicodeRegexParser
9
+
10
+ # unicode_char, escaped_char, string, multichar_string
11
+ # Can exist inside and outside of character classes
12
+ class UnicodeString < Component
13
+
14
+ attr_reader :codepoints
15
+
16
+ def initialize(codepoints)
17
+ @codepoints = codepoints
18
+ end
19
+
20
# Builds a RangeSet containing a single degenerate range.
#
# A lone codepoint yields an integer range (cp..cp). Several codepoints
# are kept together as one group (a multichar string), so the range
# endpoints are the codepoint array itself. That is admittedly a hack:
# RangeSet needs special logic for entries that are not true integer
# ranges.
def to_set
  bound = codepoints.size > 1 ? codepoints : codepoints.first
  TwitterCldr::Utils::RangeSet.new([bound..bound])
end
32
+
33
# Renders the codepoints as a regular-expression fragment by handing
# them to array_to_regex (not defined in this class; presumably
# supplied by the Component superclass).
#
# The codepoints are wrapped in an Array first when they are not one
# already. The normalised value is now what gets passed on; previously
# it was computed and then ignored in favour of the raw value.
def to_regexp_str
  cps = codepoints.is_a?(Array) ? codepoints : [codepoints]
  array_to_regex(cps)
end
37
+
38
+ end
39
+ end
40
+ end
41
+ end
@@ -0,0 +1,262 @@
1
+ # encoding: UTF-8
2
+
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
6
+ module TwitterCldr
7
+ module Parsers
8
+
9
+ class UnicodeRegexParserError < StandardError; end
10
+
11
+ class UnicodeRegexParser < Parser
12
+
13
+ autoload :Component, "twitter_cldr/parsers/unicode_regex/component"
14
+ autoload :CharacterClass, "twitter_cldr/parsers/unicode_regex/character_class"
15
+ autoload :CharacterRange, "twitter_cldr/parsers/unicode_regex/character_range"
16
+ autoload :CharacterSet, "twitter_cldr/parsers/unicode_regex/character_set"
17
+ autoload :Literal, "twitter_cldr/parsers/unicode_regex/literal"
18
+ autoload :UnicodeString, "twitter_cldr/parsers/unicode_regex/unicode_string"
19
+
20
+ def parse(tokens, options = {})
21
+ super(
22
+ preprocess(
23
+ substitute_variables(tokens, options[:symbol_table])
24
+ ), options
25
+ )
26
+ end
27
+
28
+ private
29
+
30
+ # Types that are allowed to be used in character ranges.
31
+ CHARACTER_CLASS_TOKEN_TYPES = [
32
+ :variable, :character_set, :negated_character_set, :unicode_char,
33
+ :multichar_string, :string, :escaped_character, :character_range
34
+ ]
35
+
36
+ NEGATED_TOKEN_TYPES = [
37
+ :negated_character_set
38
+ ]
39
+
40
+ BINARY_OPERATORS = [
41
+ :pipe, :ampersand, :dash, :union
42
+ ]
43
+
44
+ UNARY_OPERATORS = [
45
+ :negate
46
+ ]
47
+
48
+ def make_token(type, value = nil)
49
+ TwitterCldr::Tokenizers::Token.new({
50
+ :type => type,
51
+ :value => value
52
+ })
53
+ end
54
+
55
# Rewrites the raw token stream so the expression parser sees explicit
# structure:
#
# * a :union placeholder is inserted between two adjacent character
#   class entities, and between a closing bracket that is directly
#   followed by an opening bracket;
# * an "entity, dash, entity" triple is collapsed into one character
#   range object;
# * a negated token is wrapped as "[", negate, token, "]".
#
# Returns a new array; the incoming tokens array is not modified.
def preprocess(tokens)
  output = []
  pos = 0

  while pos < tokens.size
    current = tokens[pos]
    previous = output.last

    needs_union =
      (valid_character_class_token?(previous) && current.type != :close_bracket) ||
      (previous && previous.type == :close_bracket && current.type == :open_bracket)

    output << make_token(:union) if needs_union

    # The dash is inspected last, so tokens[pos + 1] is only touched
    # once tokens[pos + 2] is known to exist.
    starts_range = valid_character_class_token?(current) &&
      valid_character_class_token?(tokens[pos + 2]) &&
      tokens[pos + 1].type == :dash

    if starts_range
      range_end = tokens[pos + 2]
      initial = send(current.type, current)
      final = send(range_end.type, range_end)
      output << make_character_range(initial, final)
      pos += 3
    elsif negated_token?(current)
      output.concat([
        make_token(:open_bracket),
        make_token(:negate),
        current,
        make_token(:close_bracket)
      ])
      pos += 1
    else
      output << current
      pos += 1
    end
  end

  output
end
96
+
97
# Replaces every :variable token that symbol_table can resolve with
# the tokens it stands for. The replacement is expanded recursively,
# because a variable may itself refer to other variables (the result
# is not cached). Variables the table cannot resolve, and all other
# tokens, are passed through untouched.
#
# Returns the original tokens object when no symbol table is given.
def substitute_variables(tokens, symbol_table)
  return tokens unless symbol_table

  expanded = []

  tokens.each do |token|
    replacement = token.type == :variable && symbol_table.fetch(token.value)

    if replacement
      expanded.concat(substitute_variables(replacement, symbol_table))
    else
      expanded << token
    end
  end

  expanded
end
110
+
111
+ def make_character_range(initial, final)
112
+ CharacterRange.new(initial, final)
113
+ end
114
+
115
+ def negated_token?(token)
116
+ token && NEGATED_TOKEN_TYPES.include?(token.type)
117
+ end
118
+
119
+ def valid_character_class_token?(token)
120
+ token && CHARACTER_CLASS_TOKEN_TYPES.include?(token.type)
121
+ end
122
+
123
+ def unary_operator?(token)
124
+ token && UNARY_OPERATORS.include?(token.type)
125
+ end
126
+
127
+ def binary_operator?(token)
128
+ token && BINARY_OPERATORS.include?(token.type)
129
+ end
130
+
131
+ def do_parse(options)
132
+ elements = []
133
+
134
+ while current_token
135
+ case current_token.type
136
+ when :open_bracket
137
+ elements << character_class
138
+ when :union
139
+ next_token(:union)
140
+ else
141
+ elements << send(current_token.type, current_token)
142
+ next_token(current_token.type)
143
+ end
144
+ end
145
+
146
+ elements
147
+ end
148
+
149
+ def character_set(token)
150
+ CharacterSet.new(
151
+ token.value.gsub(/^\\p/, "").gsub(/[\{\}\[\]:]/, "")
152
+ )
153
+ end
154
+
155
+ def negated_character_set(token)
156
+ CharacterSet.new(
157
+ token.value.gsub(/^\\[pP]/, "").gsub(/[\{\}\[\]:^]/, "")
158
+ )
159
+ end
160
+
161
# Turns a unicode escape token into a one-codepoint UnicodeString:
# the leading "\u" and any curly braces are removed and the remaining
# text is read as a hexadecimal number.
def unicode_char(token)
  hex_digits = token.value.gsub(/^\\u/, "").delete("{}")
  UnicodeString.new([hex_digits.to_i(16)])
end
166
+
167
+ def string(token)
168
+ UnicodeString.new(
169
+ token.value.unpack("U*")
170
+ )
171
+ end
172
+
173
+ def multichar_string(token)
174
+ UnicodeString.new(
175
+ token.value.gsub(/[\{\}]/, "").unpack("U*")
176
+ )
177
+ end
178
+
179
+ def escaped_character(token)
180
+ Literal.new(token.value)
181
+ end
182
+
183
+ def special_char(token)
184
+ Literal.new(token.value)
185
+ end
186
+
187
+ alias :negate :special_char
188
+ alias :pipe :special_char
189
+ alias :ampersand :special_char
190
+
191
+ # current_token is already a CharacterRange object
192
+ def character_range(token)
193
+ token
194
+ end
195
+
196
+ def character_class
197
+ operator_stack = []
198
+ operand_stack = []
199
+ open_count = 0
200
+
201
+ loop do
202
+ case current_token.type
203
+ when *CharacterClass.closing_types
204
+ last_operator = peek(operator_stack)
205
+ open_count -= 1
206
+
207
+ until last_operator.type == CharacterClass.opening_type_for(current_token.type)
208
+ operator = operator_stack.pop
209
+
210
+ node = if unary_operator?(operator)
211
+ unary_operator_node(operator.type, operand_stack.pop)
212
+ else
213
+ binary_operator_node(
214
+ operator.type, operand_stack.pop, operand_stack.pop
215
+ )
216
+ end
217
+
218
+ operand_stack.push(node)
219
+ last_operator = peek(operator_stack)
220
+ end
221
+ operator_stack.pop
222
+
223
+ when *CharacterClass.opening_types
224
+ open_count += 1
225
+ operator_stack.push(current_token)
226
+
227
+ when *(BINARY_OPERATORS + UNARY_OPERATORS)
228
+ operator_stack.push(current_token)
229
+
230
+ else
231
+ operand_stack.push(
232
+ send(current_token.type, current_token)
233
+ )
234
+ end
235
+
236
+ next_token(current_token.type)
237
+ break if operator_stack.empty? && open_count == 0
238
+ end
239
+
240
+ CharacterClass.new(operand_stack.pop)
241
+ end
242
+
243
+ def peek(array)
244
+ array.last
245
+ end
246
+
247
+ def binary_operator_node(operator, right, left)
248
+ CharacterClass::BinaryOperator.new(
249
+ operator, left, right
250
+ )
251
+ end
252
+
253
+ def unary_operator_node(operator, child)
254
+ CharacterClass::UnaryOperator.new(
255
+ operator, child
256
+ )
257
+ end
258
+
259
+ end
260
+
261
+ end
262
+ end
@@ -5,6 +5,10 @@
5
5
 
6
6
  module TwitterCldr
7
7
  module Parsers
8
- autoload :NumberParser, 'twitter_cldr/parsers/number_parser'
8
+ autoload :Parser, 'twitter_cldr/parsers/parser'
9
+ autoload :SymbolTable, 'twitter_cldr/parsers/symbol_table'
10
+ autoload :UnicodeRegexParser, 'twitter_cldr/parsers/unicode_regex_parser'
11
+ autoload :NumberParser, 'twitter_cldr/parsers/number_parser'
12
+ autoload :SegmentationParser, 'twitter_cldr/parsers/segmentation_parser'
9
13
  end
10
14
  end
@@ -0,0 +1,64 @@
1
+ # encoding: UTF-8
2
+
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
6
+ module TwitterCldr
7
+ module Shared
8
+ class Casefolder
9
+ class << self
10
+
11
+ CASEFOLDING_REGEX_C = /<%= casefolding_char_class_for("C") %>/
12
+ CASEFOLDING_REGEX_F = /<%= casefolding_char_class_for("F") %>/
13
+ CASEFOLDING_REGEX_S = /<%= casefolding_char_class_for("S") %>/
14
+ CASEFOLDING_REGEX_T = /<%= casefolding_char_class_for("T") %>/
15
+
16
+ CASEFOLDING_HASH = <%= inspect_hash_in_lines(casefolding_hash_for(["C", "F", "S"]), 8, 5) %>
17
+ CASEFOLDING_HASH_T = CASEFOLDING_HASH.merge(
18
+ <%= inspect_hash_in_lines(casefolding_hash_for(["T"]), 9, 6) %>
19
+ )
20
+
21
+ def simple_casefold(str, t = false)
22
+ perform_casefold(str, simple_casefold_regex, t)
23
+ end
24
+
25
+ def full_casefold(str, t = false)
26
+ perform_casefold(str, full_casefold_regex, t)
27
+ end
28
+
29
+ alias :casefold :full_casefold
30
+
31
+ def common_casefold(str)
32
+ perform_casefold(str, CASEFOLDING_REGEX_C, false)
33
+ end
34
+
35
+ private
36
+
37
# Replaces every substring of +str+ matched by +regex+ with its
# casefolded form and returns the resulting string.
#
# When +t+ is truthy the regex is widened through regex_with_t and
# CASEFOLDING_HASH_T is consulted; otherwise CASEFOLDING_HASH is used.
# Each matched code point is looked up in the chosen table and the
# mapped code points are concatenated.
def perform_casefold(str, regex, t)
  if t
    regex = regex_with_t(regex)
    mapping = CASEFOLDING_HASH_T
  else
    mapping = CASEFOLDING_HASH
  end

  str.gsub(regex) do |match|
    folded = []
    match.unpack("U*").each { |code_point| folded += mapping[code_point] }
    folded.pack("U*")
  end
end
47
+
48
+ def simple_casefold_regex
49
+ @simple_casefold_regex ||= Regexp.union(CASEFOLDING_REGEX_C, CASEFOLDING_REGEX_S)
50
+ end
51
+
52
+ def full_casefold_regex
53
+ @full_casefold_regex ||= Regexp.union(CASEFOLDING_REGEX_C, CASEFOLDING_REGEX_F)
54
+ end
55
+
56
# Returns +regex+ combined (via Regexp.union) with CASEFOLDING_REGEX_T.
#
# Results are memoised per regex source string. The cache hash is
# created lazily: nothing else in this class initialises
# @regex_with_t_cache, so indexing it directly raised NoMethodError on
# nil the first time a casefold was requested with t = true.
def regex_with_t(regex)
  @regex_with_t_cache ||= {}
  @regex_with_t_cache[regex.source] ||=
    Regexp.union(regex, CASEFOLDING_REGEX_T)
end
60
+
61
+ end
62
+ end
63
+ end
64
+ end
@@ -0,0 +1,75 @@
1
+ # encoding: UTF-8
2
+
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
6
+ require 'erb'
7
+
8
+ module TwitterCldr
9
+ module Resources
10
+ class CasefolderClassGenerator
11
+
12
+ attr_reader :template_file, :output_dir
13
+
14
+ def initialize(template_file, output_dir)
15
+ @template_file = template_file
16
+ @output_dir = output_dir
17
+ end
18
+
19
+ def generate
20
+ output_file = File.basename(template_file).chomp(".erb")
21
+ File.open(File.join(output_dir, output_file), "w+") do |f|
22
+ f.write(
23
+ ERB.new(File.read(template_file)).result(binding)
24
+ )
25
+ end
26
+ end
27
+
28
+ def casefolding_char_class_for(status)
29
+ to_regex_char_sequence(casefolding_data_for(status))
30
+ end
31
+
32
# Merges the casefolding data of every status in +statuses+ into one
# hash. When the same key occurs for more than one status, the value
# from the later status wins.
def casefolding_hash_for(statuses)
  combined = {}
  statuses.each { |status| combined.update(casefolding_data_for(status)) }
  combined
end
38
+
39
# Renders +hash+ as a hash literal spread over several lines.
#
# hash     - the hash to render; keys and values are written with #inspect
# per_line - number of key=>value pairs placed on each line
# indent   - number of spaces before each line of pairs; the closing
#            brace is indented by one space less
#
# Returns the rendered String.
def inspect_hash_in_lines(hash, per_line, indent)
  padding = " " * indent
  last_index = hash.size - 1
  out = "{\n" + padding

  hash.each_with_index do |(key, val), idx|
    out << "\n" << padding if idx > 0 && (idx % per_line).zero?
    out << key.inspect << "=>" << val.inspect
    out << ", " unless idx == last_index
  end

  out << "\n" << (" " * (indent - 1)) << "}"
end
50
+
51
+ private
52
+
53
+ def to_regex_char_sequence(casefold_data)
54
+ casefold_data.map { |(source, _)| to_utf8(source) }.join("|")
55
+ end
56
+
57
# Encodes one code point, or an array of code points, as UTF-8 and
# returns the bytes written as backslash-prefixed octal escapes
# (e.g. 97 becomes "\141").
def to_utf8(obj)
  code_points = obj.is_a?(Array) ? obj : [obj]
  utf8_bytes = code_points.pack("U*").unpack("C*")
  utf8_bytes.map { |byte| format("\\%o", byte) }.join
end
61
+
62
# Collects the casefolding entries whose :status equals +status+ into a
# hash that maps each entry's :source to its :target.
def casefolding_data_for(status)
  mapping = {}

  resource.each do |entry|
    next unless entry[:status] == status

    mapping[entry[:source]] = entry[:target]
  end

  mapping
end
68
+
69
+ def resource
70
+ @@resource ||= TwitterCldr.get_resource("unicode_data", "casefolding")
71
+ end
72
+
73
+ end
74
+ end
75
+ end
@@ -3,13 +3,11 @@
3
3
  # Copyright 2012 Twitter, Inc
4
4
  # http://www.apache.org/licenses/LICENSE-2.0
5
5
 
6
- require 'cldr/download'
7
-
8
6
  module TwitterCldr
9
7
  module Resources
10
8
 
11
- CLDR_URL = 'http://unicode.org/Public/cldr/23.1/core.zip'
12
- ICU4J_URL = 'http://download.icu-project.org/files/icu4j/51.2/icu4j-51_2.jar'
9
+ CLDR_URL = 'http://unicode.org/Public/cldr/24/core.zip'
10
+ ICU4J_URL = 'http://download.icu-project.org/files/icu4j/52.1/icu4j-52_1.jar'
13
11
 
14
12
  class << self
15
13
 
@@ -29,6 +27,14 @@ module TwitterCldr
29
27
  if File.directory?(path)
30
28
  puts "Using CLDR data from '#{path}'."
31
29
  else
30
+ begin
31
+ require 'zip'
32
+ rescue LoadError
33
+ raise StandardError.new("Unable to require 'zip'. Please switch to at least Ruby 1.9, then rebundle and try again.")
34
+ end
35
+
36
+ require 'cldr/download'
37
+
32
38
  puts "Downloading CLDR data from '#{url}' to '#{path}'."
33
39
  Cldr.download(url, path)
34
40
  end
@@ -0,0 +1,18 @@
1
+ # encoding: UTF-8
2
+
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
6
+ require 'java'
7
+ require 'twitter_cldr/resources/download'
8
+
9
+ class IcuBasedImporter
10
+
11
+ protected
12
+
13
+ def require_icu4j(icu4j_path)
14
+ TwitterCldr::Resources.download_icu4j_if_necessary(icu4j_path)
15
+ require icu4j_path
16
+ end
17
+
18
+ end
@@ -12,7 +12,9 @@ module TwitterCldr
12
12
 
13
13
  class LocalesResourcesImporter
14
14
 
15
- COMPONENTS = %w[calendars languages numbers units plurals lists layout currencies territories]
15
+ # NOTE: units.yml was NOT updated to cldr 24 (too many significant changes) - add back in when appropriate
16
+ LOCALE_COMPONENTS = %w[calendars languages numbers plurals lists layout currencies territories rbnf] # units
17
+ SHARED_COMPONENTS = %w[currency_digits_and_rounding rbnf_root numbering_systems segments_root]
16
18
 
17
19
  # Arguments:
18
20
  #
@@ -48,7 +50,7 @@ module TwitterCldr
48
50
  def import_components
49
51
  export_args = {
50
52
  :locales => TwitterCldr.supported_locales,
51
- :components => COMPONENTS,
53
+ :components => LOCALE_COMPONENTS,
52
54
  :target => File.join(@output_path, 'locales'),
53
55
  :merge => true # fill in the gaps, eg fill in sub-locales like en_GB with en
54
56
  }
@@ -61,7 +63,7 @@ module TwitterCldr
61
63
  end
62
64
 
63
65
  export_args = {
64
- :components => ["currency_digits_and_rounding"],
66
+ :components => SHARED_COMPONENTS,
65
67
  :target => File.join(@output_path, 'shared'),
66
68
  :merge => true
67
69
  }
@@ -78,7 +80,16 @@ module TwitterCldr
78
80
  data = YAML.load(File.read(path))
79
81
 
80
82
  File.open(path, 'w:utf-8') do |output|
81
- output.write(YAML.dump(TwitterCldr::Utils.deep_symbolize_keys(data)))
83
+ output.write(
84
+ # Quote all strings for compat with 1.8. This is important because
85
+ # RBNF syntax includes characters that are significant in the Yaml
86
+ # syntax, like >, <, etc. Psych doesn't have problems parsing them,
87
+ # but Syck does (ruby 1.8).
88
+ TwitterCldr::Utils::YAML.dump(TwitterCldr::Utils.deep_symbolize_keys(data), {
89
+ :quote_all_strings => true,
90
+ :use_natural_symbols => true
91
+ })
92
+ )
82
93
  end
83
94
  end
84
95
 
@@ -93,7 +104,7 @@ module TwitterCldr
93
104
 
94
105
  data = YAML.load(File.read(path))
95
106
  data.keys.each do |l|
96
- data[l] = TwitterCldr::Utils::Territories.deep_normalize_territory_code_keys(data[l])
107
+ data[l] = TwitterCldr::Shared::Territories.deep_normalize_territory_code_keys(data[l])
97
108
  end
98
109
 
99
110
  File.open(path, 'w:utf-8') do |output|
@@ -125,15 +136,15 @@ module TwitterCldr
125
136
  end
126
137
 
127
138
  BUDDHIST_CALENDAR = {
128
- 'formats' => {
129
- 'date' => {
130
- 'default' => :'calendars.buddhist.formats.date.medium',
131
- 'full' => { 'pattern' => 'EEEEที่ d MMMM G y' },
132
- 'long' => { 'pattern' => 'd MMMM พ.ศ. #{y + 543}' },
133
- 'medium' => { 'pattern' => 'd MMM #{y + 543}' },
134
- 'short' => { 'pattern' => 'd/M/#{y + 543}' }
135
- }
139
+ 'formats' => {
140
+ 'date' => {
141
+ 'default' => :'calendars.buddhist.formats.date.medium',
142
+ 'full' => { 'pattern' => 'EEEEที่ d MMMM G y' },
143
+ 'long' => { 'pattern' => 'd MMMM พ.ศ. #{y + 543}' },
144
+ 'medium' => { 'pattern' => 'd MMM #{y + 543}' },
145
+ 'short' => { 'pattern' => 'd/M/#{y + 543}' }
136
146
  }
147
+ }
137
148
  }
138
149
 
139
150
  end
@@ -25,26 +25,13 @@ module TwitterCldr
25
25
  def import
26
26
  parse_props_file.each_pair do |algorithm, code_point_list|
27
27
  File.open(File.join(@output_path, "#{algorithm.downcase}_quick_check.yml"), "w+") do |f|
28
- f.write(YAML.dump(rangify(partition_prop_list(code_point_list))))
28
+ f.write(YAML.dump(TwitterCldr::Utils::RangeSet.rangify(code_point_list)))
29
29
  end
30
30
  end
31
31
  end
32
32
 
33
33
  private
34
34
 
35
- def rangify(lists)
36
- lists.map { |list| (list.first..list.last) }
37
- end
38
-
39
- def partition_prop_list(list)
40
- last_item = 0
41
- list.inject([]) do |ret, item|
42
- (item - last_item == 1) ? ret[-1] << item : ret << [item]
43
- last_item = item
44
- ret
45
- end
46
- end
47
-
48
35
  def parse_props_file
49
36
  check_table = {}
50
37
  cur_type = nil