twitter_cldr 3.0.0.beta1 → 3.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (698) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +16 -2
  3. data/History.txt +9 -1
  4. data/README.md +297 -113
  5. data/Rakefile +97 -17
  6. data/lib/twitter_cldr/collation/collator.rb +12 -3
  7. data/lib/twitter_cldr/collation/trie_builder.rb +1 -1
  8. data/lib/twitter_cldr/{tokenizers/calendars → data_readers}/additional_date_format_selector.rb +6 -4
  9. data/lib/twitter_cldr/data_readers/calendar_data_reader.rb +91 -0
  10. data/lib/twitter_cldr/data_readers/data_reader.rb +32 -0
  11. data/lib/twitter_cldr/data_readers/date_data_reader.rb +26 -0
  12. data/lib/twitter_cldr/data_readers/date_time_data_reader.rb +41 -0
  13. data/lib/twitter_cldr/data_readers/number_data_reader.rb +142 -0
  14. data/lib/twitter_cldr/data_readers/time_data_reader.rb +26 -0
  15. data/lib/twitter_cldr/data_readers/timespan_data_reader.rb +122 -0
  16. data/lib/twitter_cldr/data_readers.rb +17 -0
  17. data/lib/twitter_cldr/formatters/calendars/{datetime_formatter.rb → date_time_formatter.rb} +27 -42
  18. data/lib/twitter_cldr/formatters/calendars/timespan_formatter.rb +3 -64
  19. data/lib/twitter_cldr/formatters/formatter.rb +39 -0
  20. data/lib/twitter_cldr/formatters/list_formatter.rb +22 -12
  21. data/lib/twitter_cldr/formatters/numbers/abbreviated/abbreviated_number_formatter.rb +5 -26
  22. data/lib/twitter_cldr/formatters/numbers/currency_formatter.rb +2 -11
  23. data/lib/twitter_cldr/formatters/numbers/decimal_formatter.rb +4 -2
  24. data/lib/twitter_cldr/formatters/numbers/number_formatter.rb +45 -27
  25. data/lib/twitter_cldr/formatters/numbers/percent_formatter.rb +3 -13
  26. data/lib/twitter_cldr/formatters/numbers/rbnf/formatters.rb +224 -0
  27. data/lib/twitter_cldr/formatters/numbers/rbnf/post_processors/chinese.rb +122 -0
  28. data/lib/twitter_cldr/formatters/numbers/rbnf/rule.rb +93 -0
  29. data/lib/twitter_cldr/formatters/numbers/rbnf/rule_group.rb +20 -0
  30. data/lib/twitter_cldr/formatters/numbers/rbnf/rule_parser.rb +86 -0
  31. data/lib/twitter_cldr/formatters/numbers/rbnf/rule_set.rb +259 -0
  32. data/lib/twitter_cldr/formatters/numbers/rbnf/substitution.rb +30 -0
  33. data/lib/twitter_cldr/formatters/numbers/rbnf.rb +127 -0
  34. data/lib/twitter_cldr/formatters/plurals/plural_formatter.rb +18 -6
  35. data/lib/twitter_cldr/formatters.rb +4 -5
  36. data/lib/twitter_cldr/localized/localized_array.rb +1 -1
  37. data/lib/twitter_cldr/localized/localized_date.rb +6 -3
  38. data/lib/twitter_cldr/localized/localized_datetime.rb +38 -15
  39. data/lib/twitter_cldr/localized/localized_number.rb +40 -24
  40. data/lib/twitter_cldr/localized/localized_object.rb +4 -4
  41. data/lib/twitter_cldr/localized/localized_string.rb +40 -7
  42. data/lib/twitter_cldr/localized/localized_time.rb +9 -2
  43. data/lib/twitter_cldr/localized/localized_timespan.rb +50 -5
  44. data/lib/twitter_cldr/normalization.rb +8 -19
  45. data/lib/twitter_cldr/parsers/parser.rb +50 -0
  46. data/lib/twitter_cldr/parsers/segmentation_parser.rb +137 -0
  47. data/lib/twitter_cldr/parsers/symbol_table.rb +30 -0
  48. data/lib/twitter_cldr/parsers/unicode_regex/character_class.rb +91 -0
  49. data/lib/twitter_cldr/parsers/unicode_regex/character_range.rb +39 -0
  50. data/lib/twitter_cldr/parsers/unicode_regex/character_set.rb +65 -0
  51. data/lib/twitter_cldr/parsers/unicode_regex/component.rb +50 -0
  52. data/lib/twitter_cldr/parsers/unicode_regex/literal.rb +83 -0
  53. data/lib/twitter_cldr/parsers/unicode_regex/unicode_string.rb +41 -0
  54. data/lib/twitter_cldr/parsers/unicode_regex_parser.rb +262 -0
  55. data/lib/twitter_cldr/parsers.rb +5 -1
  56. data/lib/twitter_cldr/resources/casefolder.rb.erb +64 -0
  57. data/lib/twitter_cldr/resources/casefolder_class_generator.rb +75 -0
  58. data/lib/twitter_cldr/resources/download.rb +10 -4
  59. data/lib/twitter_cldr/resources/icu_based_importer.rb +18 -0
  60. data/lib/twitter_cldr/resources/locales_resources_importer.rb +24 -13
  61. data/lib/twitter_cldr/resources/normalization_quick_check_importer.rb +1 -14
  62. data/lib/twitter_cldr/resources/rbnf_test_importer.rb +107 -0
  63. data/lib/twitter_cldr/resources/readme_renderer.rb +115 -0
  64. data/lib/twitter_cldr/resources/tailoring_importer.rb +2 -8
  65. data/lib/twitter_cldr/resources/uli/segment_exceptions_importer.rb +62 -0
  66. data/lib/twitter_cldr/resources/uli.rb +12 -0
  67. data/lib/twitter_cldr/resources/unicode_data_importer.rb +84 -14
  68. data/lib/twitter_cldr/resources/unicode_importer.rb +37 -0
  69. data/lib/twitter_cldr/resources/unicode_properties_importer.rb +79 -0
  70. data/lib/twitter_cldr/resources.rb +8 -1
  71. data/lib/twitter_cldr/shared/break_iterator.rb +213 -0
  72. data/lib/twitter_cldr/shared/calendar.rb +38 -14
  73. data/lib/twitter_cldr/shared/casefolder.rb +210 -0
  74. data/lib/twitter_cldr/shared/code_point.rb +103 -16
  75. data/lib/twitter_cldr/shared/numbering_system.rb +58 -0
  76. data/lib/twitter_cldr/shared/territories.rb +43 -1
  77. data/lib/twitter_cldr/shared/unicode_regex.rb +81 -0
  78. data/lib/twitter_cldr/shared.rb +13 -9
  79. data/lib/twitter_cldr/tokenizers/calendars/date_time_tokenizer.rb +77 -0
  80. data/lib/twitter_cldr/tokenizers/calendars/date_tokenizer.rb +14 -29
  81. data/lib/twitter_cldr/tokenizers/calendars/time_tokenizer.rb +13 -28
  82. data/lib/twitter_cldr/tokenizers/calendars/timespan_tokenizer.rb +11 -87
  83. data/lib/twitter_cldr/tokenizers/numbers/number_tokenizer.rb +16 -71
  84. data/lib/twitter_cldr/tokenizers/numbers/rbnf_tokenizer.rb +53 -0
  85. data/lib/twitter_cldr/tokenizers/pattern_tokenizer.rb +42 -0
  86. data/lib/twitter_cldr/tokenizers/segmentation/segmentation_tokenizer.rb +39 -0
  87. data/lib/twitter_cldr/tokenizers/tokenizer.rb +116 -0
  88. data/lib/twitter_cldr/tokenizers/unicode_regex/unicode_regex_tokenizer.rb +52 -0
  89. data/lib/twitter_cldr/tokenizers.rb +8 -4
  90. data/lib/twitter_cldr/utils/code_points.rb +1 -1
  91. data/lib/twitter_cldr/utils/range_set.rb +242 -0
  92. data/lib/twitter_cldr/utils/yaml.rb +17 -12
  93. data/lib/twitter_cldr/utils.rb +1 -1
  94. data/lib/twitter_cldr/version.rb +1 -1
  95. data/lib/twitter_cldr.rb +2 -1
  96. data/resources/custom/locales/sv/units.yml +8 -0
  97. data/resources/locales/af/calendars.yml +278 -224
  98. data/resources/locales/af/currencies.yml +954 -916
  99. data/resources/locales/af/languages.yml +583 -580
  100. data/resources/locales/af/layout.yml +5 -5
  101. data/resources/locales/af/lists.yml +23 -7
  102. data/resources/locales/af/numbers.yml +59 -54
  103. data/resources/locales/af/plurals.yml +2 -2
  104. data/resources/locales/af/rbnf.yml +261 -0
  105. data/resources/locales/af/territories.yml +264 -263
  106. data/resources/locales/ar/calendars.yml +287 -259
  107. data/resources/locales/ar/currencies.yml +1730 -1692
  108. data/resources/locales/ar/languages.yml +583 -580
  109. data/resources/locales/ar/layout.yml +5 -5
  110. data/resources/locales/ar/lists.yml +23 -7
  111. data/resources/locales/ar/numbers.yml +66 -61
  112. data/resources/locales/ar/plurals.yml +5 -8
  113. data/resources/locales/ar/rbnf.yml +519 -0
  114. data/resources/locales/ar/territories.yml +264 -263
  115. data/resources/locales/be/calendars.yml +238 -237
  116. data/resources/locales/be/currencies.yml +954 -917
  117. data/resources/locales/be/languages.yml +583 -580
  118. data/resources/locales/be/layout.yml +5 -5
  119. data/resources/locales/be/lists.yml +23 -7
  120. data/resources/locales/be/numbers.yml +62 -57
  121. data/resources/locales/be/plurals.yml +7 -4
  122. data/resources/locales/be/rbnf.yml +1288 -0
  123. data/resources/locales/be/territories.yml +264 -263
  124. data/resources/locales/bg/calendars.yml +278 -218
  125. data/resources/locales/bg/currencies.yml +955 -917
  126. data/resources/locales/bg/languages.yml +583 -580
  127. data/resources/locales/bg/layout.yml +5 -5
  128. data/resources/locales/bg/lists.yml +23 -7
  129. data/resources/locales/bg/numbers.yml +62 -57
  130. data/resources/locales/bg/plurals.yml +2 -2
  131. data/resources/locales/bg/rbnf.yml +280 -0
  132. data/resources/locales/bg/territories.yml +264 -263
  133. data/resources/locales/bn/calendars.yml +287 -225
  134. data/resources/locales/bn/currencies.yml +953 -916
  135. data/resources/locales/bn/languages.yml +583 -580
  136. data/resources/locales/bn/layout.yml +5 -5
  137. data/resources/locales/bn/lists.yml +23 -7
  138. data/resources/locales/bn/numbers.yml +62 -57
  139. data/resources/locales/bn/plurals.yml +2 -2
  140. data/resources/locales/bn/rbnf.yml +4 -0
  141. data/resources/locales/bn/territories.yml +264 -263
  142. data/resources/locales/ca/calendars.yml +278 -278
  143. data/resources/locales/ca/currencies.yml +953 -916
  144. data/resources/locales/ca/languages.yml +583 -580
  145. data/resources/locales/ca/layout.yml +5 -5
  146. data/resources/locales/ca/lists.yml +23 -7
  147. data/resources/locales/ca/numbers.yml +62 -57
  148. data/resources/locales/ca/plurals.yml +3 -2
  149. data/resources/locales/ca/rbnf.yml +756 -0
  150. data/resources/locales/ca/territories.yml +264 -263
  151. data/resources/locales/cs/calendars.yml +269 -262
  152. data/resources/locales/cs/currencies.yml +1483 -1172
  153. data/resources/locales/cs/languages.yml +583 -580
  154. data/resources/locales/cs/layout.yml +5 -5
  155. data/resources/locales/cs/lists.yml +23 -7
  156. data/resources/locales/cs/numbers.yml +64 -58
  157. data/resources/locales/cs/plurals.yml +6 -2
  158. data/resources/locales/cs/rbnf.yml +367 -0
  159. data/resources/locales/cs/territories.yml +264 -263
  160. data/resources/locales/cy/calendars.yml +275 -274
  161. data/resources/locales/cy/currencies.yml +1585 -1548
  162. data/resources/locales/cy/languages.yml +583 -580
  163. data/resources/locales/cy/layout.yml +5 -5
  164. data/resources/locales/cy/lists.yml +23 -7
  165. data/resources/locales/cy/numbers.yml +66 -61
  166. data/resources/locales/cy/plurals.yml +4 -3
  167. data/resources/locales/cy/rbnf.yml +298 -0
  168. data/resources/locales/cy/territories.yml +264 -263
  169. data/resources/locales/da/calendars.yml +281 -280
  170. data/resources/locales/da/currencies.yml +954 -916
  171. data/resources/locales/da/languages.yml +583 -580
  172. data/resources/locales/da/layout.yml +5 -5
  173. data/resources/locales/da/lists.yml +23 -7
  174. data/resources/locales/da/numbers.yml +62 -57
  175. data/resources/locales/da/plurals.yml +3 -2
  176. data/resources/locales/da/rbnf.yml +194 -0
  177. data/resources/locales/da/territories.yml +264 -263
  178. data/resources/locales/de/calendars.yml +294 -293
  179. data/resources/locales/de/currencies.yml +954 -916
  180. data/resources/locales/de/languages.yml +583 -580
  181. data/resources/locales/de/layout.yml +5 -5
  182. data/resources/locales/de/lists.yml +23 -7
  183. data/resources/locales/de/numbers.yml +62 -57
  184. data/resources/locales/de/plurals.yml +3 -2
  185. data/resources/locales/de/rbnf.yml +346 -0
  186. data/resources/locales/de/territories.yml +264 -263
  187. data/resources/locales/el/calendars.yml +279 -282
  188. data/resources/locales/el/currencies.yml +954 -916
  189. data/resources/locales/el/languages.yml +583 -580
  190. data/resources/locales/el/layout.yml +5 -5
  191. data/resources/locales/el/lists.yml +23 -7
  192. data/resources/locales/el/numbers.yml +62 -57
  193. data/resources/locales/el/plurals.yml +2 -2
  194. data/resources/locales/el/rbnf.yml +880 -0
  195. data/resources/locales/el/territories.yml +264 -263
  196. data/resources/locales/en/calendars.yml +192 -191
  197. data/resources/locales/en/currencies.yml +953 -915
  198. data/resources/locales/en/languages.yml +583 -580
  199. data/resources/locales/en/layout.yml +5 -5
  200. data/resources/locales/en/lists.yml +23 -7
  201. data/resources/locales/en/numbers.yml +62 -57
  202. data/resources/locales/en/plurals.yml +3 -2
  203. data/resources/locales/en/rbnf.yml +542 -0
  204. data/resources/locales/en/territories.yml +264 -263
  205. data/resources/locales/en-GB/calendars.yml +195 -194
  206. data/resources/locales/en-GB/currencies.yml +953 -915
  207. data/resources/locales/en-GB/languages.yml +583 -580
  208. data/resources/locales/en-GB/layout.yml +5 -5
  209. data/resources/locales/en-GB/lists.yml +23 -7
  210. data/resources/locales/en-GB/numbers.yml +62 -57
  211. data/resources/locales/en-GB/plurals.yml +2 -1
  212. data/resources/locales/en-GB/rbnf.yml +4 -0
  213. data/resources/locales/en-GB/territories.yml +264 -263
  214. data/resources/locales/es/calendars.yml +288 -238
  215. data/resources/locales/es/currencies.yml +953 -922
  216. data/resources/locales/es/languages.yml +583 -580
  217. data/resources/locales/es/layout.yml +5 -5
  218. data/resources/locales/es/lists.yml +23 -7
  219. data/resources/locales/es/numbers.yml +62 -57
  220. data/resources/locales/es/plurals.yml +2 -2
  221. data/resources/locales/es/rbnf.yml +913 -0
  222. data/resources/locales/es/territories.yml +264 -263
  223. data/resources/locales/eu/calendars.yml +277 -218
  224. data/resources/locales/eu/currencies.yml +953 -916
  225. data/resources/locales/eu/languages.yml +583 -580
  226. data/resources/locales/eu/layout.yml +5 -5
  227. data/resources/locales/eu/lists.yml +23 -7
  228. data/resources/locales/eu/numbers.yml +56 -51
  229. data/resources/locales/eu/plurals.yml +2 -2
  230. data/resources/locales/eu/rbnf.yml +4 -0
  231. data/resources/locales/eu/territories.yml +264 -263
  232. data/resources/locales/fa/calendars.yml +294 -293
  233. data/resources/locales/fa/currencies.yml +955 -916
  234. data/resources/locales/fa/languages.yml +583 -580
  235. data/resources/locales/fa/layout.yml +5 -5
  236. data/resources/locales/fa/lists.yml +23 -7
  237. data/resources/locales/fa/numbers.yml +62 -57
  238. data/resources/locales/fa/plurals.yml +2 -2
  239. data/resources/locales/fa/rbnf.yml +157 -0
  240. data/resources/locales/fa/territories.yml +264 -263
  241. data/resources/locales/fi/calendars.yml +284 -283
  242. data/resources/locales/fi/currencies.yml +953 -915
  243. data/resources/locales/fi/languages.yml +583 -580
  244. data/resources/locales/fi/layout.yml +5 -5
  245. data/resources/locales/fi/lists.yml +23 -7
  246. data/resources/locales/fi/numbers.yml +62 -57
  247. data/resources/locales/fi/plurals.yml +3 -2
  248. data/resources/locales/fi/rbnf.yml +206 -0
  249. data/resources/locales/fi/territories.yml +264 -263
  250. data/resources/locales/fil/calendars.yml +281 -230
  251. data/resources/locales/fil/currencies.yml +953 -916
  252. data/resources/locales/fil/languages.yml +583 -580
  253. data/resources/locales/fil/layout.yml +5 -5
  254. data/resources/locales/fil/lists.yml +23 -7
  255. data/resources/locales/fil/numbers.yml +62 -57
  256. data/resources/locales/fil/plurals.yml +3 -2
  257. data/resources/locales/fil/rbnf.yml +158 -0
  258. data/resources/locales/fil/territories.yml +264 -263
  259. data/resources/locales/fr/calendars.yml +297 -296
  260. data/resources/locales/fr/currencies.yml +968 -949
  261. data/resources/locales/fr/languages.yml +583 -580
  262. data/resources/locales/fr/layout.yml +5 -5
  263. data/resources/locales/fr/lists.yml +23 -7
  264. data/resources/locales/fr/numbers.yml +62 -57
  265. data/resources/locales/fr/plurals.yml +2 -2
  266. data/resources/locales/fr/rbnf.yml +621 -0
  267. data/resources/locales/fr/territories.yml +264 -263
  268. data/resources/locales/ga/calendars.yml +192 -191
  269. data/resources/locales/ga/currencies.yml +954 -916
  270. data/resources/locales/ga/languages.yml +583 -580
  271. data/resources/locales/ga/layout.yml +5 -5
  272. data/resources/locales/ga/lists.yml +23 -7
  273. data/resources/locales/ga/numbers.yml +62 -57
  274. data/resources/locales/ga/plurals.yml +4 -3
  275. data/resources/locales/ga/rbnf.yml +615 -0
  276. data/resources/locales/ga/territories.yml +264 -263
  277. data/resources/locales/gl/calendars.yml +283 -217
  278. data/resources/locales/gl/currencies.yml +953 -916
  279. data/resources/locales/gl/languages.yml +583 -580
  280. data/resources/locales/gl/layout.yml +5 -5
  281. data/resources/locales/gl/lists.yml +23 -7
  282. data/resources/locales/gl/numbers.yml +62 -57
  283. data/resources/locales/gl/plurals.yml +3 -2
  284. data/resources/locales/gl/rbnf.yml +4 -0
  285. data/resources/locales/gl/territories.yml +264 -263
  286. data/resources/locales/he/calendars.yml +248 -220
  287. data/resources/locales/he/currencies.yml +992 -932
  288. data/resources/locales/he/languages.yml +583 -580
  289. data/resources/locales/he/layout.yml +5 -5
  290. data/resources/locales/he/lists.yml +23 -7
  291. data/resources/locales/he/numbers.yml +64 -59
  292. data/resources/locales/he/plurals.yml +6 -3
  293. data/resources/locales/he/rbnf.yml +1029 -0
  294. data/resources/locales/he/territories.yml +264 -263
  295. data/resources/locales/hi/calendars.yml +284 -216
  296. data/resources/locales/hi/currencies.yml +953 -915
  297. data/resources/locales/hi/languages.yml +583 -580
  298. data/resources/locales/hi/layout.yml +5 -5
  299. data/resources/locales/hi/lists.yml +23 -7
  300. data/resources/locales/hi/numbers.yml +60 -55
  301. data/resources/locales/hi/plurals.yml +2 -2
  302. data/resources/locales/hi/rbnf.yml +430 -0
  303. data/resources/locales/hi/territories.yml +264 -263
  304. data/resources/locales/hr/calendars.yml +308 -307
  305. data/resources/locales/hr/currencies.yml +1248 -1504
  306. data/resources/locales/hr/languages.yml +583 -580
  307. data/resources/locales/hr/layout.yml +5 -5
  308. data/resources/locales/hr/lists.yml +23 -7
  309. data/resources/locales/hr/numbers.yml +63 -59
  310. data/resources/locales/hr/plurals.yml +12 -4
  311. data/resources/locales/hr/rbnf.yml +599 -0
  312. data/resources/locales/hr/territories.yml +264 -263
  313. data/resources/locales/hu/calendars.yml +285 -284
  314. data/resources/locales/hu/currencies.yml +954 -916
  315. data/resources/locales/hu/languages.yml +583 -580
  316. data/resources/locales/hu/layout.yml +5 -5
  317. data/resources/locales/hu/lists.yml +23 -7
  318. data/resources/locales/hu/numbers.yml +62 -57
  319. data/resources/locales/hu/plurals.yml +2 -2
  320. data/resources/locales/hu/rbnf.yml +363 -0
  321. data/resources/locales/hu/territories.yml +264 -263
  322. data/resources/locales/id/calendars.yml +276 -275
  323. data/resources/locales/id/currencies.yml +954 -916
  324. data/resources/locales/id/languages.yml +583 -580
  325. data/resources/locales/id/layout.yml +5 -5
  326. data/resources/locales/id/lists.yml +23 -7
  327. data/resources/locales/id/numbers.yml +61 -56
  328. data/resources/locales/id/plurals.yml +2 -2
  329. data/resources/locales/id/rbnf.yml +121 -0
  330. data/resources/locales/id/territories.yml +264 -263
  331. data/resources/locales/is/calendars.yml +281 -242
  332. data/resources/locales/is/currencies.yml +954 -916
  333. data/resources/locales/is/languages.yml +583 -580
  334. data/resources/locales/is/layout.yml +5 -5
  335. data/resources/locales/is/lists.yml +23 -7
  336. data/resources/locales/is/numbers.yml +62 -57
  337. data/resources/locales/is/plurals.yml +5 -2
  338. data/resources/locales/is/rbnf.yml +326 -0
  339. data/resources/locales/is/territories.yml +264 -263
  340. data/resources/locales/it/calendars.yml +275 -260
  341. data/resources/locales/it/currencies.yml +953 -920
  342. data/resources/locales/it/languages.yml +583 -580
  343. data/resources/locales/it/layout.yml +5 -5
  344. data/resources/locales/it/lists.yml +23 -7
  345. data/resources/locales/it/numbers.yml +59 -54
  346. data/resources/locales/it/plurals.yml +3 -2
  347. data/resources/locales/it/rbnf.yml +1189 -0
  348. data/resources/locales/it/territories.yml +264 -263
  349. data/resources/locales/ja/calendars.yml +269 -207
  350. data/resources/locales/ja/currencies.yml +953 -915
  351. data/resources/locales/ja/languages.yml +583 -580
  352. data/resources/locales/ja/layout.yml +5 -5
  353. data/resources/locales/ja/lists.yml +23 -7
  354. data/resources/locales/ja/numbers.yml +62 -57
  355. data/resources/locales/ja/plurals.yml +2 -2
  356. data/resources/locales/ja/rbnf.yml +209 -0
  357. data/resources/locales/ja/territories.yml +264 -263
  358. data/resources/locales/ko/calendars.yml +246 -213
  359. data/resources/locales/ko/currencies.yml +953 -915
  360. data/resources/locales/ko/languages.yml +583 -580
  361. data/resources/locales/ko/layout.yml +5 -5
  362. data/resources/locales/ko/lists.yml +23 -7
  363. data/resources/locales/ko/numbers.yml +60 -55
  364. data/resources/locales/ko/plurals.yml +2 -2
  365. data/resources/locales/ko/rbnf.yml +722 -0
  366. data/resources/locales/ko/territories.yml +264 -263
  367. data/resources/locales/lv/calendars.yml +286 -285
  368. data/resources/locales/lv/currencies.yml +1122 -1084
  369. data/resources/locales/lv/languages.yml +583 -580
  370. data/resources/locales/lv/layout.yml +5 -5
  371. data/resources/locales/lv/lists.yml +23 -7
  372. data/resources/locales/lv/numbers.yml +63 -58
  373. data/resources/locales/lv/plurals.yml +11 -3
  374. data/resources/locales/lv/rbnf.yml +238 -0
  375. data/resources/locales/lv/territories.yml +264 -263
  376. data/resources/locales/ms/calendars.yml +280 -279
  377. data/resources/locales/ms/currencies.yml +954 -916
  378. data/resources/locales/ms/languages.yml +583 -580
  379. data/resources/locales/ms/layout.yml +5 -5
  380. data/resources/locales/ms/lists.yml +23 -7
  381. data/resources/locales/ms/numbers.yml +62 -57
  382. data/resources/locales/ms/plurals.yml +2 -2
  383. data/resources/locales/ms/rbnf.yml +130 -0
  384. data/resources/locales/ms/territories.yml +264 -263
  385. data/resources/locales/nb/calendars.yml +284 -283
  386. data/resources/locales/nb/currencies.yml +958 -916
  387. data/resources/locales/nb/languages.yml +583 -580
  388. data/resources/locales/nb/layout.yml +5 -5
  389. data/resources/locales/nb/lists.yml +23 -7
  390. data/resources/locales/nb/numbers.yml +62 -57
  391. data/resources/locales/nb/plurals.yml +2 -2
  392. data/resources/locales/nb/rbnf.yml +191 -0
  393. data/resources/locales/nb/territories.yml +264 -263
  394. data/resources/locales/nl/calendars.yml +285 -284
  395. data/resources/locales/nl/currencies.yml +953 -917
  396. data/resources/locales/nl/languages.yml +583 -580
  397. data/resources/locales/nl/layout.yml +5 -5
  398. data/resources/locales/nl/lists.yml +23 -7
  399. data/resources/locales/nl/numbers.yml +62 -57
  400. data/resources/locales/nl/plurals.yml +3 -2
  401. data/resources/locales/nl/rbnf.yml +320 -0
  402. data/resources/locales/nl/territories.yml +264 -263
  403. data/resources/locales/pl/calendars.yml +288 -287
  404. data/resources/locales/pl/currencies.yml +1326 -1284
  405. data/resources/locales/pl/languages.yml +583 -580
  406. data/resources/locales/pl/layout.yml +5 -5
  407. data/resources/locales/pl/lists.yml +23 -7
  408. data/resources/locales/pl/numbers.yml +64 -59
  409. data/resources/locales/pl/plurals.yml +11 -4
  410. data/resources/locales/pl/rbnf.yml +410 -0
  411. data/resources/locales/pl/territories.yml +264 -263
  412. data/resources/locales/pt/calendars.yml +290 -289
  413. data/resources/locales/pt/currencies.yml +954 -916
  414. data/resources/locales/pt/languages.yml +583 -580
  415. data/resources/locales/pt/layout.yml +5 -5
  416. data/resources/locales/pt/lists.yml +23 -7
  417. data/resources/locales/pt/numbers.yml +62 -57
  418. data/resources/locales/pt/plurals.yml +4 -2
  419. data/resources/locales/pt/rbnf.yml +586 -0
  420. data/resources/locales/pt/territories.yml +264 -263
  421. data/resources/locales/ro/calendars.yml +284 -283
  422. data/resources/locales/ro/currencies.yml +1170 -1132
  423. data/resources/locales/ro/languages.yml +583 -580
  424. data/resources/locales/ro/layout.yml +5 -5
  425. data/resources/locales/ro/lists.yml +23 -7
  426. data/resources/locales/ro/numbers.yml +63 -58
  427. data/resources/locales/ro/plurals.yml +5 -2
  428. data/resources/locales/ro/rbnf.yml +250 -0
  429. data/resources/locales/ro/territories.yml +264 -263
  430. data/resources/locales/ru/calendars.yml +282 -281
  431. data/resources/locales/ru/currencies.yml +1118 -1247
  432. data/resources/locales/ru/languages.yml +583 -580
  433. data/resources/locales/ru/layout.yml +5 -5
  434. data/resources/locales/ru/lists.yml +23 -7
  435. data/resources/locales/ru/numbers.yml +63 -59
  436. data/resources/locales/ru/plurals.yml +8 -4
  437. data/resources/locales/ru/rbnf.yml +385 -0
  438. data/resources/locales/ru/territories.yml +264 -263
  439. data/resources/locales/sk/calendars.yml +254 -251
  440. data/resources/locales/sk/currencies.yml +1174 -1008
  441. data/resources/locales/sk/languages.yml +583 -580
  442. data/resources/locales/sk/layout.yml +5 -5
  443. data/resources/locales/sk/lists.yml +23 -7
  444. data/resources/locales/sk/numbers.yml +64 -58
  445. data/resources/locales/sk/plurals.yml +6 -2
  446. data/resources/locales/sk/rbnf.yml +304 -0
  447. data/resources/locales/sk/territories.yml +264 -263
  448. data/resources/locales/sq/calendars.yml +283 -206
  449. data/resources/locales/sq/currencies.yml +954 -916
  450. data/resources/locales/sq/languages.yml +583 -580
  451. data/resources/locales/sq/layout.yml +5 -5
  452. data/resources/locales/sq/lists.yml +23 -7
  453. data/resources/locales/sq/numbers.yml +62 -57
  454. data/resources/locales/sq/plurals.yml +2 -2
  455. data/resources/locales/sq/rbnf.yml +181 -0
  456. data/resources/locales/sq/territories.yml +264 -263
  457. data/resources/locales/sr/calendars.yml +290 -289
  458. data/resources/locales/sr/currencies.yml +1251 -1508
  459. data/resources/locales/sr/languages.yml +583 -580
  460. data/resources/locales/sr/layout.yml +5 -5
  461. data/resources/locales/sr/lists.yml +23 -7
  462. data/resources/locales/sr/numbers.yml +62 -58
  463. data/resources/locales/sr/plurals.yml +12 -4
  464. data/resources/locales/sr/rbnf.yml +429 -0
  465. data/resources/locales/sr/territories.yml +264 -263
  466. data/resources/locales/sv/calendars.yml +290 -289
  467. data/resources/locales/sv/currencies.yml +960 -930
  468. data/resources/locales/sv/languages.yml +583 -580
  469. data/resources/locales/sv/layout.yml +5 -5
  470. data/resources/locales/sv/lists.yml +23 -7
  471. data/resources/locales/sv/numbers.yml +63 -58
  472. data/resources/locales/sv/plurals.yml +3 -2
  473. data/resources/locales/sv/rbnf.yml +692 -0
  474. data/resources/locales/sv/territories.yml +264 -263
  475. data/resources/locales/ta/calendars.yml +281 -266
  476. data/resources/locales/ta/currencies.yml +953 -915
  477. data/resources/locales/ta/languages.yml +583 -580
  478. data/resources/locales/ta/layout.yml +5 -5
  479. data/resources/locales/ta/lists.yml +23 -7
  480. data/resources/locales/ta/numbers.yml +62 -57
  481. data/resources/locales/ta/plurals.yml +2 -2
  482. data/resources/locales/ta/rbnf.yml +241 -0
  483. data/resources/locales/ta/territories.yml +264 -263
  484. data/resources/locales/th/calendars.yml +278 -289
  485. data/resources/locales/th/currencies.yml +953 -915
  486. data/resources/locales/th/languages.yml +583 -580
  487. data/resources/locales/th/layout.yml +5 -5
  488. data/resources/locales/th/lists.yml +23 -7
  489. data/resources/locales/th/numbers.yml +62 -57
  490. data/resources/locales/th/plurals.yml +2 -2
  491. data/resources/locales/th/rbnf.yml +119 -0
  492. data/resources/locales/th/territories.yml +264 -263
  493. data/resources/locales/tr/calendars.yml +287 -286
  494. data/resources/locales/tr/currencies.yml +953 -916
  495. data/resources/locales/tr/languages.yml +583 -580
  496. data/resources/locales/tr/layout.yml +5 -5
  497. data/resources/locales/tr/lists.yml +23 -7
  498. data/resources/locales/tr/numbers.yml +61 -56
  499. data/resources/locales/tr/plurals.yml +2 -2
  500. data/resources/locales/tr/rbnf.yml +277 -0
  501. data/resources/locales/tr/territories.yml +264 -263
  502. data/resources/locales/uk/calendars.yml +286 -252
  503. data/resources/locales/uk/currencies.yml +1311 -1070
  504. data/resources/locales/uk/languages.yml +583 -580
  505. data/resources/locales/uk/layout.yml +5 -5
  506. data/resources/locales/uk/lists.yml +23 -7
  507. data/resources/locales/uk/numbers.yml +64 -59
  508. data/resources/locales/uk/plurals.yml +10 -4
  509. data/resources/locales/uk/rbnf.yml +430 -0
  510. data/resources/locales/uk/territories.yml +264 -263
  511. data/resources/locales/ur/calendars.yml +267 -228
  512. data/resources/locales/ur/currencies.yml +954 -916
  513. data/resources/locales/ur/languages.yml +583 -580
  514. data/resources/locales/ur/layout.yml +5 -5
  515. data/resources/locales/ur/lists.yml +23 -7
  516. data/resources/locales/ur/numbers.yml +62 -57
  517. data/resources/locales/ur/plurals.yml +3 -2
  518. data/resources/locales/ur/rbnf.yml +4 -0
  519. data/resources/locales/ur/territories.yml +264 -263
  520. data/resources/locales/vi/calendars.yml +256 -236
  521. data/resources/locales/vi/currencies.yml +953 -915
  522. data/resources/locales/vi/languages.yml +583 -580
  523. data/resources/locales/vi/layout.yml +5 -5
  524. data/resources/locales/vi/lists.yml +23 -7
  525. data/resources/locales/vi/numbers.yml +62 -57
  526. data/resources/locales/vi/plurals.yml +2 -2
  527. data/resources/locales/vi/rbnf.yml +164 -0
  528. data/resources/locales/vi/territories.yml +264 -263
  529. data/resources/locales/zh/calendars.yml +266 -265
  530. data/resources/locales/zh/currencies.yml +953 -915
  531. data/resources/locales/zh/languages.yml +583 -580
  532. data/resources/locales/zh/layout.yml +5 -5
  533. data/resources/locales/zh/lists.yml +23 -7
  534. data/resources/locales/zh/numbers.yml +62 -57
  535. data/resources/locales/zh/plurals.yml +2 -2
  536. data/resources/locales/zh/rbnf.yml +689 -0
  537. data/resources/locales/zh/territories.yml +264 -263
  538. data/resources/locales/zh-Hant/calendars.yml +266 -265
  539. data/resources/locales/zh-Hant/currencies.yml +955 -915
  540. data/resources/locales/zh-Hant/languages.yml +583 -580
  541. data/resources/locales/zh-Hant/layout.yml +5 -5
  542. data/resources/locales/zh-Hant/lists.yml +23 -7
  543. data/resources/locales/zh-Hant/numbers.yml +62 -57
  544. data/resources/locales/zh-Hant/plurals.yml +2 -2
  545. data/resources/locales/zh-Hant/rbnf.yml +647 -0
  546. data/resources/locales/zh-Hant/territories.yml +264 -263
  547. data/resources/shared/currency_digits_and_rounding.yml +67 -64
  548. data/resources/shared/numbering_systems.yml +176 -0
  549. data/resources/shared/rbnf_root.yml +1573 -0
  550. data/resources/shared/segments/segments_root.yml +728 -0
  551. data/resources/shared/segments/tailorings/en.yml +8 -0
  552. data/resources/uli/segments/de.yml +128 -0
  553. data/resources/uli/segments/en.yml +154 -0
  554. data/resources/uli/segments/es.yml +112 -0
  555. data/resources/uli/segments/fr.yml +47 -0
  556. data/resources/uli/segments/it.yml +37 -0
  557. data/resources/uli/segments/pt.yml +173 -0
  558. data/resources/uli/segments/ru.yml +10 -0
  559. data/resources/unicode_data/casefolding.yml +4765 -0
  560. data/resources/unicode_data/indices/bidi_class.yml +4572 -0
  561. data/resources/unicode_data/indices/bidi_mirrored.yml +3087 -0
  562. data/resources/unicode_data/indices/category.yml +10918 -0
  563. data/resources/unicode_data/indices/keys.yml +101 -0
  564. data/resources/unicode_data/properties/line_break.yml +9269 -0
  565. data/resources/unicode_data/properties/sentence_break.yml +8067 -0
  566. data/resources/unicode_data/properties/word_break.yml +3001 -0
  567. data/spec/collation/collation_spec.rb +2 -1
  568. data/spec/collation/collator_spec.rb +4 -3
  569. data/spec/collation/tailoring_spec.rb +2 -2
  570. data/spec/collation/tailoring_tests/he.txt +5 -2
  571. data/spec/{tokenizers/calendars → data_readers}/additional_date_format_selector_spec.rb +13 -13
  572. data/spec/data_readers/date_time_data_reader_spec.rb +26 -0
  573. data/spec/data_readers/number_data_reader_spec.rb +18 -0
  574. data/spec/data_readers/timespan_data_reader.rb +22 -0
  575. data/spec/formatters/calendars/datetime_formatter_spec.rb +18 -22
  576. data/spec/formatters/list_formatter_spec.rb +16 -87
  577. data/spec/formatters/numbers/abbreviated/abbreviated_number_formatter_spec.rb +15 -59
  578. data/spec/formatters/numbers/abbreviated/long_decimal_formatter_spec.rb +32 -17
  579. data/spec/formatters/numbers/abbreviated/short_decimal_formatter_spec.rb +33 -17
  580. data/spec/formatters/numbers/currency_formatter_spec.rb +18 -13
  581. data/spec/formatters/numbers/decimal_formatter_spec.rb +16 -18
  582. data/spec/formatters/numbers/number_formatter_spec.rb +40 -31
  583. data/spec/formatters/numbers/percent_formatter_spec.rb +14 -6
  584. data/spec/formatters/numbers/rbnf/allowed_failures.yml +74 -0
  585. data/spec/formatters/numbers/rbnf/locales/af/rbnf_test.yml +706 -0
  586. data/spec/formatters/numbers/rbnf/locales/ar/rbnf_test.yml +706 -0
  587. data/spec/formatters/numbers/rbnf/locales/be/rbnf_test.yml +1174 -0
  588. data/spec/formatters/numbers/rbnf/locales/bg/rbnf_test.yml +706 -0
  589. data/spec/formatters/numbers/rbnf/locales/bn/rbnf_test.yml +1291 -0
  590. data/spec/formatters/numbers/rbnf/locales/ca/rbnf_test.yml +1174 -0
  591. data/spec/formatters/numbers/rbnf/locales/cs/rbnf_test.yml +823 -0
  592. data/spec/formatters/numbers/rbnf/locales/cy/rbnf_test.yml +940 -0
  593. data/spec/formatters/numbers/rbnf/locales/da/rbnf_test.yml +940 -0
  594. data/spec/formatters/numbers/rbnf/locales/de/rbnf_test.yml +940 -0
  595. data/spec/formatters/numbers/rbnf/locales/el/rbnf_test.yml +1174 -0
  596. data/spec/formatters/numbers/rbnf/locales/en/rbnf_test.yml +1291 -0
  597. data/spec/formatters/numbers/rbnf/locales/en-GB/rbnf_test.yml +1291 -0
  598. data/spec/formatters/numbers/rbnf/locales/es/rbnf_test.yml +1642 -0
  599. data/spec/formatters/numbers/rbnf/locales/eu/rbnf_test.yml +1291 -0
  600. data/spec/formatters/numbers/rbnf/locales/fa/rbnf_test.yml +589 -0
  601. data/spec/formatters/numbers/rbnf/locales/fi/rbnf_test.yml +706 -0
  602. data/spec/formatters/numbers/rbnf/locales/fil/rbnf_test.yml +706 -0
  603. data/spec/formatters/numbers/rbnf/locales/fr/rbnf_test.yml +1408 -0
  604. data/spec/formatters/numbers/rbnf/locales/ga/rbnf_test.yml +940 -0
  605. data/spec/formatters/numbers/rbnf/locales/gl/rbnf_test.yml +1291 -0
  606. data/spec/formatters/numbers/rbnf/locales/he/rbnf_test.yml +1057 -0
  607. data/spec/formatters/numbers/rbnf/locales/hi/rbnf_test.yml +823 -0
  608. data/spec/formatters/numbers/rbnf/locales/hr/rbnf_test.yml +1174 -0
  609. data/spec/formatters/numbers/rbnf/locales/hu/rbnf_test.yml +940 -0
  610. data/spec/formatters/numbers/rbnf/locales/id/rbnf_test.yml +706 -0
  611. data/spec/formatters/numbers/rbnf/locales/is/rbnf_test.yml +823 -0
  612. data/spec/formatters/numbers/rbnf/locales/it/rbnf_test.yml +1174 -0
  613. data/spec/formatters/numbers/rbnf/locales/ja/rbnf_test.yml +823 -0
  614. data/spec/formatters/numbers/rbnf/locales/ko/rbnf_test.yml +1408 -0
  615. data/spec/formatters/numbers/rbnf/locales/lv/rbnf_test.yml +706 -0
  616. data/spec/formatters/numbers/rbnf/locales/ms/rbnf_test.yml +706 -0
  617. data/spec/formatters/numbers/rbnf/locales/nb/rbnf_test.yml +940 -0
  618. data/spec/formatters/numbers/rbnf/locales/nl/rbnf_test.yml +706 -0
  619. data/spec/formatters/numbers/rbnf/locales/pl/rbnf_test.yml +823 -0
  620. data/spec/formatters/numbers/rbnf/locales/pt/rbnf_test.yml +1174 -0
  621. data/spec/formatters/numbers/rbnf/locales/ro/rbnf_test.yml +823 -0
  622. data/spec/formatters/numbers/rbnf/locales/ru/rbnf_test.yml +823 -0
  623. data/spec/formatters/numbers/rbnf/locales/sk/rbnf_test.yml +823 -0
  624. data/spec/formatters/numbers/rbnf/locales/sq/rbnf_test.yml +706 -0
  625. data/spec/formatters/numbers/rbnf/locales/sr/rbnf_test.yml +940 -0
  626. data/spec/formatters/numbers/rbnf/locales/sv/rbnf_test.yml +1876 -0
  627. data/spec/formatters/numbers/rbnf/locales/ta/rbnf_test.yml +706 -0
  628. data/spec/formatters/numbers/rbnf/locales/th/rbnf_test.yml +706 -0
  629. data/spec/formatters/numbers/rbnf/locales/tr/rbnf_test.yml +706 -0
  630. data/spec/formatters/numbers/rbnf/locales/uk/rbnf_test.yml +823 -0
  631. data/spec/formatters/numbers/rbnf/locales/ur/rbnf_test.yml +1291 -0
  632. data/spec/formatters/numbers/rbnf/locales/vi/rbnf_test.yml +706 -0
  633. data/spec/formatters/numbers/rbnf/locales/zh/rbnf_test.yml +940 -0
  634. data/spec/formatters/numbers/rbnf/locales/zh-Hant/rbnf_test.yml +940 -0
  635. data/spec/formatters/numbers/rbnf/rbnf_spec.rb +98 -0
  636. data/spec/formatters/plurals/plural_formatter_spec.rb +4 -4
  637. data/spec/formatters/plurals/rules_spec.rb +5 -5
  638. data/spec/localized/localized_date_spec.rb +1 -1
  639. data/spec/localized/localized_datetime_spec.rb +8 -13
  640. data/spec/localized/localized_number_spec.rb +17 -32
  641. data/spec/localized/localized_object_spec.rb +0 -5
  642. data/spec/localized/localized_string_spec.rb +40 -2
  643. data/spec/localized/localized_time_spec.rb +3 -6
  644. data/spec/localized/localized_timespan_spec.rb +144 -0
  645. data/spec/normalization_spec.rb +12 -12
  646. data/spec/parsers/number_parser_spec.rb +5 -5
  647. data/spec/parsers/parser_spec.rb +60 -0
  648. data/spec/parsers/segmentation_parser_spec.rb +96 -0
  649. data/spec/parsers/symbol_table_spec.rb +32 -0
  650. data/spec/parsers/unicode_regex/character_class_spec.rb +117 -0
  651. data/spec/parsers/unicode_regex/character_range_spec.rb +21 -0
  652. data/spec/parsers/unicode_regex/character_set_spec.rb +36 -0
  653. data/spec/parsers/unicode_regex/literal_spec.rb +34 -0
  654. data/spec/parsers/unicode_regex/unicode_string_spec.rb +22 -0
  655. data/spec/parsers/unicode_regex_parser_spec.rb +86 -0
  656. data/spec/readme_spec.rb +8 -269
  657. data/spec/shared/break_iterator_spec.rb +72 -0
  658. data/spec/shared/calendar_spec.rb +5 -4
  659. data/spec/shared/casefolder_spec.rb +30 -0
  660. data/spec/shared/casefolding.txt +251 -0
  661. data/spec/shared/casefolding_expected.txt +251 -0
  662. data/spec/shared/code_point_spec.rb +44 -14
  663. data/spec/shared/numbering_system_spec.rb +41 -0
  664. data/spec/shared/territories_spec.rb +14 -6
  665. data/spec/shared/unicode_regex_spec.rb +203 -0
  666. data/spec/spec_helper.rb +17 -0
  667. data/spec/tokenizers/calendars/date_tokenizer_spec.rb +26 -30
  668. data/spec/tokenizers/calendars/datetime_tokenizer_spec.rb +11 -90
  669. data/spec/tokenizers/calendars/time_tokenizer_spec.rb +5 -5
  670. data/spec/tokenizers/calendars/timespan_tokenizer_spec.rb +17 -7
  671. data/spec/tokenizers/numbers/number_tokenizer_spec.rb +28 -27
  672. data/spec/tokenizers/segmentation/segmentation_tokenizer_spec.rb +40 -0
  673. data/spec/tokenizers/unicode_regex/unicode_regex_tokenizer_spec.rb +190 -0
  674. data/spec/utils/range_set_spec.rb +171 -0
  675. data/spec/utils/yaml/yaml_spec.rb +62 -51
  676. data/twitter_cldr.gemspec +1 -1
  677. metadata +199 -30
  678. data/lib/twitter_cldr/formatters/base.rb +0 -47
  679. data/lib/twitter_cldr/formatters/calendars/date_formatter.rb +0 -19
  680. data/lib/twitter_cldr/formatters/calendars/time_formatter.rb +0 -19
  681. data/lib/twitter_cldr/normalization/base.rb +0 -37
  682. data/lib/twitter_cldr/normalization/hangul.rb +0 -79
  683. data/lib/twitter_cldr/normalization/nfc.rb +0 -24
  684. data/lib/twitter_cldr/normalization/nfd.rb +0 -26
  685. data/lib/twitter_cldr/normalization/nfkc.rb +0 -114
  686. data/lib/twitter_cldr/normalization/nfkd.rb +0 -120
  687. data/lib/twitter_cldr/normalization/quick_check.rb +0 -41
  688. data/lib/twitter_cldr/tokenizers/base.rb +0 -169
  689. data/lib/twitter_cldr/tokenizers/calendars/datetime_tokenizer.rb +0 -131
  690. data/lib/twitter_cldr/utils/territories.rb +0 -56
  691. data/spec/formatters/base_spec.rb +0 -18
  692. data/spec/formatters/calendars/timespan_formatter_spec.rb +0 -112
  693. data/spec/normalization/NormalizationTestShort.txt +0 -602
  694. data/spec/normalization/base_spec.rb +0 -16
  695. data/spec/normalization/hangul_spec.rb +0 -42
  696. data/spec/normalization/normalization_spec.rb +0 -113
  697. data/spec/tokenizers/base_spec.rb +0 -259
  698. data/spec/utils/territories_spec.rb +0 -16
data/README.md CHANGED
@@ -1,4 +1,6 @@
1
- ## twitter-cldr-rb [![Build Status](https://secure.travis-ci.org/twitter/twitter-cldr-rb.png?branch=master)](http://travis-ci.org/twitter/twitter-cldr-rb) [![Code Climate](https://codeclimate.com/badge.png)](https://codeclimate.com/github/twitter/twitter-cldr-rb)
1
+
2
+
3
+ ## twitter-cldr-rb [![Build Status](https://secure.travis-ci.org/twitter/twitter-cldr-rb.png?branch=master)](http://travis-ci.org/twitter/twitter-cldr-rb) [![Code Climate](https://codeclimate.com/github/twitter/twitter-cldr-rb.png)](https://codeclimate.com/github/twitter/twitter-cldr-rb)
2
4
 
3
5
  TwitterCldr uses Unicode's Common Locale Data Repository (CLDR) to format certain types of text into their
4
6
  localized equivalents. Currently supported types of text include dates, times, currencies, decimals, percentages, and symbols.
@@ -18,7 +20,7 @@ require 'twitter_cldr'
18
20
  Get a list of all currently supported locales (these are all supported on twitter.com):
19
21
 
20
22
  ```ruby
21
- TwitterCldr.supported_locales # [:ar, :da, :de, :en, :es, ... ]
23
+ TwitterCldr.supported_locales # [:af, :ar, :be, :bg, :bn, :ca, ... ]
22
24
  ```
23
25
 
24
26
  Determine if a locale is supported by TwitterCLDR:
@@ -37,18 +39,18 @@ TwitterCldr patches core Ruby objects like `Fixnum` and `Date` to make localizat
37
39
 
38
40
  ```ruby
39
41
  # default formatting with to_s
40
- 1337.localize(:es).to_s # 1.337
42
+ 1337.localize(:es).to_s # "1 337"
41
43
 
42
44
  # currencies, default USD
43
- 1337.localize(:es).to_currency.to_s # 1.337,00 $
44
- 1337.localize(:es).to_currency.to_s(:currency => "EUR") # 1.337,00 €
45
+ 1337.localize(:es).to_currency.to_s # "1 337,00 $"
46
+ 1337.localize(:es).to_currency.to_s(:currency => "EUR") # "1 337,00 €"
45
47
 
46
48
  # percentages
47
- 1337.localize(:es).to_percent.to_s # 1.337%
48
- 1337.localize(:es).to_percent.to_s(:precision => 2) # 1.337,00%
49
+ 1337.localize(:es).to_percent.to_s # "1 337%"
50
+ 1337.localize(:es).to_percent.to_s(:precision => 2) # "1 337,00%"
49
51
 
50
52
  # decimals
51
- 1337.localize(:es).to_decimal.to_s(:precision => 3) # 1.337,000
53
+ 1337.localize(:es).to_decimal.to_s(:precision => 3) # "1 337,000"
52
54
  ```
53
55
 
54
56
  **Note**: The `:precision` option can be used with all these number formatters.
@@ -60,16 +62,18 @@ num = TwitterCldr::Localized::LocalizedNumber.new(1337, :es)
60
62
  num.to_currency.to_s # ...etc
61
63
  ```
62
64
 
65
+
66
+
63
67
  #### More on Currencies
64
68
 
65
69
  If you're looking for a list of supported currencies, use the `TwitterCldr::Shared::Currencies` class:
66
70
 
67
71
  ```ruby
68
72
  # all supported currency codes
69
- TwitterCldr::Shared::Currencies.currency_codes # ["LTL", "PHP" ... ]
73
+ TwitterCldr::Shared::Currencies.currency_codes # ["ADP", "AED", "AFA", "AFN", ... ]
70
74
 
71
75
  # data for a specific currency code
72
- TwitterCldr::Shared::Currencies.for_code("CAD") # { :currency => "Canadian dollar", :symbol => "$", :code_points => [36], :cldr_symbol => "CA$", :country => "Canada" }
76
+ TwitterCldr::Shared::Currencies.for_code("CAD") # {:currency=>:CAD, :name=>"Canadian dollar", :cldr_symbol=>"CA$", :symbol=>"$", :code_points=>[36]}
73
77
  ```
74
78
 
75
79
  #### Short / Long Decimals
@@ -77,11 +81,70 @@ TwitterCldr::Shared::Currencies.for_code("CAD") # { :currency => "Can
77
81
  In addition to formatting regular decimals, TwitterCLDR supports short and long decimals. Short decimals abbreviate the notation for the appropriate power of ten, for example "1M" for 1,000,000 or "2K" for 2,000. Long decimals include the full notation, for example "1 million" or "2 thousand". Long and short decimals can be generated using the appropriate `to_` method:
78
82
 
79
83
  ```ruby
80
- 2337.localize.to_short_decimal.to_s # 2K
81
- 1337123.localize.to_short_decimal.to_s # 1M
84
+ 2337.localize.to_short_decimal.to_s # "2K"
85
+ 1337123.localize.to_short_decimal.to_s # "1M"
86
+
87
+ 2337.localize.to_long_decimal.to_s # "2 thousand"
88
+ 1337123.localize.to_long_decimal.to_s # "1 million"
89
+ ```
90
+
91
+ ### Number Spellout, Ordinalization, and More
92
+
93
+ TwitterCLDR's rule-based number formatters are capable of transforming integers into their written equivalents. Note that rule-based formatting of decimal numbers is currently not supported for languages other than English.
94
+
95
+ #### Spellout
96
+
97
+ For easy spellout formatting, check out the `LocalizedNumber#spellout` method:
98
+
99
+ ```ruby
100
+ 123.localize.spellout # one hundred twenty-three
101
+ 25_641.localize.spellout # twenty-five thousand six hundred forty-one
102
+ ```
103
+
104
+ As always, you can call `#localize` with a locale symbol:
105
+
106
+ ```ruby
107
+ 123.localize(:es).spellout # ciento veintitrés
108
+ 25_641.localize(:ru).spellout # двадцать пять тысяч шестьсот сорок один
109
+ ```
110
+
111
+ #### Ordinalization and More
112
+
113
+ The available rule-based number formats defined by the CLDR data set vary by language. Some languages support ordinal and cardinal numbers, occasionally with an additional masculine/feminine option, while others do not. You'll need to consult the list of available formats for your language.
114
+
115
+ Rule-based number formats are categorized by groups, and within groups by rulesets. You'll need to specify both to make use of all the available formats for your language.
82
116
 
83
- 2337.localize.to_long_decimal.to_s # 2 thousand
84
- 1337123.localize.to_long_decimal.to_s # 1 million
117
+ To get a list of supported groups, use the `#group_names` method:
118
+
119
+ ```ruby
120
+ 123.localize(:pt).rbnf.group_names # ["SpelloutRules", "OrdinalRules"]
121
+ ```
122
+
123
+ To get a list of supported rulesets for a group name, use the `#rule_set_names_for_group` method:
124
+
125
+ ```ruby
126
+ # ["digits-ordinal-masculine", "digits-ordinal-feminine", "digits-ordinal"]
127
+ 123.localize(:pt).rbnf.rule_set_names_for_group("OrdinalRules")
128
+ ```
129
+
130
+ Once you've chosen a group and ruleset, you can pass them to the `to_rbnf_s` method:
131
+
132
+ ```ruby
133
+ 123.localize(:pt).to_rbnf_s("OrdinalRules", "digits-ordinal-feminine") # 123a
134
+ 123.localize(:pt).to_rbnf_s("OrdinalRules", "digits-ordinal-masculine") # 123o
135
+ ```
136
+
137
+ For comparison, here's what English ordinal formatting looks like:
138
+
139
+ ```ruby
140
+ 123.localize.to_rbnf_s("OrdinalRules", "digits-ordinal") # 123rd
141
+ ```
142
+
143
+ For English (and other languages), you can also specify an ordinal spellout:
144
+
145
+ ```ruby
146
+ 123.localize.to_rbnf_s("SpelloutRules", "spellout-ordinal") # one hundred twenty-third
147
+ 123.localize(:pt).to_rbnf_s("SpelloutRules", "spellout-ordinal-masculine") # centésimo vigésimo terceiro
85
148
  ```
86
149
 
87
150
  ### Dates and Times
@@ -89,26 +152,28 @@ In addition to formatting regular decimals, TwitterCLDR supports short and long
89
152
  `Time` and `DateTime` objects are supported. `Date` objects are supported transiently:
90
153
 
91
154
  ```ruby
92
- DateTime.now.localize(:es).to_full_s # "lunes, 12 de diciembre de 2011 21:44:57 UTC -08:00"
93
- DateTime.now.localize(:es).to_long_s # "12 de diciembre de 2011 21:44:57 UTC"
94
- DateTime.now.localize(:es).to_medium_s # "12/12/2011 21:44:57"
95
- DateTime.now.localize(:es).to_short_s # "12/12/11 21:44"
155
+ DateTime.now.localize(:es).to_full_s # "viernes, 14 de febrero de 2014, 12:20:05 (UTC +00:00)"
156
+ DateTime.now.localize(:es).to_long_s # "14 de febrero de 2014, 12:20:05 UTC"
157
+ DateTime.now.localize(:es).to_medium_s # "14/2/2014 12:20:05"
158
+ DateTime.now.localize(:es).to_short_s # "14/2/14 12:20"
96
159
 
97
- Time.now.localize(:es).to_full_s # "21:44:57 UTC -0800"
98
- Time.now.localize(:es).to_long_s # "21:44:57 UTC"
99
- Time.now.localize(:es).to_medium_s # "21:44:57"
100
- Time.now.localize(:es).to_short_s # "21:44"
160
+ Time.now.localize(:es).to_full_s # "12:20:05 (UTC +00:00)"
161
+ Time.now.localize(:es).to_long_s # "12:20:05 UTC"
162
+ Time.now.localize(:es).to_medium_s # "12:20:05"
163
+ Time.now.localize(:es).to_short_s # "12:20"
101
164
 
102
- DateTime.now.localize(:es).to_date.to_full_s # "lunes 12 de diciembre de 2011"
103
- DateTime.now.localize(:es).to_date.to_long_s # "12 de diciembre de 2011"
104
- DateTime.now.localize(:es).to_date.to_medium_s # "12/12/2011"
105
- DateTime.now.localize(:es).to_date.to_short_s # "12/12/11"
165
+ DateTime.now.localize(:es).to_date.to_full_s # "viernes, 14 de febrero de 2014"
166
+ DateTime.now.localize(:es).to_date.to_long_s # "14 de febrero de 2014"
167
+ DateTime.now.localize(:es).to_date.to_medium_s # "14/2/2014"
168
+ DateTime.now.localize(:es).to_date.to_short_s # "14/2/14"
106
169
  ```
107
170
 
108
171
  The default CLDR data set only includes 4 date formats: full, long, medium, and short. See below for a list of additional formats.
109
172
 
110
173
  Behind the scenes, these convenience methods are creating instances of `LocalizedDate`, `LocalizedTime`, and `LocalizedDateTime`. You can do the same thing if you're feeling adventurous:
111
174
 
175
+
176
+
112
177
  ```ruby
113
178
  dt = TwitterCldr::Localized::LocalizedDateTime.new(DateTime.now, :es)
114
179
  dt.to_short_s # ...etc
@@ -116,58 +181,59 @@ dt.to_short_s # ...etc
116
181
 
117
182
  #### Additional Date Formats
118
183
 
119
- Besides the default date formats, CLDR supports a number of additional ones. The list of available formats varys for each locale. To get a full list, use the `additional_formats_for` method:
184
+ Besides the default date formats, CLDR supports a number of additional ones. The list of available formats varies for each locale. To get a full list, use the `additional_formats` method:
120
185
 
121
186
  ```ruby
122
- # ["EEEEd", "Ed", "GGGGyMd", "H", "Hm", "Hms", "M", "MEd", "MMM", "MMMEEEEd", "MMMEd", ... ]
123
- TwitterCldr::Formatters::DateTimeFormatter.additional_formats_for(:ja)
187
+ # ["EEEEd", "EHm", "EHms", "Ed", "Ehm", "Ehms", "Gy", "GyMMM", "GyMMMEEEEd", "GyMMMEd", "GyMMMd", "H", ... ]
188
+ DateTime.now.localize(:ja).additional_formats
124
189
  ```
125
190
 
126
- You can use any of the returned formats as the `:format` option when creating new instances of `LocalizedDateTime` or `DateTimeFormatter`:
191
+ You can use any of the returned formats as the argument to the `to_additional_s` method:
127
192
 
128
193
  ```ruby
129
- # 2011/12/12 21:44:57
130
- DateTime.now.localize(:ja).to_s
131
-
132
- # 12日月曜日
133
- DateTime.now.localize(:ja).to_s(:format => "EEEEd")
194
+ # "14日金曜日"
195
+ DateTime.now.localize(:ja).to_additional_s("EEEEd")
134
196
  ```
135
197
 
136
- It's important to know that, even though a format may not be available across locales, TwitterCLDR will do it's best to approximate if no exact match can be found.
198
+ It's important to know that, even though any given format may not be available across locales, TwitterCLDR will do its best to approximate if no exact match can be found.
137
199
 
138
200
  ##### List of additional date format examples for English:
139
201
 
140
- | Format | Output |
141
- |:-------|------------------|
142
- | EHm | Wed 17:05 |
143
- | EHms | Wed 17:05:33 |
144
- | Ed | 28 Wed |
145
- | Ehm | Wed 5:05 p.m. |
146
- | Ehms | Wed 5:05:33 p.m. |
147
- | Gy | 2012 AD |
148
- | H | 17 |
149
- | Hm | 17:05 |
150
- | Hms | 17:05:33 |
151
- | M | 11 |
152
- | MEd | Wed 11/28 |
153
- | MMM | Nov |
154
- | MMMEd | Wed Nov 28 |
155
- | MMMd | Nov 28 |
156
- | Md | 11/28 |
157
- | d | 28 |
158
- | h | 5 p.m. |
159
- | hm | 5:05 p.m. |
160
- | hms | 5:05:33 p.m. |
161
- | ms | 05:33 |
162
- | y | 2012 |
163
- | yM | 11/2012 |
164
- | yMEd | Wed 11/28/2012 |
165
- | yMMM | Nov 2012 |
166
- | yMMMEd | Wed Nov 28 2012 |
167
- | yMMMd | Nov 28 2012 |
168
- | yMd | 11/28/2012 |
169
- | yQQQ | Q4 2012 |
170
- | yQQQQ | 4th quarter 2012 |
202
+ | Format | Output |
203
+ |:-----------|------------------------|
204
+ | EHm | Fri 12:20 |
205
+ | EHms | Fri 12:20:05 |
206
+ | Ed | 14 Fri |
207
+ | Ehm | Fri 12:20 PM |
208
+ | Ehms | Fri 12:20:05 PM |
209
+ | Gy | 2014 CE |
210
+ | GyMMM | Feb 2014 CE |
211
+ | GyMMMEd | Fri, Feb 14, 2014 CE |
212
+ | GyMMMd | Feb 14, 2014 CE |
213
+ | H | 12 |
214
+ | Hm | 12:20 |
215
+ | Hms | 12:20:05 |
216
+ | M | 2 |
217
+ | MEd | Fri, 2/14 |
218
+ | MMM | Feb |
219
+ | MMMEd | Fri, Feb 14 |
220
+ | MMMd | Feb 14 |
221
+ | Md | 2/14 |
222
+ | d | 14 |
223
+ | h | 12 PM |
224
+ | hm | 12:20 PM |
225
+ | hms | 12:20:05 PM |
226
+ | ms | 20:05 |
227
+ | y | 2014 |
228
+ | yM | 2/2014 |
229
+ | yMEd | Fri, 2/14/2014 |
230
+ | yMMM | Feb 2014 |
231
+ | yMMMEd | Fri, Feb 14, 2014 |
232
+ | yMMMd | Feb 14, 2014 |
233
+ | yMd | 2/14/2014 |
234
+ | yQQQ | Q1 2014 |
235
+ | yQQQQ | 1st quarter 2014 |
236
+
171
237
 
172
238
 
173
239
  #### Relative Dates and Times
@@ -175,52 +241,54 @@ It's important to know that, even though a format may not be available across lo
175
241
  In addition to formatting full dates and times, TwitterCLDR supports relative time spans via several convenience methods and the `LocalizedTimespan` class. TwitterCLDR tries to guess the best time unit (e.g. days, hours, minutes, etc.) based on the length of the time span. Unless otherwise specified, TwitterCLDR will use the current date and time as the reference point for the calculation.
176
242
 
177
243
  ```ruby
178
- (DateTime.now - 1).localize.ago.to_s # 1 day ago
179
- (DateTime.now - 0.5).localize.ago.to_s # 12 hours ago (i.e. half a day)
244
+ (DateTime.now - 1).localize.ago.to_s # "1 day ago"
245
+ (DateTime.now - 0.5).localize.ago.to_s # "12 hours ago" (i.e. half a day)
180
246
 
181
- (DateTime.now + 1).localize.until.to_s # In 1 day
182
- (DateTime.now + 0.5).localize.until.to_s # In 12 hours
247
+ (DateTime.now + 1).localize.until.to_s # "In 1 day"
248
+ (DateTime.now + 0.5).localize.until.to_s # "In 12 hours"
183
249
  ```
184
250
 
185
251
  Specify other locales:
186
252
 
187
253
  ```ruby
188
- (DateTime.now - 1).localize(:de).ago.to_s # Vor 1 Tag
189
- (DateTime.now + 1).localize(:de).until.to_s # In 1 Tag
254
+ (DateTime.now - 1).localize(:de).ago.to_s # "Vor 1 Tag"
255
+ (DateTime.now + 1).localize(:de).until.to_s # "In 1 Tag"
190
256
  ```
191
257
 
192
258
  Force TwitterCLDR to use a specific time unit by including the `:unit` option:
193
259
 
194
260
  ```ruby
195
- (DateTime.now - 1).localize(:de).ago.to_s(:unit => :hour) # Vor 24 Stunden
196
- (DateTime.now + 1).localize(:de).until.to_s(:unit => :hour) # In 24 Stunden
261
+ (DateTime.now - 1).localize(:de).ago.to_s(:unit => :hour) # "Vor 24 Stunden"
262
+ (DateTime.now + 1).localize(:de).until.to_s(:unit => :hour) # "In 24 Stunden"
197
263
  ```
198
264
 
199
265
  Specify a different reference point for the time span calculation:
200
266
 
201
267
  ```ruby
202
268
  # 86400 = 1 day in seconds, 259200 = 3 days in seconds
203
- (Time.now + 86400).localize(:de).ago(:base_time => (Time.now + 259200)).to_s(:unit => :hour) # Vor 48 Stunden
269
+ (Time.now + 86400).localize(:de).ago(:base_time => (Time.now + 259200)).to_s(:unit => :hour) # "Vor 48 Stunden"
204
270
  ```
205
271
 
206
272
  Behind the scenes, these convenience methods are creating instances of `LocalizedTimespan`, whose constructor accepts a number of seconds as the first argument. You can do the same thing if you're feeling adventurous:
207
273
 
208
274
  ```ruby
275
+
209
276
  ts = TwitterCldr::Localized::LocalizedTimespan.new(86400, :locale => :de)
210
- ts.to_s # In 1 Tag
211
- ts.to_s(:unit => :hour) # In 24 Stunden
277
+ ts.to_s # "In 1 Tag"
278
+ ts.to_s(:unit => :hour) # "In 24 Stunden"
279
+
212
280
 
213
281
  ts = TwitterCldr::Localized::LocalizedTimespan.new(-86400, :locale => :de)
214
- ts.to_s # Vor 1 Tag
215
- ts.to_s(:unit => :hour) # Vor 24 Stunden
282
+ ts.to_s # "Vor 1 Tag"
283
+ ts.to_s(:unit => :hour) # "Vor 24 Stunden"
216
284
  ```
217
285
 
218
286
  By default, timespans are exact representations of a given unit of elapsed time. TwitterCLDR also supports approximate timespans which round up to the nearest larger unit. For example, "44 seconds" remains "44 seconds" while "45 seconds" becomes "1 minute". To approximate, pass the `:approximate => true` option into `to_s`:
219
287
 
220
288
  ```ruby
221
- TwitterCldr::Localized::LocalizedTimespan.new(44).to_s(:approximate => true) # In 44 seconds
222
- TwitterCldr::Localized::LocalizedTimespan.new(45).to_s(:approximate => true) # In 1 minute
223
- TwitterCldr::Localized::LocalizedTimespan.new(52).to_s(:approximate => true) # In 1 minute
289
+ TwitterCldr::Localized::LocalizedTimespan.new(44).to_s(:approximate => true) # "In 44 seconds"
290
+ TwitterCldr::Localized::LocalizedTimespan.new(45).to_s(:approximate => true) # "In 1 minute"
291
+ TwitterCldr::Localized::LocalizedTimespan.new(52).to_s(:approximate => true) # "In 1 minute"
224
292
  ```
225
293
 
226
294
  ### Lists
@@ -228,17 +296,17 @@ TwitterCldr::Localized::LocalizedTimespan.new(52).to_s(:approximate => true) #
228
296
  TwitterCLDR supports formatting lists of strings as you might do in English by using commas, e.g. "Apples, cherries, and oranges". Use the `localize` method on an array followed by a call to `to_sentence`:
229
297
 
230
298
  ```ruby
231
- ["apples", "cherries", "oranges"].localize.to_sentence # "apples, cherries, and oranges"
299
+ ["apples", "cherries", "oranges"].localize.to_sentence # "apples, cherries, and oranges"
232
300
  ["apples", "cherries", "oranges"].localize(:es).to_sentence # "apples, cherries y oranges"
233
301
  ```
234
302
 
235
303
  Behind the scenes, these convenience methods are creating instances of `ListFormatter`. You can do the same thing if you're feeling adventurous:
236
304
 
237
305
  ```ruby
238
- f = TwitterCldr::Formatters::ListFormatter.new(:locale => :en)
306
+ f = TwitterCldr::Formatters::ListFormatter.new(:en)
239
307
  f.format(["Larry", "Curly", "Moe"]) # "Larry, Curly, and Moe"
240
308
 
241
- f = TwitterCldr::Formatters::ListFormatter.new(:locale => :es)
309
+ f = TwitterCldr::Formatters::ListFormatter.new(:es)
242
310
  f.format(["Larry", "Curly", "Moe"]) # "Larry, Curly y Moe"
243
311
  ```
244
312
 
@@ -252,7 +320,7 @@ TwitterCLDR makes it easy to find the plural rules for any numeric value:
252
320
 
253
321
  ```ruby
254
322
  1.localize(:ru).plural_rule # :one
255
- 2.localize(:ru).plural_rule # :few
323
+ 2.localize(:ru).plural_rule # :other
256
324
  5.localize(:ru).plural_rule # :many
257
325
  ```
258
326
 
@@ -260,15 +328,15 @@ Behind the scenes, these convenience methods use the `TwitterCldr::Formatters::P
260
328
 
261
329
  ```ruby
262
330
  # get all rules for the default locale
263
- TwitterCldr::Formatters::Plurals::Rules.all # [:one, ... ]
331
+ TwitterCldr::Formatters::Plurals::Rules.all # [:one, :other]
264
332
 
265
333
  # get all rules for a specific locale
266
334
  TwitterCldr::Formatters::Plurals::Rules.all_for(:es) # [:one, :other]
267
- TwitterCldr::Formatters::Plurals::Rules.all_for(:ru) # [:one, :few, :many, :other]
335
+ TwitterCldr::Formatters::Plurals::Rules.all_for(:ru) # [:one, :many, :other]
268
336
 
269
337
  # get the rule for a number in a specific locale
270
338
  TwitterCldr::Formatters::Plurals::Rules.rule_for(1, :ru) # :one
271
- TwitterCldr::Formatters::Plurals::Rules.rule_for(2, :ru) # :few
339
+ TwitterCldr::Formatters::Plurals::Rules.rule_for(2, :ru) # :other
272
340
  ```
273
341
 
274
342
  ### Plurals
@@ -276,9 +344,13 @@ TwitterCldr::Formatters::Plurals::Rules.rule_for(2, :ru) # :few
276
344
  In addition to providing access to plural rules, TwitterCLDR allows you to embed plurals directly in your source code:
277
345
 
278
346
  ```ruby
279
- replacements = { :horse_count => 3,
280
- :horses => { :one => "is 1 horse",
281
- :other => "are %{horse_count} horses" } }
347
+ replacements = {
348
+ :horse_count => 3,
349
+ :horses => {
350
+ :one => "is 1 horse",
351
+ :other => "are %{horse_count} horses"
352
+ }
353
+ }
282
354
 
283
355
  # "there are 3 horses in the barn"
284
356
  "there %{horse_count:horses} in the barn".localize % replacements
@@ -336,7 +408,7 @@ You can use the localize convenience method on language code symbols to get thei
336
408
  Behind the scenes, these convenience methods are creating instances of `LocalizedSymbol`. You can do the same thing if you're feeling adventurous:
337
409
 
338
410
  ```ruby
339
- ls = LocalizedSymbol.new(:ru, :es)
411
+ ls = TwitterCldr::Localized::LocalizedSymbol.new(:ru, :es)
340
412
  ls.as_language_code # "ruso"
341
413
  ```
342
414
 
@@ -344,10 +416,10 @@ In addition to translating language codes, TwitterCLDR provides access to the fu
344
416
 
345
417
  ```ruby
346
418
  # get all languages for the default locale
347
- TwitterCldr::Shared::Languages.all # { ... :"zh-Hant" => "Traditional Chinese", :vi => "Vietnamese" ... }
419
+ TwitterCldr::Shared::Languages.all # { ... :vi => "Vietnamese", :"zh-Hant" => "Traditional Chinese" ... }
348
420
 
349
421
  # get all languages for a specific locale
350
- TwitterCldr::Shared::Languages.all_for(:es) # { ... :"zh-Hant" => "chino tradicional", :vi => "vietnamita" ... }
422
+ TwitterCldr::Shared::Languages.all_for(:es) # { ... :vi => "vietnamita", :"zh-Hant" => "chino tradicional" ... }
351
423
 
352
424
  # get a language by its code for the default locale
353
425
  TwitterCldr::Shared::Languages.from_code(:'zh-Hant') # "Traditional Chinese"
@@ -383,7 +455,7 @@ TwitterCldr::Shared::PostalCodes.valid?(:ca, "V3H 1Z7") # true
383
455
  Get a list of supported territories by using the `#territories` method:
384
456
 
385
457
  ```ruby
386
- TwitterCldr::Shared::PostalCodes.territories # [:ve, :iq, :cx, :cv, ...]
458
+ TwitterCldr::Shared::PostalCodes.territories # [:ad, :am, :ar, :as, :at, ... ]
387
459
  ```
388
460
 
389
461
  Just want the regex? No problem:
@@ -413,12 +485,12 @@ TwitterCldr::Shared::PhoneCodes.code_for_territory(:dk) # "45"
413
485
  Get a list of supported territories by using the `#territories` method:
414
486
 
415
487
  ```ruby
416
- TwitterCldr::Shared::PhoneCodes.territories # [:zw, :an, :tr, :by, :mh, ...]
488
+ TwitterCldr::Shared::PhoneCodes.territories # [:ac, :ad, :ae, :af, :ag, ... ]
417
489
  ```
418
490
 
419
491
  ### Language Codes
420
492
 
421
- Over the years, different standards for language codes have accumulated. Probably the two most popular are ISO-639 and BCP-47 and their children. TwitterCLDR provides a way to convert between these codes systematically.
493
+ Over the years, different standards for language codes have accumulated. Probably the two most popular are ISO-639 and BCP-47 and their children. TwitterCLDR provides a way to convert between these codes programmatically.
422
494
 
423
495
  ```ruby
424
496
  TwitterCldr::Shared::LanguageCodes.convert(:es, :from => :bcp_47, :to => :iso_639_2) # :spa
@@ -441,7 +513,7 @@ TwitterCldr::Shared::LanguageCodes.standards_for_language(:Spanish)
441
513
  Get a list of supported languages:
442
514
 
443
515
  ```ruby
444
- TwitterCldr::Shared::LanguageCodes.languages # [:Spanish, :German, :Norwegian, :Arabic ... ]
516
+ TwitterCldr::Shared::LanguageCodes.languages # [:Arabic, :German, :Norwegian, :Spanish, ... ]
445
517
  ```
446
518
 
447
519
  Determine valid standards:
@@ -472,6 +544,84 @@ TwitterCldr::Shared::LanguageCodes.to_language(:spa, :iso_639_2) # "Spanish"
472
544
 
473
545
  **NOTE**: All of the functions in `TwitterCldr::Shared::LanguageCodes` accept both symbol and string parameters.
474
546
 
547
+ ### Unicode Regular Expressions
548
+
549
+ Unicode regular expressions are an extension of the normal regular expression syntax. All of the changes are local to the regex's character class feature and provide support for multi-character strings, Unicode character escapes, set operations (unions, intersections, and differences), and character sets.
550
+
551
+ #### Changes to Character Classes
552
+
553
+ Here's a complete list of the operations you can do inside a Unicode regex's character class.
554
+
555
+ | Regex | Description |
556
+ |:-------------------|:--------------------------------------------------------------------------------------------------------------------|
557
+ |`[a]` | The set containing 'a'. |
558
+ |`[a-z]` | The set containing 'a' through 'z' and all letters in between, in Unicode order. |
559
+ |`[^a-z]` | The set containing all characters except 'a' through 'z', that is, U+0000 through 'a'-1 and 'z'+1 through U+10FFFF. |
560
+ |`[[pat1][pat2]]` | The union of sets specified by pat1 and pat2. |
561
+ |`[[pat1]&[pat2]]` | The intersection of sets specified by pat1 and pat2. |
562
+ |`[[pat1]-[pat2]]` | The [symmetric difference](http://en.wikipedia.org/wiki/Symmetric_difference) of sets specified by pat1 and pat2. |
563
+ |`[:Lu:] or \p{Lu}` | The set of characters having the specified Unicode property; in this case, Unicode uppercase letters. |
564
+ |`[:^Lu:] or \P{Lu}` | The set of characters not having the given Unicode property. |
565
+
566
+ For a description of available Unicode properties, see [Wikipedia](http://en.wikipedia.org/wiki/Unicode_character_property#General_Category) (click on "[show]").
567
+
568
+ #### Using Unicode Regexes
569
+
570
+ Create Unicode regular expressions via the `#compile` method:
571
+
572
+ ```ruby
573
+
574
+ regex = TwitterCldr::Shared::UnicodeRegex.compile("[:Lu:]+")
575
+ ```
576
+
577
+ Once compiled, instances of `UnicodeRegex` behave just like normal Ruby regexes and support the `#match` and `#=~` methods:
578
+
579
+ ```ruby
580
+
581
+ regex.match("ABC") # <MatchData "ABC">
582
+ regex =~ "fooABC" # 3
583
+ ```
584
+
585
+ Protip: Try to avoid negation in character classes (e.g. `[^abc]` and `\P{Lu}`), as it tends to hurt performance both when constructing regexes and when matching.
586
+
587
+ #### Support for Ruby 1.8
588
+
589
+ Ruby 1.8 does not allow escaped Unicode characters in regular expressions and restricts their maximum length. TwitterCLDR's `UnicodeRegex` class supports escaped Unicode characters in Ruby 1.8, but cannot offer a work-around for the length issue. For this reason, Ruby 1.8 users are required to install the oniguruma regex engine and require the oniguruma gem in their projects.
590
+
591
+ To install oniguruma, run `brew install oniguruma` on MacOS or `[sudo] apt-get install libonig-dev` on Ubuntu (you may need to search for other instructions specific to your platform). Then, install the oniguruma gem via your Gemfile or on your system via `gem install oniguruma`. Once installed, `require 'oniguruma'` somewhere in your project before making use of the `TwitterCldr::Shared::UnicodeRegex` class.
592
+
593
+ ### Text Segmentation
594
+
595
+ TwitterCLDR currently supports text segmentation by sentence as described in the [Unicode Technical Report #29](http://www.unicode.org/reports/tr29/). The segmentation algorithm makes use of Unicode regular expressions (described above). Because of this, if you're running Ruby 1.8, you'll need to follow the instructions above to install the oniguruma regular expression engine. Segmentation by word, line, and grapheme boundaries could also be supported if someone wants them.
596
+
597
+ You can break a string into sentences using the `LocalizedString#each_sentence` method:
598
+
599
+ ```ruby
600
+ "The. Quick. Brown. Fox.".localize.each_sentence do |sentence|
601
+ puts sentence.to_s # "The.", " Quick.", " Brown.", " Fox."
602
+ end
603
+ ```
604
+
605
+ Under the hood, text segmentation is performed by the `BreakIterator` class (name borrowed from ICU). You can use it directly if you're feeling adventurous:
606
+
607
+ ```ruby
608
+
609
+ iterator = TwitterCldr::Shared::BreakIterator.new(:en)
610
+ iterator.each_sentence("The. Quick. Brown. Fox.") do |sentence|
611
+ puts sentence # "The.", " Quick.", " Brown.", " Fox."
612
+ end
613
+ ```
614
+
615
+ To improve segmentation accuracy, a list of special segmentation exceptions has been created by the ULI (Unicode Localization Interoperability Technical Committee, yikes what a mouthful). They help with special cases like the abbreviations "Mr." and "Ms." where breaks should not occur. ULI rules are enabled by default, but you can disable them via the `:use_uli_exceptions` option:
616
+
617
+ ```ruby
618
+
619
+ iterator = TwitterCldr::Shared::BreakIterator.new(:en, :use_uli_exceptions => false)
620
+ iterator.each_sentence("I like Ms. Murphy, she's nice.") do |sentence|
621
+ puts sentence # "I like Ms.", " Murphy, she's nice."
622
+ end
623
+ ```
624
+
475
625
  ### Unicode Data
476
626
 
477
627
  TwitterCLDR provides ways to retrieve individual code points as well as normalize and decompose Unicode text.
@@ -479,6 +629,7 @@ TwitterCLDR provides ways to retrieve individual code points as well as normaliz
479
629
  Retrieve data for code points:
480
630
 
481
631
  ```ruby
632
+
482
633
  code_point = TwitterCldr::Shared::CodePoint.find(0x1F3E9)
483
634
  code_point.name # "LOVE HOTEL"
484
635
  code_point.bidi_mirrored # "N"
@@ -489,7 +640,7 @@ code_point.combining_class # "0"
489
640
  Convert characters to code points:
490
641
 
491
642
  ```ruby
492
- TwitterCldr::Utils::CodePoints.from_string("¿") # [0xBF]
643
+ TwitterCldr::Utils::CodePoints.from_string("¿") # [191]
493
644
  ```
494
645
 
495
646
  Convert code points to characters:
@@ -498,10 +649,12 @@ Convert code points to characters:
498
649
  TwitterCldr::Utils::CodePoints.to_string([0xBF]) # "¿"
499
650
  ```
500
651
 
652
+ #### Normalization
653
+
501
654
  Normalize/decompose a Unicode string (NFD, NFKD, NFC, and NFKC implementations available). Note that the normalized string will almost always look the same as the original string because most character display systems automatically combine decomposed characters.
502
655
 
503
656
  ```ruby
504
- TwitterCldr::Normalization::NFD.normalize("français") # "français"
657
+ TwitterCldr::Normalization.normalize("français") # "français"
505
658
  ```
506
659
 
507
660
  Normalization is easier to see in hex:
@@ -511,7 +664,7 @@ Normalization is easier to see in hex:
511
664
  TwitterCldr::Utils::CodePoints.from_string("español")
512
665
 
513
666
  # [101, 115, 112, 97, 110, 771, 111, 108]
514
- TwitterCldr::Utils::CodePoints.from_string(TwitterCldr::Normalization::NFD.normalize("español"))
667
+ TwitterCldr::Utils::CodePoints.from_string(TwitterCldr::Normalization.normalize("español"))
515
668
  ```
516
669
 
517
670
  Notice in the example above that the letter "ñ" was transformed from `241` to `110 771`, which represent the "n" and the "˜" respectively.
@@ -533,6 +686,22 @@ Specify a specific normalization algorithm via the `:using` option. NFD, NFKD,
533
686
  "español".localize.normalize(:using => :NFKD).code_points
534
687
  ```
535
688
 
689
+ #### Casefolding
690
+
691
+ Casefolding is, generally speaking, the process of converting uppercase characters to lowercase ones so as to make text uniform and therefore easier to search. The canonical example of this is the German double "s". The "ß" character is transformed into "ss" by casefolding.
692
+
693
+ ```ruby
694
+ "Hello, World".localize.casefold.to_s # hello, world
695
+ "Weißrussland".localize.casefold.to_s # weissrussland
696
+ ```
697
+
698
+ Turkic languages make use of both the dotless and dotted uppercase i characters, "I" and "İ". Normal casefolding will convert a dotless uppercase "I" to a lowercase, dotted "i", which is correct in English. Turkic languages, however, expect the lowercase version of a dotless uppercase "I" to be a lowercase, dotless "ı". Pass the `:t` option to the `casefold` method to force Turkic treatment of "i" characters. By default, the `:t` option is set to true for Turkish and Azerbaijani:
699
+
700
+ ```ruby
701
+ "Istanbul".localize.casefold(:t => true).to_s # ıstanbul
702
+ "Istanbul".localize(:tr).casefold.to_s # ıstanbul
703
+ ```
704
+
536
705
  ### Sorting (Collation)
537
706
 
538
707
  TwitterCLDR contains an implementation of the [Unicode Collation Algorithm (UCA)](http://unicode.org/reports/tr10/) that provides language-sensitive text sorting capabilities. Conveniently, all you have to do is use the `sort` method in combination with the familiar `localize` method. Notice the difference between the default Ruby sort, which simply compares bytes, and the proper language-aware sort from TwitterCLDR in this German example:
@@ -545,6 +714,7 @@ TwitterCLDR contains an implementation of the [Unicode Collation Algorithm (UCA)
545
714
  Behind the scenes, these convenience methods are creating instances of `LocalizedArray`, then using the `TwitterCldr::Collation::Collator` class to sort the elements:
546
715
 
547
716
  ```ruby
717
+
548
718
  collator = TwitterCldr::Collation::Collator.new(:de)
549
719
  collator.sort(["Art", "Wasa", "Älg", "Ved"]) # ["Älg", "Art", "Ved", "Wasa"]
550
720
  collator.sort!(["Art", "Wasa", "Älg", "Ved"]) # ["Älg", "Art", "Ved", "Wasa"]
@@ -553,6 +723,7 @@ collator.sort!(["Art", "Wasa", "Älg", "Ved"]) # ["Älg", "Art", "Ved", "Was
553
723
  The `TwitterCldr::Collation::Collator` class also provides methods to compare two strings, get sort keys, and calculate collation elements for individual strings:
554
724
 
555
725
  ```ruby
726
+
556
727
  collator = TwitterCldr::Collation::Collator.new(:de)
557
728
  collator.compare("Art", "Älg") # 1
558
729
  collator.compare("Älg", "Art") # -1
@@ -572,6 +743,7 @@ When it comes to displaying text written in both right-to-left (RTL) and left-to
572
743
  To mitigate this problem, Unicode supports special invisible characters that force visual reordering so that mixed RTL and LTR (called "bidirectional") text renders naturally on the screen. The Unicode Consortium has developed an algorithm (The Unicode Bidirectional Algorithm, or UBA) that intelligently inserts these control characters where appropriate. You can make use of the UBA implementation in TwitterCLDR by creating a new instance of `TwitterCldr::Shared::Bidi` using the `from_string` static method, and manipulating it like so:
573
744
 
574
745
  ```ruby
746
+
575
747
  bidi = TwitterCldr::Shared::Bidi.from_string("hello نزوة world", :direction => :RTL)
576
748
  bidi.reorder_visually!
577
749
  bidi.to_s
@@ -586,17 +758,17 @@ Ruby 1.8 does not come with great Unicode support, and nowhere is this more appa
586
758
  You can make use of TwitterCLDR's YAML dumper by calling `localize` and then `to_yaml` on an `Array`, `Hash`, or `String`:
587
759
 
588
760
  ```ruby
589
- { :hello => "world" }.localize.to_yaml
590
- ["hello", "world"].localize.to_yaml
591
- "hello, world".localize.to_yaml
761
+ { :hello => "world" }.localize.to_yaml
762
+ ["hello", "world"].localize.to_yaml
763
+ "hello, world".localize.to_yaml
592
764
  ```
593
765
 
594
766
  Behind the scenes, these convenience methods are using the `TwitterCldr::Shared::YAML` class. You can do the same thing if you're feeling adventurous:
595
767
 
596
768
  ```ruby
597
- TwitterCldr::Shared::YAML.dump({ :hello => "world" })
598
- TwitterCldr::Shared::YAML.dump(["hello", "world"])
599
- TwitterCldr::Shared::YAML.dump("hello, world")
769
+ TwitterCldr::Shared::YAML.dump({ :hello => "world" })
770
+ TwitterCldr::Shared::YAML.dump(["hello", "world"])
771
+ TwitterCldr::Shared::YAML.dump("hello, world")
600
772
  ```
601
773
 
602
774
  ## About Twitter-specific Locales
@@ -606,8 +778,8 @@ Twitter tries to always use BCP-47 language codes. Data from the CLDR doesn't a
606
778
  ```ruby
607
779
  TwitterCldr.convert_locale(:'zh-cn') # :zh
608
780
  TwitterCldr.convert_locale(:zh) # :zh
609
- TwitterCldr.convert_locale(:'zh-tw') # :'zh-Hant'
610
- TwitterCldr.convert_locale(:'zh-Hant') # :'zh-Hant'
781
+ TwitterCldr.convert_locale(:'zh-tw') # :"zh-Hant"
782
+ TwitterCldr.convert_locale(:'zh-Hant') # :"zh-Hant"
611
783
 
612
784
  TwitterCldr.convert_locale(:msa) # :ms
613
785
  TwitterCldr.convert_locale(:ms) # :ms
@@ -621,7 +793,19 @@ TwitterCldr.get_locale # will return :en
621
793
  require 'fast_gettext'
622
794
  FastGettext.locale = "ru"
623
795
 
624
- TwitterCldr.get_locale # will return :ru
796
+ TwitterCldr.locale # will return :ru
797
+ ```
798
+
799
+ ## Compatibility
800
+
801
+ TwitterCLDR is fully compatible with Ruby 1.8.7, 1.9.3, 2.0.0, and, experimentally, 2.1.0. We are considering dropping support for Ruby 1.8. If you still need to use TwitterCLDR in a Ruby 1.8 environment, please let us know as soon as possible. Please note that certain TwitterCLDR features require additional dependencies or considerations when run on Ruby 1.8. Refer to the sections above for details.
802
+
803
+ #### Notes on Ruby 1.8
804
+
805
+ Numerous TwitterCLDR features have been built with the assumption that they will only ever be used on UTF-8 encoded text, which is mostly due to the need to support Ruby 1.8. For this reason, you may find it necessary to set the global `$KCODE` variable to `"UTF-8"`. Setting this variable tells Ruby what encoding to use when loading source files. TwitterCLDR will **not** set this value for you.
806
+
807
+ ```ruby
808
+ $KCODE = "UTF-8"
625
809
  ```
626
810
 
627
811
  ## Requirements
@@ -655,6 +839,6 @@ TwitterCLDR currently supports localization of certain textual objects in JavaSc
655
839
 
656
840
  ## License
657
841
 
658
- Copyright 2012 Twitter, Inc.
842
+ Copyright 2014 Twitter, Inc.
659
843
 
660
844
  Licensed under the Apache License, Version 2.0: http://www.apache.org/licenses/LICENSE-2.0