twitter_cldr 3.0.0.beta1 → 3.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (698) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +16 -2
  3. data/History.txt +9 -1
  4. data/README.md +297 -113
  5. data/Rakefile +97 -17
  6. data/lib/twitter_cldr/collation/collator.rb +12 -3
  7. data/lib/twitter_cldr/collation/trie_builder.rb +1 -1
  8. data/lib/twitter_cldr/{tokenizers/calendars → data_readers}/additional_date_format_selector.rb +6 -4
  9. data/lib/twitter_cldr/data_readers/calendar_data_reader.rb +91 -0
  10. data/lib/twitter_cldr/data_readers/data_reader.rb +32 -0
  11. data/lib/twitter_cldr/data_readers/date_data_reader.rb +26 -0
  12. data/lib/twitter_cldr/data_readers/date_time_data_reader.rb +41 -0
  13. data/lib/twitter_cldr/data_readers/number_data_reader.rb +142 -0
  14. data/lib/twitter_cldr/data_readers/time_data_reader.rb +26 -0
  15. data/lib/twitter_cldr/data_readers/timespan_data_reader.rb +122 -0
  16. data/lib/twitter_cldr/data_readers.rb +17 -0
  17. data/lib/twitter_cldr/formatters/calendars/{datetime_formatter.rb → date_time_formatter.rb} +27 -42
  18. data/lib/twitter_cldr/formatters/calendars/timespan_formatter.rb +3 -64
  19. data/lib/twitter_cldr/formatters/formatter.rb +39 -0
  20. data/lib/twitter_cldr/formatters/list_formatter.rb +22 -12
  21. data/lib/twitter_cldr/formatters/numbers/abbreviated/abbreviated_number_formatter.rb +5 -26
  22. data/lib/twitter_cldr/formatters/numbers/currency_formatter.rb +2 -11
  23. data/lib/twitter_cldr/formatters/numbers/decimal_formatter.rb +4 -2
  24. data/lib/twitter_cldr/formatters/numbers/number_formatter.rb +45 -27
  25. data/lib/twitter_cldr/formatters/numbers/percent_formatter.rb +3 -13
  26. data/lib/twitter_cldr/formatters/numbers/rbnf/formatters.rb +224 -0
  27. data/lib/twitter_cldr/formatters/numbers/rbnf/post_processors/chinese.rb +122 -0
  28. data/lib/twitter_cldr/formatters/numbers/rbnf/rule.rb +93 -0
  29. data/lib/twitter_cldr/formatters/numbers/rbnf/rule_group.rb +20 -0
  30. data/lib/twitter_cldr/formatters/numbers/rbnf/rule_parser.rb +86 -0
  31. data/lib/twitter_cldr/formatters/numbers/rbnf/rule_set.rb +259 -0
  32. data/lib/twitter_cldr/formatters/numbers/rbnf/substitution.rb +30 -0
  33. data/lib/twitter_cldr/formatters/numbers/rbnf.rb +127 -0
  34. data/lib/twitter_cldr/formatters/plurals/plural_formatter.rb +18 -6
  35. data/lib/twitter_cldr/formatters.rb +4 -5
  36. data/lib/twitter_cldr/localized/localized_array.rb +1 -1
  37. data/lib/twitter_cldr/localized/localized_date.rb +6 -3
  38. data/lib/twitter_cldr/localized/localized_datetime.rb +38 -15
  39. data/lib/twitter_cldr/localized/localized_number.rb +40 -24
  40. data/lib/twitter_cldr/localized/localized_object.rb +4 -4
  41. data/lib/twitter_cldr/localized/localized_string.rb +40 -7
  42. data/lib/twitter_cldr/localized/localized_time.rb +9 -2
  43. data/lib/twitter_cldr/localized/localized_timespan.rb +50 -5
  44. data/lib/twitter_cldr/normalization.rb +8 -19
  45. data/lib/twitter_cldr/parsers/parser.rb +50 -0
  46. data/lib/twitter_cldr/parsers/segmentation_parser.rb +137 -0
  47. data/lib/twitter_cldr/parsers/symbol_table.rb +30 -0
  48. data/lib/twitter_cldr/parsers/unicode_regex/character_class.rb +91 -0
  49. data/lib/twitter_cldr/parsers/unicode_regex/character_range.rb +39 -0
  50. data/lib/twitter_cldr/parsers/unicode_regex/character_set.rb +65 -0
  51. data/lib/twitter_cldr/parsers/unicode_regex/component.rb +50 -0
  52. data/lib/twitter_cldr/parsers/unicode_regex/literal.rb +83 -0
  53. data/lib/twitter_cldr/parsers/unicode_regex/unicode_string.rb +41 -0
  54. data/lib/twitter_cldr/parsers/unicode_regex_parser.rb +262 -0
  55. data/lib/twitter_cldr/parsers.rb +5 -1
  56. data/lib/twitter_cldr/resources/casefolder.rb.erb +64 -0
  57. data/lib/twitter_cldr/resources/casefolder_class_generator.rb +75 -0
  58. data/lib/twitter_cldr/resources/download.rb +10 -4
  59. data/lib/twitter_cldr/resources/icu_based_importer.rb +18 -0
  60. data/lib/twitter_cldr/resources/locales_resources_importer.rb +24 -13
  61. data/lib/twitter_cldr/resources/normalization_quick_check_importer.rb +1 -14
  62. data/lib/twitter_cldr/resources/rbnf_test_importer.rb +107 -0
  63. data/lib/twitter_cldr/resources/readme_renderer.rb +115 -0
  64. data/lib/twitter_cldr/resources/tailoring_importer.rb +2 -8
  65. data/lib/twitter_cldr/resources/uli/segment_exceptions_importer.rb +62 -0
  66. data/lib/twitter_cldr/resources/uli.rb +12 -0
  67. data/lib/twitter_cldr/resources/unicode_data_importer.rb +84 -14
  68. data/lib/twitter_cldr/resources/unicode_importer.rb +37 -0
  69. data/lib/twitter_cldr/resources/unicode_properties_importer.rb +79 -0
  70. data/lib/twitter_cldr/resources.rb +8 -1
  71. data/lib/twitter_cldr/shared/break_iterator.rb +213 -0
  72. data/lib/twitter_cldr/shared/calendar.rb +38 -14
  73. data/lib/twitter_cldr/shared/casefolder.rb +210 -0
  74. data/lib/twitter_cldr/shared/code_point.rb +103 -16
  75. data/lib/twitter_cldr/shared/numbering_system.rb +58 -0
  76. data/lib/twitter_cldr/shared/territories.rb +43 -1
  77. data/lib/twitter_cldr/shared/unicode_regex.rb +81 -0
  78. data/lib/twitter_cldr/shared.rb +13 -9
  79. data/lib/twitter_cldr/tokenizers/calendars/date_time_tokenizer.rb +77 -0
  80. data/lib/twitter_cldr/tokenizers/calendars/date_tokenizer.rb +14 -29
  81. data/lib/twitter_cldr/tokenizers/calendars/time_tokenizer.rb +13 -28
  82. data/lib/twitter_cldr/tokenizers/calendars/timespan_tokenizer.rb +11 -87
  83. data/lib/twitter_cldr/tokenizers/numbers/number_tokenizer.rb +16 -71
  84. data/lib/twitter_cldr/tokenizers/numbers/rbnf_tokenizer.rb +53 -0
  85. data/lib/twitter_cldr/tokenizers/pattern_tokenizer.rb +42 -0
  86. data/lib/twitter_cldr/tokenizers/segmentation/segmentation_tokenizer.rb +39 -0
  87. data/lib/twitter_cldr/tokenizers/tokenizer.rb +116 -0
  88. data/lib/twitter_cldr/tokenizers/unicode_regex/unicode_regex_tokenizer.rb +52 -0
  89. data/lib/twitter_cldr/tokenizers.rb +8 -4
  90. data/lib/twitter_cldr/utils/code_points.rb +1 -1
  91. data/lib/twitter_cldr/utils/range_set.rb +242 -0
  92. data/lib/twitter_cldr/utils/yaml.rb +17 -12
  93. data/lib/twitter_cldr/utils.rb +1 -1
  94. data/lib/twitter_cldr/version.rb +1 -1
  95. data/lib/twitter_cldr.rb +2 -1
  96. data/resources/custom/locales/sv/units.yml +8 -0
  97. data/resources/locales/af/calendars.yml +278 -224
  98. data/resources/locales/af/currencies.yml +954 -916
  99. data/resources/locales/af/languages.yml +583 -580
  100. data/resources/locales/af/layout.yml +5 -5
  101. data/resources/locales/af/lists.yml +23 -7
  102. data/resources/locales/af/numbers.yml +59 -54
  103. data/resources/locales/af/plurals.yml +2 -2
  104. data/resources/locales/af/rbnf.yml +261 -0
  105. data/resources/locales/af/territories.yml +264 -263
  106. data/resources/locales/ar/calendars.yml +287 -259
  107. data/resources/locales/ar/currencies.yml +1730 -1692
  108. data/resources/locales/ar/languages.yml +583 -580
  109. data/resources/locales/ar/layout.yml +5 -5
  110. data/resources/locales/ar/lists.yml +23 -7
  111. data/resources/locales/ar/numbers.yml +66 -61
  112. data/resources/locales/ar/plurals.yml +5 -8
  113. data/resources/locales/ar/rbnf.yml +519 -0
  114. data/resources/locales/ar/territories.yml +264 -263
  115. data/resources/locales/be/calendars.yml +238 -237
  116. data/resources/locales/be/currencies.yml +954 -917
  117. data/resources/locales/be/languages.yml +583 -580
  118. data/resources/locales/be/layout.yml +5 -5
  119. data/resources/locales/be/lists.yml +23 -7
  120. data/resources/locales/be/numbers.yml +62 -57
  121. data/resources/locales/be/plurals.yml +7 -4
  122. data/resources/locales/be/rbnf.yml +1288 -0
  123. data/resources/locales/be/territories.yml +264 -263
  124. data/resources/locales/bg/calendars.yml +278 -218
  125. data/resources/locales/bg/currencies.yml +955 -917
  126. data/resources/locales/bg/languages.yml +583 -580
  127. data/resources/locales/bg/layout.yml +5 -5
  128. data/resources/locales/bg/lists.yml +23 -7
  129. data/resources/locales/bg/numbers.yml +62 -57
  130. data/resources/locales/bg/plurals.yml +2 -2
  131. data/resources/locales/bg/rbnf.yml +280 -0
  132. data/resources/locales/bg/territories.yml +264 -263
  133. data/resources/locales/bn/calendars.yml +287 -225
  134. data/resources/locales/bn/currencies.yml +953 -916
  135. data/resources/locales/bn/languages.yml +583 -580
  136. data/resources/locales/bn/layout.yml +5 -5
  137. data/resources/locales/bn/lists.yml +23 -7
  138. data/resources/locales/bn/numbers.yml +62 -57
  139. data/resources/locales/bn/plurals.yml +2 -2
  140. data/resources/locales/bn/rbnf.yml +4 -0
  141. data/resources/locales/bn/territories.yml +264 -263
  142. data/resources/locales/ca/calendars.yml +278 -278
  143. data/resources/locales/ca/currencies.yml +953 -916
  144. data/resources/locales/ca/languages.yml +583 -580
  145. data/resources/locales/ca/layout.yml +5 -5
  146. data/resources/locales/ca/lists.yml +23 -7
  147. data/resources/locales/ca/numbers.yml +62 -57
  148. data/resources/locales/ca/plurals.yml +3 -2
  149. data/resources/locales/ca/rbnf.yml +756 -0
  150. data/resources/locales/ca/territories.yml +264 -263
  151. data/resources/locales/cs/calendars.yml +269 -262
  152. data/resources/locales/cs/currencies.yml +1483 -1172
  153. data/resources/locales/cs/languages.yml +583 -580
  154. data/resources/locales/cs/layout.yml +5 -5
  155. data/resources/locales/cs/lists.yml +23 -7
  156. data/resources/locales/cs/numbers.yml +64 -58
  157. data/resources/locales/cs/plurals.yml +6 -2
  158. data/resources/locales/cs/rbnf.yml +367 -0
  159. data/resources/locales/cs/territories.yml +264 -263
  160. data/resources/locales/cy/calendars.yml +275 -274
  161. data/resources/locales/cy/currencies.yml +1585 -1548
  162. data/resources/locales/cy/languages.yml +583 -580
  163. data/resources/locales/cy/layout.yml +5 -5
  164. data/resources/locales/cy/lists.yml +23 -7
  165. data/resources/locales/cy/numbers.yml +66 -61
  166. data/resources/locales/cy/plurals.yml +4 -3
  167. data/resources/locales/cy/rbnf.yml +298 -0
  168. data/resources/locales/cy/territories.yml +264 -263
  169. data/resources/locales/da/calendars.yml +281 -280
  170. data/resources/locales/da/currencies.yml +954 -916
  171. data/resources/locales/da/languages.yml +583 -580
  172. data/resources/locales/da/layout.yml +5 -5
  173. data/resources/locales/da/lists.yml +23 -7
  174. data/resources/locales/da/numbers.yml +62 -57
  175. data/resources/locales/da/plurals.yml +3 -2
  176. data/resources/locales/da/rbnf.yml +194 -0
  177. data/resources/locales/da/territories.yml +264 -263
  178. data/resources/locales/de/calendars.yml +294 -293
  179. data/resources/locales/de/currencies.yml +954 -916
  180. data/resources/locales/de/languages.yml +583 -580
  181. data/resources/locales/de/layout.yml +5 -5
  182. data/resources/locales/de/lists.yml +23 -7
  183. data/resources/locales/de/numbers.yml +62 -57
  184. data/resources/locales/de/plurals.yml +3 -2
  185. data/resources/locales/de/rbnf.yml +346 -0
  186. data/resources/locales/de/territories.yml +264 -263
  187. data/resources/locales/el/calendars.yml +279 -282
  188. data/resources/locales/el/currencies.yml +954 -916
  189. data/resources/locales/el/languages.yml +583 -580
  190. data/resources/locales/el/layout.yml +5 -5
  191. data/resources/locales/el/lists.yml +23 -7
  192. data/resources/locales/el/numbers.yml +62 -57
  193. data/resources/locales/el/plurals.yml +2 -2
  194. data/resources/locales/el/rbnf.yml +880 -0
  195. data/resources/locales/el/territories.yml +264 -263
  196. data/resources/locales/en/calendars.yml +192 -191
  197. data/resources/locales/en/currencies.yml +953 -915
  198. data/resources/locales/en/languages.yml +583 -580
  199. data/resources/locales/en/layout.yml +5 -5
  200. data/resources/locales/en/lists.yml +23 -7
  201. data/resources/locales/en/numbers.yml +62 -57
  202. data/resources/locales/en/plurals.yml +3 -2
  203. data/resources/locales/en/rbnf.yml +542 -0
  204. data/resources/locales/en/territories.yml +264 -263
  205. data/resources/locales/en-GB/calendars.yml +195 -194
  206. data/resources/locales/en-GB/currencies.yml +953 -915
  207. data/resources/locales/en-GB/languages.yml +583 -580
  208. data/resources/locales/en-GB/layout.yml +5 -5
  209. data/resources/locales/en-GB/lists.yml +23 -7
  210. data/resources/locales/en-GB/numbers.yml +62 -57
  211. data/resources/locales/en-GB/plurals.yml +2 -1
  212. data/resources/locales/en-GB/rbnf.yml +4 -0
  213. data/resources/locales/en-GB/territories.yml +264 -263
  214. data/resources/locales/es/calendars.yml +288 -238
  215. data/resources/locales/es/currencies.yml +953 -922
  216. data/resources/locales/es/languages.yml +583 -580
  217. data/resources/locales/es/layout.yml +5 -5
  218. data/resources/locales/es/lists.yml +23 -7
  219. data/resources/locales/es/numbers.yml +62 -57
  220. data/resources/locales/es/plurals.yml +2 -2
  221. data/resources/locales/es/rbnf.yml +913 -0
  222. data/resources/locales/es/territories.yml +264 -263
  223. data/resources/locales/eu/calendars.yml +277 -218
  224. data/resources/locales/eu/currencies.yml +953 -916
  225. data/resources/locales/eu/languages.yml +583 -580
  226. data/resources/locales/eu/layout.yml +5 -5
  227. data/resources/locales/eu/lists.yml +23 -7
  228. data/resources/locales/eu/numbers.yml +56 -51
  229. data/resources/locales/eu/plurals.yml +2 -2
  230. data/resources/locales/eu/rbnf.yml +4 -0
  231. data/resources/locales/eu/territories.yml +264 -263
  232. data/resources/locales/fa/calendars.yml +294 -293
  233. data/resources/locales/fa/currencies.yml +955 -916
  234. data/resources/locales/fa/languages.yml +583 -580
  235. data/resources/locales/fa/layout.yml +5 -5
  236. data/resources/locales/fa/lists.yml +23 -7
  237. data/resources/locales/fa/numbers.yml +62 -57
  238. data/resources/locales/fa/plurals.yml +2 -2
  239. data/resources/locales/fa/rbnf.yml +157 -0
  240. data/resources/locales/fa/territories.yml +264 -263
  241. data/resources/locales/fi/calendars.yml +284 -283
  242. data/resources/locales/fi/currencies.yml +953 -915
  243. data/resources/locales/fi/languages.yml +583 -580
  244. data/resources/locales/fi/layout.yml +5 -5
  245. data/resources/locales/fi/lists.yml +23 -7
  246. data/resources/locales/fi/numbers.yml +62 -57
  247. data/resources/locales/fi/plurals.yml +3 -2
  248. data/resources/locales/fi/rbnf.yml +206 -0
  249. data/resources/locales/fi/territories.yml +264 -263
  250. data/resources/locales/fil/calendars.yml +281 -230
  251. data/resources/locales/fil/currencies.yml +953 -916
  252. data/resources/locales/fil/languages.yml +583 -580
  253. data/resources/locales/fil/layout.yml +5 -5
  254. data/resources/locales/fil/lists.yml +23 -7
  255. data/resources/locales/fil/numbers.yml +62 -57
  256. data/resources/locales/fil/plurals.yml +3 -2
  257. data/resources/locales/fil/rbnf.yml +158 -0
  258. data/resources/locales/fil/territories.yml +264 -263
  259. data/resources/locales/fr/calendars.yml +297 -296
  260. data/resources/locales/fr/currencies.yml +968 -949
  261. data/resources/locales/fr/languages.yml +583 -580
  262. data/resources/locales/fr/layout.yml +5 -5
  263. data/resources/locales/fr/lists.yml +23 -7
  264. data/resources/locales/fr/numbers.yml +62 -57
  265. data/resources/locales/fr/plurals.yml +2 -2
  266. data/resources/locales/fr/rbnf.yml +621 -0
  267. data/resources/locales/fr/territories.yml +264 -263
  268. data/resources/locales/ga/calendars.yml +192 -191
  269. data/resources/locales/ga/currencies.yml +954 -916
  270. data/resources/locales/ga/languages.yml +583 -580
  271. data/resources/locales/ga/layout.yml +5 -5
  272. data/resources/locales/ga/lists.yml +23 -7
  273. data/resources/locales/ga/numbers.yml +62 -57
  274. data/resources/locales/ga/plurals.yml +4 -3
  275. data/resources/locales/ga/rbnf.yml +615 -0
  276. data/resources/locales/ga/territories.yml +264 -263
  277. data/resources/locales/gl/calendars.yml +283 -217
  278. data/resources/locales/gl/currencies.yml +953 -916
  279. data/resources/locales/gl/languages.yml +583 -580
  280. data/resources/locales/gl/layout.yml +5 -5
  281. data/resources/locales/gl/lists.yml +23 -7
  282. data/resources/locales/gl/numbers.yml +62 -57
  283. data/resources/locales/gl/plurals.yml +3 -2
  284. data/resources/locales/gl/rbnf.yml +4 -0
  285. data/resources/locales/gl/territories.yml +264 -263
  286. data/resources/locales/he/calendars.yml +248 -220
  287. data/resources/locales/he/currencies.yml +992 -932
  288. data/resources/locales/he/languages.yml +583 -580
  289. data/resources/locales/he/layout.yml +5 -5
  290. data/resources/locales/he/lists.yml +23 -7
  291. data/resources/locales/he/numbers.yml +64 -59
  292. data/resources/locales/he/plurals.yml +6 -3
  293. data/resources/locales/he/rbnf.yml +1029 -0
  294. data/resources/locales/he/territories.yml +264 -263
  295. data/resources/locales/hi/calendars.yml +284 -216
  296. data/resources/locales/hi/currencies.yml +953 -915
  297. data/resources/locales/hi/languages.yml +583 -580
  298. data/resources/locales/hi/layout.yml +5 -5
  299. data/resources/locales/hi/lists.yml +23 -7
  300. data/resources/locales/hi/numbers.yml +60 -55
  301. data/resources/locales/hi/plurals.yml +2 -2
  302. data/resources/locales/hi/rbnf.yml +430 -0
  303. data/resources/locales/hi/territories.yml +264 -263
  304. data/resources/locales/hr/calendars.yml +308 -307
  305. data/resources/locales/hr/currencies.yml +1248 -1504
  306. data/resources/locales/hr/languages.yml +583 -580
  307. data/resources/locales/hr/layout.yml +5 -5
  308. data/resources/locales/hr/lists.yml +23 -7
  309. data/resources/locales/hr/numbers.yml +63 -59
  310. data/resources/locales/hr/plurals.yml +12 -4
  311. data/resources/locales/hr/rbnf.yml +599 -0
  312. data/resources/locales/hr/territories.yml +264 -263
  313. data/resources/locales/hu/calendars.yml +285 -284
  314. data/resources/locales/hu/currencies.yml +954 -916
  315. data/resources/locales/hu/languages.yml +583 -580
  316. data/resources/locales/hu/layout.yml +5 -5
  317. data/resources/locales/hu/lists.yml +23 -7
  318. data/resources/locales/hu/numbers.yml +62 -57
  319. data/resources/locales/hu/plurals.yml +2 -2
  320. data/resources/locales/hu/rbnf.yml +363 -0
  321. data/resources/locales/hu/territories.yml +264 -263
  322. data/resources/locales/id/calendars.yml +276 -275
  323. data/resources/locales/id/currencies.yml +954 -916
  324. data/resources/locales/id/languages.yml +583 -580
  325. data/resources/locales/id/layout.yml +5 -5
  326. data/resources/locales/id/lists.yml +23 -7
  327. data/resources/locales/id/numbers.yml +61 -56
  328. data/resources/locales/id/plurals.yml +2 -2
  329. data/resources/locales/id/rbnf.yml +121 -0
  330. data/resources/locales/id/territories.yml +264 -263
  331. data/resources/locales/is/calendars.yml +281 -242
  332. data/resources/locales/is/currencies.yml +954 -916
  333. data/resources/locales/is/languages.yml +583 -580
  334. data/resources/locales/is/layout.yml +5 -5
  335. data/resources/locales/is/lists.yml +23 -7
  336. data/resources/locales/is/numbers.yml +62 -57
  337. data/resources/locales/is/plurals.yml +5 -2
  338. data/resources/locales/is/rbnf.yml +326 -0
  339. data/resources/locales/is/territories.yml +264 -263
  340. data/resources/locales/it/calendars.yml +275 -260
  341. data/resources/locales/it/currencies.yml +953 -920
  342. data/resources/locales/it/languages.yml +583 -580
  343. data/resources/locales/it/layout.yml +5 -5
  344. data/resources/locales/it/lists.yml +23 -7
  345. data/resources/locales/it/numbers.yml +59 -54
  346. data/resources/locales/it/plurals.yml +3 -2
  347. data/resources/locales/it/rbnf.yml +1189 -0
  348. data/resources/locales/it/territories.yml +264 -263
  349. data/resources/locales/ja/calendars.yml +269 -207
  350. data/resources/locales/ja/currencies.yml +953 -915
  351. data/resources/locales/ja/languages.yml +583 -580
  352. data/resources/locales/ja/layout.yml +5 -5
  353. data/resources/locales/ja/lists.yml +23 -7
  354. data/resources/locales/ja/numbers.yml +62 -57
  355. data/resources/locales/ja/plurals.yml +2 -2
  356. data/resources/locales/ja/rbnf.yml +209 -0
  357. data/resources/locales/ja/territories.yml +264 -263
  358. data/resources/locales/ko/calendars.yml +246 -213
  359. data/resources/locales/ko/currencies.yml +953 -915
  360. data/resources/locales/ko/languages.yml +583 -580
  361. data/resources/locales/ko/layout.yml +5 -5
  362. data/resources/locales/ko/lists.yml +23 -7
  363. data/resources/locales/ko/numbers.yml +60 -55
  364. data/resources/locales/ko/plurals.yml +2 -2
  365. data/resources/locales/ko/rbnf.yml +722 -0
  366. data/resources/locales/ko/territories.yml +264 -263
  367. data/resources/locales/lv/calendars.yml +286 -285
  368. data/resources/locales/lv/currencies.yml +1122 -1084
  369. data/resources/locales/lv/languages.yml +583 -580
  370. data/resources/locales/lv/layout.yml +5 -5
  371. data/resources/locales/lv/lists.yml +23 -7
  372. data/resources/locales/lv/numbers.yml +63 -58
  373. data/resources/locales/lv/plurals.yml +11 -3
  374. data/resources/locales/lv/rbnf.yml +238 -0
  375. data/resources/locales/lv/territories.yml +264 -263
  376. data/resources/locales/ms/calendars.yml +280 -279
  377. data/resources/locales/ms/currencies.yml +954 -916
  378. data/resources/locales/ms/languages.yml +583 -580
  379. data/resources/locales/ms/layout.yml +5 -5
  380. data/resources/locales/ms/lists.yml +23 -7
  381. data/resources/locales/ms/numbers.yml +62 -57
  382. data/resources/locales/ms/plurals.yml +2 -2
  383. data/resources/locales/ms/rbnf.yml +130 -0
  384. data/resources/locales/ms/territories.yml +264 -263
  385. data/resources/locales/nb/calendars.yml +284 -283
  386. data/resources/locales/nb/currencies.yml +958 -916
  387. data/resources/locales/nb/languages.yml +583 -580
  388. data/resources/locales/nb/layout.yml +5 -5
  389. data/resources/locales/nb/lists.yml +23 -7
  390. data/resources/locales/nb/numbers.yml +62 -57
  391. data/resources/locales/nb/plurals.yml +2 -2
  392. data/resources/locales/nb/rbnf.yml +191 -0
  393. data/resources/locales/nb/territories.yml +264 -263
  394. data/resources/locales/nl/calendars.yml +285 -284
  395. data/resources/locales/nl/currencies.yml +953 -917
  396. data/resources/locales/nl/languages.yml +583 -580
  397. data/resources/locales/nl/layout.yml +5 -5
  398. data/resources/locales/nl/lists.yml +23 -7
  399. data/resources/locales/nl/numbers.yml +62 -57
  400. data/resources/locales/nl/plurals.yml +3 -2
  401. data/resources/locales/nl/rbnf.yml +320 -0
  402. data/resources/locales/nl/territories.yml +264 -263
  403. data/resources/locales/pl/calendars.yml +288 -287
  404. data/resources/locales/pl/currencies.yml +1326 -1284
  405. data/resources/locales/pl/languages.yml +583 -580
  406. data/resources/locales/pl/layout.yml +5 -5
  407. data/resources/locales/pl/lists.yml +23 -7
  408. data/resources/locales/pl/numbers.yml +64 -59
  409. data/resources/locales/pl/plurals.yml +11 -4
  410. data/resources/locales/pl/rbnf.yml +410 -0
  411. data/resources/locales/pl/territories.yml +264 -263
  412. data/resources/locales/pt/calendars.yml +290 -289
  413. data/resources/locales/pt/currencies.yml +954 -916
  414. data/resources/locales/pt/languages.yml +583 -580
  415. data/resources/locales/pt/layout.yml +5 -5
  416. data/resources/locales/pt/lists.yml +23 -7
  417. data/resources/locales/pt/numbers.yml +62 -57
  418. data/resources/locales/pt/plurals.yml +4 -2
  419. data/resources/locales/pt/rbnf.yml +586 -0
  420. data/resources/locales/pt/territories.yml +264 -263
  421. data/resources/locales/ro/calendars.yml +284 -283
  422. data/resources/locales/ro/currencies.yml +1170 -1132
  423. data/resources/locales/ro/languages.yml +583 -580
  424. data/resources/locales/ro/layout.yml +5 -5
  425. data/resources/locales/ro/lists.yml +23 -7
  426. data/resources/locales/ro/numbers.yml +63 -58
  427. data/resources/locales/ro/plurals.yml +5 -2
  428. data/resources/locales/ro/rbnf.yml +250 -0
  429. data/resources/locales/ro/territories.yml +264 -263
  430. data/resources/locales/ru/calendars.yml +282 -281
  431. data/resources/locales/ru/currencies.yml +1118 -1247
  432. data/resources/locales/ru/languages.yml +583 -580
  433. data/resources/locales/ru/layout.yml +5 -5
  434. data/resources/locales/ru/lists.yml +23 -7
  435. data/resources/locales/ru/numbers.yml +63 -59
  436. data/resources/locales/ru/plurals.yml +8 -4
  437. data/resources/locales/ru/rbnf.yml +385 -0
  438. data/resources/locales/ru/territories.yml +264 -263
  439. data/resources/locales/sk/calendars.yml +254 -251
  440. data/resources/locales/sk/currencies.yml +1174 -1008
  441. data/resources/locales/sk/languages.yml +583 -580
  442. data/resources/locales/sk/layout.yml +5 -5
  443. data/resources/locales/sk/lists.yml +23 -7
  444. data/resources/locales/sk/numbers.yml +64 -58
  445. data/resources/locales/sk/plurals.yml +6 -2
  446. data/resources/locales/sk/rbnf.yml +304 -0
  447. data/resources/locales/sk/territories.yml +264 -263
  448. data/resources/locales/sq/calendars.yml +283 -206
  449. data/resources/locales/sq/currencies.yml +954 -916
  450. data/resources/locales/sq/languages.yml +583 -580
  451. data/resources/locales/sq/layout.yml +5 -5
  452. data/resources/locales/sq/lists.yml +23 -7
  453. data/resources/locales/sq/numbers.yml +62 -57
  454. data/resources/locales/sq/plurals.yml +2 -2
  455. data/resources/locales/sq/rbnf.yml +181 -0
  456. data/resources/locales/sq/territories.yml +264 -263
  457. data/resources/locales/sr/calendars.yml +290 -289
  458. data/resources/locales/sr/currencies.yml +1251 -1508
  459. data/resources/locales/sr/languages.yml +583 -580
  460. data/resources/locales/sr/layout.yml +5 -5
  461. data/resources/locales/sr/lists.yml +23 -7
  462. data/resources/locales/sr/numbers.yml +62 -58
  463. data/resources/locales/sr/plurals.yml +12 -4
  464. data/resources/locales/sr/rbnf.yml +429 -0
  465. data/resources/locales/sr/territories.yml +264 -263
  466. data/resources/locales/sv/calendars.yml +290 -289
  467. data/resources/locales/sv/currencies.yml +960 -930
  468. data/resources/locales/sv/languages.yml +583 -580
  469. data/resources/locales/sv/layout.yml +5 -5
  470. data/resources/locales/sv/lists.yml +23 -7
  471. data/resources/locales/sv/numbers.yml +63 -58
  472. data/resources/locales/sv/plurals.yml +3 -2
  473. data/resources/locales/sv/rbnf.yml +692 -0
  474. data/resources/locales/sv/territories.yml +264 -263
  475. data/resources/locales/ta/calendars.yml +281 -266
  476. data/resources/locales/ta/currencies.yml +953 -915
  477. data/resources/locales/ta/languages.yml +583 -580
  478. data/resources/locales/ta/layout.yml +5 -5
  479. data/resources/locales/ta/lists.yml +23 -7
  480. data/resources/locales/ta/numbers.yml +62 -57
  481. data/resources/locales/ta/plurals.yml +2 -2
  482. data/resources/locales/ta/rbnf.yml +241 -0
  483. data/resources/locales/ta/territories.yml +264 -263
  484. data/resources/locales/th/calendars.yml +278 -289
  485. data/resources/locales/th/currencies.yml +953 -915
  486. data/resources/locales/th/languages.yml +583 -580
  487. data/resources/locales/th/layout.yml +5 -5
  488. data/resources/locales/th/lists.yml +23 -7
  489. data/resources/locales/th/numbers.yml +62 -57
  490. data/resources/locales/th/plurals.yml +2 -2
  491. data/resources/locales/th/rbnf.yml +119 -0
  492. data/resources/locales/th/territories.yml +264 -263
  493. data/resources/locales/tr/calendars.yml +287 -286
  494. data/resources/locales/tr/currencies.yml +953 -916
  495. data/resources/locales/tr/languages.yml +583 -580
  496. data/resources/locales/tr/layout.yml +5 -5
  497. data/resources/locales/tr/lists.yml +23 -7
  498. data/resources/locales/tr/numbers.yml +61 -56
  499. data/resources/locales/tr/plurals.yml +2 -2
  500. data/resources/locales/tr/rbnf.yml +277 -0
  501. data/resources/locales/tr/territories.yml +264 -263
  502. data/resources/locales/uk/calendars.yml +286 -252
  503. data/resources/locales/uk/currencies.yml +1311 -1070
  504. data/resources/locales/uk/languages.yml +583 -580
  505. data/resources/locales/uk/layout.yml +5 -5
  506. data/resources/locales/uk/lists.yml +23 -7
  507. data/resources/locales/uk/numbers.yml +64 -59
  508. data/resources/locales/uk/plurals.yml +10 -4
  509. data/resources/locales/uk/rbnf.yml +430 -0
  510. data/resources/locales/uk/territories.yml +264 -263
  511. data/resources/locales/ur/calendars.yml +267 -228
  512. data/resources/locales/ur/currencies.yml +954 -916
  513. data/resources/locales/ur/languages.yml +583 -580
  514. data/resources/locales/ur/layout.yml +5 -5
  515. data/resources/locales/ur/lists.yml +23 -7
  516. data/resources/locales/ur/numbers.yml +62 -57
  517. data/resources/locales/ur/plurals.yml +3 -2
  518. data/resources/locales/ur/rbnf.yml +4 -0
  519. data/resources/locales/ur/territories.yml +264 -263
  520. data/resources/locales/vi/calendars.yml +256 -236
  521. data/resources/locales/vi/currencies.yml +953 -915
  522. data/resources/locales/vi/languages.yml +583 -580
  523. data/resources/locales/vi/layout.yml +5 -5
  524. data/resources/locales/vi/lists.yml +23 -7
  525. data/resources/locales/vi/numbers.yml +62 -57
  526. data/resources/locales/vi/plurals.yml +2 -2
  527. data/resources/locales/vi/rbnf.yml +164 -0
  528. data/resources/locales/vi/territories.yml +264 -263
  529. data/resources/locales/zh/calendars.yml +266 -265
  530. data/resources/locales/zh/currencies.yml +953 -915
  531. data/resources/locales/zh/languages.yml +583 -580
  532. data/resources/locales/zh/layout.yml +5 -5
  533. data/resources/locales/zh/lists.yml +23 -7
  534. data/resources/locales/zh/numbers.yml +62 -57
  535. data/resources/locales/zh/plurals.yml +2 -2
  536. data/resources/locales/zh/rbnf.yml +689 -0
  537. data/resources/locales/zh/territories.yml +264 -263
  538. data/resources/locales/zh-Hant/calendars.yml +266 -265
  539. data/resources/locales/zh-Hant/currencies.yml +955 -915
  540. data/resources/locales/zh-Hant/languages.yml +583 -580
  541. data/resources/locales/zh-Hant/layout.yml +5 -5
  542. data/resources/locales/zh-Hant/lists.yml +23 -7
  543. data/resources/locales/zh-Hant/numbers.yml +62 -57
  544. data/resources/locales/zh-Hant/plurals.yml +2 -2
  545. data/resources/locales/zh-Hant/rbnf.yml +647 -0
  546. data/resources/locales/zh-Hant/territories.yml +264 -263
  547. data/resources/shared/currency_digits_and_rounding.yml +67 -64
  548. data/resources/shared/numbering_systems.yml +176 -0
  549. data/resources/shared/rbnf_root.yml +1573 -0
  550. data/resources/shared/segments/segments_root.yml +728 -0
  551. data/resources/shared/segments/tailorings/en.yml +8 -0
  552. data/resources/uli/segments/de.yml +128 -0
  553. data/resources/uli/segments/en.yml +154 -0
  554. data/resources/uli/segments/es.yml +112 -0
  555. data/resources/uli/segments/fr.yml +47 -0
  556. data/resources/uli/segments/it.yml +37 -0
  557. data/resources/uli/segments/pt.yml +173 -0
  558. data/resources/uli/segments/ru.yml +10 -0
  559. data/resources/unicode_data/casefolding.yml +4765 -0
  560. data/resources/unicode_data/indices/bidi_class.yml +4572 -0
  561. data/resources/unicode_data/indices/bidi_mirrored.yml +3087 -0
  562. data/resources/unicode_data/indices/category.yml +10918 -0
  563. data/resources/unicode_data/indices/keys.yml +101 -0
  564. data/resources/unicode_data/properties/line_break.yml +9269 -0
  565. data/resources/unicode_data/properties/sentence_break.yml +8067 -0
  566. data/resources/unicode_data/properties/word_break.yml +3001 -0
  567. data/spec/collation/collation_spec.rb +2 -1
  568. data/spec/collation/collator_spec.rb +4 -3
  569. data/spec/collation/tailoring_spec.rb +2 -2
  570. data/spec/collation/tailoring_tests/he.txt +5 -2
  571. data/spec/{tokenizers/calendars → data_readers}/additional_date_format_selector_spec.rb +13 -13
  572. data/spec/data_readers/date_time_data_reader_spec.rb +26 -0
  573. data/spec/data_readers/number_data_reader_spec.rb +18 -0
  574. data/spec/data_readers/timespan_data_reader.rb +22 -0
  575. data/spec/formatters/calendars/datetime_formatter_spec.rb +18 -22
  576. data/spec/formatters/list_formatter_spec.rb +16 -87
  577. data/spec/formatters/numbers/abbreviated/abbreviated_number_formatter_spec.rb +15 -59
  578. data/spec/formatters/numbers/abbreviated/long_decimal_formatter_spec.rb +32 -17
  579. data/spec/formatters/numbers/abbreviated/short_decimal_formatter_spec.rb +33 -17
  580. data/spec/formatters/numbers/currency_formatter_spec.rb +18 -13
  581. data/spec/formatters/numbers/decimal_formatter_spec.rb +16 -18
  582. data/spec/formatters/numbers/number_formatter_spec.rb +40 -31
  583. data/spec/formatters/numbers/percent_formatter_spec.rb +14 -6
  584. data/spec/formatters/numbers/rbnf/allowed_failures.yml +74 -0
  585. data/spec/formatters/numbers/rbnf/locales/af/rbnf_test.yml +706 -0
  586. data/spec/formatters/numbers/rbnf/locales/ar/rbnf_test.yml +706 -0
  587. data/spec/formatters/numbers/rbnf/locales/be/rbnf_test.yml +1174 -0
  588. data/spec/formatters/numbers/rbnf/locales/bg/rbnf_test.yml +706 -0
  589. data/spec/formatters/numbers/rbnf/locales/bn/rbnf_test.yml +1291 -0
  590. data/spec/formatters/numbers/rbnf/locales/ca/rbnf_test.yml +1174 -0
  591. data/spec/formatters/numbers/rbnf/locales/cs/rbnf_test.yml +823 -0
  592. data/spec/formatters/numbers/rbnf/locales/cy/rbnf_test.yml +940 -0
  593. data/spec/formatters/numbers/rbnf/locales/da/rbnf_test.yml +940 -0
  594. data/spec/formatters/numbers/rbnf/locales/de/rbnf_test.yml +940 -0
  595. data/spec/formatters/numbers/rbnf/locales/el/rbnf_test.yml +1174 -0
  596. data/spec/formatters/numbers/rbnf/locales/en/rbnf_test.yml +1291 -0
  597. data/spec/formatters/numbers/rbnf/locales/en-GB/rbnf_test.yml +1291 -0
  598. data/spec/formatters/numbers/rbnf/locales/es/rbnf_test.yml +1642 -0
  599. data/spec/formatters/numbers/rbnf/locales/eu/rbnf_test.yml +1291 -0
  600. data/spec/formatters/numbers/rbnf/locales/fa/rbnf_test.yml +589 -0
  601. data/spec/formatters/numbers/rbnf/locales/fi/rbnf_test.yml +706 -0
  602. data/spec/formatters/numbers/rbnf/locales/fil/rbnf_test.yml +706 -0
  603. data/spec/formatters/numbers/rbnf/locales/fr/rbnf_test.yml +1408 -0
  604. data/spec/formatters/numbers/rbnf/locales/ga/rbnf_test.yml +940 -0
  605. data/spec/formatters/numbers/rbnf/locales/gl/rbnf_test.yml +1291 -0
  606. data/spec/formatters/numbers/rbnf/locales/he/rbnf_test.yml +1057 -0
  607. data/spec/formatters/numbers/rbnf/locales/hi/rbnf_test.yml +823 -0
  608. data/spec/formatters/numbers/rbnf/locales/hr/rbnf_test.yml +1174 -0
  609. data/spec/formatters/numbers/rbnf/locales/hu/rbnf_test.yml +940 -0
  610. data/spec/formatters/numbers/rbnf/locales/id/rbnf_test.yml +706 -0
  611. data/spec/formatters/numbers/rbnf/locales/is/rbnf_test.yml +823 -0
  612. data/spec/formatters/numbers/rbnf/locales/it/rbnf_test.yml +1174 -0
  613. data/spec/formatters/numbers/rbnf/locales/ja/rbnf_test.yml +823 -0
  614. data/spec/formatters/numbers/rbnf/locales/ko/rbnf_test.yml +1408 -0
  615. data/spec/formatters/numbers/rbnf/locales/lv/rbnf_test.yml +706 -0
  616. data/spec/formatters/numbers/rbnf/locales/ms/rbnf_test.yml +706 -0
  617. data/spec/formatters/numbers/rbnf/locales/nb/rbnf_test.yml +940 -0
  618. data/spec/formatters/numbers/rbnf/locales/nl/rbnf_test.yml +706 -0
  619. data/spec/formatters/numbers/rbnf/locales/pl/rbnf_test.yml +823 -0
  620. data/spec/formatters/numbers/rbnf/locales/pt/rbnf_test.yml +1174 -0
  621. data/spec/formatters/numbers/rbnf/locales/ro/rbnf_test.yml +823 -0
  622. data/spec/formatters/numbers/rbnf/locales/ru/rbnf_test.yml +823 -0
  623. data/spec/formatters/numbers/rbnf/locales/sk/rbnf_test.yml +823 -0
  624. data/spec/formatters/numbers/rbnf/locales/sq/rbnf_test.yml +706 -0
  625. data/spec/formatters/numbers/rbnf/locales/sr/rbnf_test.yml +940 -0
  626. data/spec/formatters/numbers/rbnf/locales/sv/rbnf_test.yml +1876 -0
  627. data/spec/formatters/numbers/rbnf/locales/ta/rbnf_test.yml +706 -0
  628. data/spec/formatters/numbers/rbnf/locales/th/rbnf_test.yml +706 -0
  629. data/spec/formatters/numbers/rbnf/locales/tr/rbnf_test.yml +706 -0
  630. data/spec/formatters/numbers/rbnf/locales/uk/rbnf_test.yml +823 -0
  631. data/spec/formatters/numbers/rbnf/locales/ur/rbnf_test.yml +1291 -0
  632. data/spec/formatters/numbers/rbnf/locales/vi/rbnf_test.yml +706 -0
  633. data/spec/formatters/numbers/rbnf/locales/zh/rbnf_test.yml +940 -0
  634. data/spec/formatters/numbers/rbnf/locales/zh-Hant/rbnf_test.yml +940 -0
  635. data/spec/formatters/numbers/rbnf/rbnf_spec.rb +98 -0
  636. data/spec/formatters/plurals/plural_formatter_spec.rb +4 -4
  637. data/spec/formatters/plurals/rules_spec.rb +5 -5
  638. data/spec/localized/localized_date_spec.rb +1 -1
  639. data/spec/localized/localized_datetime_spec.rb +8 -13
  640. data/spec/localized/localized_number_spec.rb +17 -32
  641. data/spec/localized/localized_object_spec.rb +0 -5
  642. data/spec/localized/localized_string_spec.rb +40 -2
  643. data/spec/localized/localized_time_spec.rb +3 -6
  644. data/spec/localized/localized_timespan_spec.rb +144 -0
  645. data/spec/normalization_spec.rb +12 -12
  646. data/spec/parsers/number_parser_spec.rb +5 -5
  647. data/spec/parsers/parser_spec.rb +60 -0
  648. data/spec/parsers/segmentation_parser_spec.rb +96 -0
  649. data/spec/parsers/symbol_table_spec.rb +32 -0
  650. data/spec/parsers/unicode_regex/character_class_spec.rb +117 -0
  651. data/spec/parsers/unicode_regex/character_range_spec.rb +21 -0
  652. data/spec/parsers/unicode_regex/character_set_spec.rb +36 -0
  653. data/spec/parsers/unicode_regex/literal_spec.rb +34 -0
  654. data/spec/parsers/unicode_regex/unicode_string_spec.rb +22 -0
  655. data/spec/parsers/unicode_regex_parser_spec.rb +86 -0
  656. data/spec/readme_spec.rb +8 -269
  657. data/spec/shared/break_iterator_spec.rb +72 -0
  658. data/spec/shared/calendar_spec.rb +5 -4
  659. data/spec/shared/casefolder_spec.rb +30 -0
  660. data/spec/shared/casefolding.txt +251 -0
  661. data/spec/shared/casefolding_expected.txt +251 -0
  662. data/spec/shared/code_point_spec.rb +44 -14
  663. data/spec/shared/numbering_system_spec.rb +41 -0
  664. data/spec/shared/territories_spec.rb +14 -6
  665. data/spec/shared/unicode_regex_spec.rb +203 -0
  666. data/spec/spec_helper.rb +17 -0
  667. data/spec/tokenizers/calendars/date_tokenizer_spec.rb +26 -30
  668. data/spec/tokenizers/calendars/datetime_tokenizer_spec.rb +11 -90
  669. data/spec/tokenizers/calendars/time_tokenizer_spec.rb +5 -5
  670. data/spec/tokenizers/calendars/timespan_tokenizer_spec.rb +17 -7
  671. data/spec/tokenizers/numbers/number_tokenizer_spec.rb +28 -27
  672. data/spec/tokenizers/segmentation/segmentation_tokenizer_spec.rb +40 -0
  673. data/spec/tokenizers/unicode_regex/unicode_regex_tokenizer_spec.rb +190 -0
  674. data/spec/utils/range_set_spec.rb +171 -0
  675. data/spec/utils/yaml/yaml_spec.rb +62 -51
  676. data/twitter_cldr.gemspec +1 -1
  677. metadata +199 -30
  678. data/lib/twitter_cldr/formatters/base.rb +0 -47
  679. data/lib/twitter_cldr/formatters/calendars/date_formatter.rb +0 -19
  680. data/lib/twitter_cldr/formatters/calendars/time_formatter.rb +0 -19
  681. data/lib/twitter_cldr/normalization/base.rb +0 -37
  682. data/lib/twitter_cldr/normalization/hangul.rb +0 -79
  683. data/lib/twitter_cldr/normalization/nfc.rb +0 -24
  684. data/lib/twitter_cldr/normalization/nfd.rb +0 -26
  685. data/lib/twitter_cldr/normalization/nfkc.rb +0 -114
  686. data/lib/twitter_cldr/normalization/nfkd.rb +0 -120
  687. data/lib/twitter_cldr/normalization/quick_check.rb +0 -41
  688. data/lib/twitter_cldr/tokenizers/base.rb +0 -169
  689. data/lib/twitter_cldr/tokenizers/calendars/datetime_tokenizer.rb +0 -131
  690. data/lib/twitter_cldr/utils/territories.rb +0 -56
  691. data/spec/formatters/base_spec.rb +0 -18
  692. data/spec/formatters/calendars/timespan_formatter_spec.rb +0 -112
  693. data/spec/normalization/NormalizationTestShort.txt +0 -602
  694. data/spec/normalization/base_spec.rb +0 -16
  695. data/spec/normalization/hangul_spec.rb +0 -42
  696. data/spec/normalization/normalization_spec.rb +0 -113
  697. data/spec/tokenizers/base_spec.rb +0 -259
  698. data/spec/utils/territories_spec.rb +0 -16
@@ -0,0 +1,213 @@
1
+ # encoding: UTF-8
2
+
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
6
module TwitterCldr
  module Shared
    # Splits a string into segments according to segmentation rules loaded from
    # the shared "segments_root" resource, optionally tailored per locale and
    # optionally prefixed with a "no break" rule built from ULI exceptions.
    #
    # Only sentence segmentation is implemented; each_word and each_line raise
    # NotImplementedError.
    class BreakIterator

      attr_reader :locale, :use_uli_exceptions

      # locale  - locale whose tailorings/exceptions should be applied
      #           (defaults to TwitterCldr.locale).
      # options - :use_uli_exceptions (default true): when truthy, the ULI
      #           exception rule is placed in front of the regular rules.
      def initialize(locale = TwitterCldr.locale, options = {})
        @use_uli_exceptions = !!options.fetch(:use_uli_exceptions, true)
        @locale = locale
      end

      # Yields each sentence of str in order. Without a block, returns an
      # enumerator over the same segments.
      def each_sentence(str, &block)
        each_boundary(str, "sentence", &block)
      end

      def each_word(str, &block)
        raise NotImplementedError.new("Word segmentation is not currently supported.")
      end

      def each_line(str, &block)
        raise NotImplementedError.new("Line segmentation is not currently supported.")
      end

      private

      # Converts a boundary type such as "sentence" (or "foo_bar") into the
      # resource key name "SentenceBreak" ("FooBarBreak").
      def boundary_name_for(str)
        str.gsub(/(?:^|\_)([A-Za-z])/) { |s| $1.upcase } + "Break"
      end

      # Walks str from left to right. At each position the first rule that
      # matches the remaining text decides how far to advance and whether a
      # segment ends there (boundary_symbol == :break).
      def each_boundary(str, boundary_type)
        return to_enum(__method__, str, boundary_type) unless block_given?

        rules = compile_rules_for(locale, boundary_type)
        last_offset = 0
        current_position = 0
        search_str = str.dup

        until search_str.size == 0
          match = nil
          matched_rule = rules.find { |candidate| match = candidate.match(search_str) }
          advance = match ? match.boundary_offset : 0

          if advance < 1
            # No rule matched, or the match consumed nothing. Step over a single
            # character without breaking so the scan always makes progress
            # instead of raising on nil or looping forever.
            advance = 1
          elsif matched_rule.boundary_symbol == :break
            break_offset = current_position + advance
            yield str[last_offset...break_offset]
            last_offset = break_offset
          end

          search_str = search_str[advance..-1]
          current_position += advance
        end

        # Emit whatever follows the last break. Comparing against str.size
        # (rather than str.size - 1) keeps a trailing one-character segment.
        yield str[last_offset..-1] if last_offset < str.size
      end

      # See the comment above exceptions_for. Basically, we only support exceptions
      # for the "sentence" boundary type since the ULI JSON data doesn't distinguish
      # between boundary types.
      #
      # Returns the parsed "no break" rule, or nil when the boundary type is not
      # "sentence" or the locale has no exceptions to match.
      def compile_exception_rule_for(locale, boundary_type, boundary_name)
        return nil unless boundary_type == "sentence"

        cache_key = TwitterCldr::Utils.compute_cache_key(locale, boundary_type)
        self.class.exceptions_cache[cache_key] ||= begin
          exceptions = exceptions_for(locale, boundary_name)

          # An empty alternation would match the empty string, so skip building
          # a rule altogether when there is nothing to except.
          unless exceptions.nil? || exceptions.empty?
            regex_contents = exceptions.map { |exc| Regexp.escape(exc) }.join("|")
            segmentation_parser.parse(
              segmentation_tokenizer.tokenize("(?:#{regex_contents}) ×")
            )
          end
        end
      end

      def self.exceptions_cache
        @exceptions_cache ||= {}
      end

      # Grabs rules from segment_root, applies custom tailorings (our own, NOT from CLDR),
      # and optionally integrates ULI exceptions.
      #
      # The merged rules are cached per locale AND boundary type, because the
      # tailorings that get merged in are locale-specific.
      def compile_rules_for(locale, boundary_type)
        # Computed up front so it is available on cache hits as well.
        boundary_name = boundary_name_for(boundary_type)
        cache_key = TwitterCldr::Utils.compute_cache_key(locale, boundary_type)

        rules = self.class.rule_cache[cache_key] ||= begin
          boundary_data = resource_for(boundary_name)
          symbol_table = symbol_table_for(boundary_data)
          root_rules = rules_for(boundary_data, symbol_table)

          tailoring_boundary_data = tailoring_resource_for(locale, boundary_name)

          if tailoring_boundary_data
            merge_rules(root_rules, rules_for(tailoring_boundary_data, symbol_table))
          else
            root_rules
          end
        end

        if use_uli_exceptions
          exception_rule = compile_exception_rule_for(locale, boundary_type, boundary_name)
          # Build a new array so the cached rules are never modified.
          rules = [exception_rule] + rules if exception_rule
        end

        rules
      end

      # replaces ruleset1's rules with rules with the same id from ruleset2
      # (rules in ruleset2 whose id is not present in ruleset1 are ignored)
      def merge_rules(ruleset1, ruleset2)
        result = ruleset1.dup
        ruleset2.each do |new_rule|
          existing_idx = result.find_index { |rule| rule.id == new_rule.id }
          result[existing_idx] = new_rule if existing_idx
        end
        result
      end

      def self.rule_cache
        @rule_cache ||= {}
      end

      # Builds a symbol table from boundary_data[:variables]. Each variable's
      # value is tokenized and any variable tokens inside it are expanded using
      # the entries added so far.
      def symbol_table_for(boundary_data)
        table = TwitterCldr::Parsers::SymbolTable.new
        boundary_data[:variables].each do |variable|
          id = variable[:id].to_s
          tokens = segmentation_tokenizer.tokenize(variable[:value])
          # note: variables can be redefined (add replaces if key already exists)
          table.add(id, resolve_symbols(tokens, table))
        end
        table
      end

      # Returns a new token list in which every :variable token is replaced by
      # the tokens stored for it in symbol_table; other tokens are kept as is.
      def resolve_symbols(tokens, symbol_table)
        tokens.inject([]) do |resolved, token|
          if token.type == :variable
            resolved.concat(symbol_table.fetch(token.value))
          else
            resolved << token
          end
        end
      end

      # Parses every entry of boundary_data[:rules] into a rule object and
      # records the rule's source string and id on it.
      def rules_for(boundary_data, symbol_table)
        boundary_data[:rules].map do |rule|
          parsed = segmentation_parser.parse(
            segmentation_tokenizer.tokenize(rule[:value]), {
              :symbol_table => symbol_table
            }
          )

          parsed.string = rule[:value]
          parsed.id = rule[:id]
          parsed
        end
      end

      def self.segmentation_tokenizer
        @segmentation_tokenizer ||= TwitterCldr::Tokenizers::SegmentationTokenizer.new
      end

      def segmentation_tokenizer
        self.class.segmentation_tokenizer
      end

      def self.segmentation_parser
        @segmentation_parser ||= TwitterCldr::Parsers::SegmentationParser.new
      end

      def segmentation_parser
        self.class.segmentation_parser
      end

      def resource_for(boundary_name)
        self.class.root_resource[:segments][boundary_name.to_sym]
      end

      # Returns the tailoring data for the given locale and boundary name, or
      # nil when none is available.
      #
      # NOTE(review): assumes get_resource raises ArgumentError for a missing
      # resource, as the rescue in exceptions_for suggests - confirm.
      def tailoring_resource_for(locale, boundary_name)
        cache_key = TwitterCldr::Utils.compute_cache_key(locale, boundary_name)
        self.class.tailoring_resource_cache[cache_key] ||= begin
          res = TwitterCldr.get_resource("shared", "segments", "tailorings", locale)
          res[locale][:segments][boundary_name.to_sym]
        rescue ArgumentError
          nil
        end
      end

      def self.tailoring_resource_cache
        @tailoring_resource_cache ||= {}
      end

      def self.root_resource
        @root_resource ||= TwitterCldr.get_resource("shared", "segments", "segments_root")
      end

      # The boundary_name param is not currently used since the ULI JSON resource that
      # exceptions are generated from does not distinguish between boundary types. The
      # XML version does, however, so the JSON will hopefully catch up at some point and
      # we can make use of this second parameter. For the time being, compile_exception_rule_for
      # (which calls this function) assumes a "sentence" boundary type.
      def exceptions_for(locale, boundary_name)
        self.class.exceptions_resource_cache[locale] ||= begin
          TwitterCldr.get_resource("uli", "segments", locale)[locale][:exceptions]
        rescue ArgumentError
          []
        end
      end

      def self.exceptions_resource_cache
        @exceptions_resource_cache ||= {}
      end

    end
  end
end
@@ -38,53 +38,77 @@ module TwitterCldr
38
38
  end
39
39
 
40
40
  def months(names_form = :wide, format = DEFAULT_FORMAT)
41
- data = get_with_names_form(:months, names_form, format)
42
- data && data.sort_by { |m| m.first }.map { |m| m.last }
41
+ cache_field_data(:months, names_form, format) do
42
+ data = get_with_names_form(:months, names_form, format)
43
+ data && data.sort_by { |m| m.first }.map { |m| m.last }
44
+ end
43
45
  end
44
46
 
45
47
  def weekdays(names_form = :wide, format = DEFAULT_FORMAT)
46
- get_with_names_form(:days, names_form, format)
48
+ cache_field_data(:weekdays, names_form, format) do
49
+ get_with_names_form(:days, names_form, format)
50
+ end
47
51
  end
48
52
 
49
53
  def fields
50
- get_data(:fields)
54
+ cache_field_data(:fields) do
55
+ get_data(:fields)
56
+ end
51
57
  end
52
58
 
53
59
  def quarters(names_form = :wide, format = DEFAULT_FORMAT)
54
- get_with_names_form(:quarters, names_form, format)
60
+ cache_field_data(:quarters, names_form, format) do
61
+ get_with_names_form(:quarters, names_form, format)
62
+ end
55
63
  end
56
64
 
57
65
  def periods(names_form = :wide, format = DEFAULT_PERIOD_FORMAT)
58
- get_with_names_form(:periods, names_form, format)
66
+ cache_field_data(:periods, names_form, format) do
67
+ get_with_names_form(:periods, names_form, format)
68
+ end
59
69
  end
60
70
 
61
71
  def eras(names_form = :name)
62
- get_data(:eras)[names_form]
72
+ cache_field_data(:eras, names_form) do
73
+ get_data(:eras)[names_form]
74
+ end
63
75
  end
64
76
 
65
77
  def date_order(options = {})
66
- get_order_for(TwitterCldr::Tokenizers::DateTokenizer, options)
78
+ get_order_for(TwitterCldr::DataReaders::DateDataReader, options)
67
79
  end
68
80
 
69
81
  def time_order(options = {})
70
- get_order_for(TwitterCldr::Tokenizers::TimeTokenizer, options)
82
+ get_order_for(TwitterCldr::DataReaders::TimeDataReader, options)
71
83
  end
72
84
 
73
85
  def datetime_order(options = {})
74
- get_order_for(TwitterCldr::Tokenizers::DateTimeTokenizer, options)
86
+ get_order_for(TwitterCldr::DataReaders::DateTimeDataReader, options)
75
87
  end
76
88
 
77
89
  private
78
90
 
91
+ def cache_field_data(field, names_form = nil, format = nil)
92
+ cache_key = TwitterCldr::Utils.compute_cache_key(locale, field, names_form, format)
93
+ field_cache[cache_key] ||= begin
94
+ yield
95
+ end
96
+ end
97
+
98
+ def field_cache
99
+ @@field_cache ||= {}
100
+ end
101
+
79
102
  def calendar_cache
80
103
  @@calendar_cache ||= {}
81
104
  end
82
105
 
83
- def get_order_for(const, options)
84
- opts = options.merge(:locale => @locale)
85
- cache_key = TwitterCldr::Utils.compute_cache_key([const.to_s] + opts.keys.sort + opts.values.sort)
106
+ def get_order_for(data_reader_const, options)
107
+ key_array = [data_reader_const.to_s, @locale] + options.keys.sort + options.values.sort
108
+ cache_key = TwitterCldr::Utils.compute_cache_key(key_array)
86
109
  calendar_cache.fetch(cache_key) do |key|
87
- tokens = const.new(opts).tokens
110
+ data_reader = data_reader_const.new(@locale, options)
111
+ tokens = data_reader.tokenizer.tokenize(data_reader.pattern)
88
112
  calendar_cache[cache_key] = resolve_methods(methods_for_tokens(tokens))
89
113
  end
90
114
  end
@@ -0,0 +1,210 @@
1
+ # encoding: UTF-8
2
+
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
6
+ module TwitterCldr
7
+ module Shared
8
+ class Casefolder
9
+ class << self
10
+
11
+ CASEFOLDING_REGEX_C = /\341\271\246|\317\236|\341\273\264|\342\204\252|\321\254|\352\235\273|\352\231\240|\342\263\206|\306\251|\323\272|\342\204\253|\304\234|\341\271\250|\317\240|\321\256|\341\273\266|\352\235\275|\352\231\242|\342\263\210|\323\274|\304\236|\352\235\276|\341\271\252|\306\254|\317\242|\310\272|\321\260|\341\273\270|\352\231\244|\342\263\212|\323\276|\310\273|\304\240|\352\236\200|\341\271\254|\306\256|\317\244|\321\262|\341\273\272|\352\231\246|\342\263\214|\306\257|\324\200|\310\275|\304\242|\352\236\202|\341\271\256|\317\246|\310\276|\342\204\262|\321\264|\341\273\274|\352\231\250|\342\263\216|\306\261|\324\202|\304\244|\352\236\204|\341\271\260|\306\262|\317\250|\321\266|\341\273\276|\352\231\252|\342\263\220|\306\263|\324\204|\341\202\240|\311\201|\304\246|\352\236\206|\317\252|\341\271\262|\321\270|\341\202\241|\352\231\254|\342\263\222|\306\265|\324\206|\311\203|\304\250|\341\202\242|\317\254|\341\271\264|\311\204|\321\272|\341\202\243|\342\263\224|\306\267|\324\210|\311\205|\304\252|\341\202\244|\306\270|\317\256|\341\271\266|\311\206|\321\274|\341\202\245|\352\236\213|\342\263\226|\324\212|\304\254|\341\202\246|\317\260|\341\271\270|\311\210|\321\276|\341\202\247|\352\236\215|\342\263\230|\324\214|\317\261|\304\256|\341\202\250|\306\274|\341\271\272|\311\212|\322\200|\341\274\210|\341\202\251|\342\263\232|\324\216|\341\274\211|\341\202\252|\352\236\220|\317\264|\341\271\274|\311\214|\341\274\212|\341\202\253|\342\263\234|\324\220|\317\265|\304\262|\341\274\213|\341\202\254|\352\236\222|\341\271\276|\341\274\214|\341\202\255|\311\216|\342\263\236|\324\222|\317\267|\304\264|\341\274\215|\341\202\256|\341\272\200|\341\274\216|\341\202\257|\342\263\240|\324\224|\317\271|\304\266|\341\274\217|\341\202\260|\307\204|\317\272|\341\272\202|\341\202\261|\342\263\242|\307\205|\324\226|\341\202\262|\341\272\204|\304\271|\322\212|\341\202\263|\307\207|\324\230|\317\275|\315\260|\341\202\264|\307\210|\317\276|\341\272\206|\304\273|\322\214|\341\202\265
|\352\232\200|\324\232|\317\277|\315\262|\341\202\266|\320\200|\341\272\210|\307\212|\304\275|\322\216|\341\202\267|\352\232\202|\324\234|\320\201|\307\213|\341\202\270|\320\202|\341\272\212|\304\277|\322\220|\341\274\230|\341\202\271|\352\232\204|\320\203|\307\215|\324\236|\341\274\231|\315\266|\341\202\272|\352\236\240|\342\263\253|\320\204|\341\272\214|\305\201|\322\222|\341\274\232|\341\202\273|\352\232\206|\320\205|\307\217|\324\240|\341\274\233|\341\202\274|\341\270\200|\352\236\242|\342\263\255|\302\265|\320\206|\341\272\216|\342\261\240|\305\203|\322\224|\341\274\234|\341\202\275|\352\232\210|\320\207|\307\221|\324\242|\341\274\235|\341\202\276|\341\270\202|\352\236\244|\320\210|\341\272\220|\342\261\242|\305\205|\322\226|\341\202\277|\352\232\212|\320\211|\307\223|\324\244|\342\261\243|\341\203\200|\341\270\204|\352\236\246|\320\212|\341\272\222|\342\261\244|\322\230|\341\203\201|\305\207|\352\232\214|\342\263\262|\320\213|\307\225|\324\246|\341\203\202|\341\270\206|\352\236\250|\320\214|\341\272\224|\322\232|\341\203\203|\352\232\216|\320\215|\307\227|\342\261\247|\341\203\204|\341\270\210|\305\212|\352\236\252|\320\216|\341\203\205|\322\234|\352\232\220|\320\217|\307\231|\342\261\251|\341\270\212|\305\214|\320\220|\341\203\207|\322\236|\352\232\222|\303\200|\320\221|\307\233|\342\261\253|\341\270\214|\305\216|\303\201|\320\222|\341\274\250|\322\240|\352\232\224|\303\202|\320\223|\341\272\233|\352\234\242|\342\261\255|\341\274\251|\316\206|\341\270\216|\305\220|\303\203|\320\224|\307\236|\342\261\256|\341\274\252|\342\205\240|\322\242|\352\232\226|\341\276\270|\320\225|\303\204|\360\220\220\200|\352\234\244|\342\261\257|\341\274\253|\316\210|\341\270\220|\342\205\241|\305\222|\341\276\271|\320\226|\307\240|\324\261|\303\205|\360\220\220\201|\342\261\260|\341\274\254|\316\211|\341\203\215|\342\205\242|\322\244|\320\227|\324\262|\303\206|\341\276\272|\360\220\220\202|\352\234\246|\341\274\255|\316\212|\341\270\222|\342\205\243|\305\224|\341\272\240|\307\242|
\324\263|\303\207|\341\276\273|\320\230|\360\220\220\203|\342\261\262|\341\274\256|\342\205\244|\322\246|\324\264|\303\210|\320\231|\360\220\220\204|\352\234\250|\341\274\257|\316\214|\341\270\224|\342\205\245|\342\222\266|\305\226|\341\272\242|\307\244|\324\265|\303\211|\320\232|\360\220\220\205|\342\205\246|\342\222\267|\322\250|\324\266|\303\212|\341\276\276|\320\233|\360\220\220\206|\352\234\252|\342\261\265|\316\216|\341\270\226|\342\205\247|\342\222\270|\305\230|\341\272\244|\307\246|\324\267|\303\213|\320\234|\360\220\220\207|\316\217|\342\205\250|\342\222\271|\322\252|\324\270|\303\214|\320\235|\360\220\220\210|\352\234\254|\341\270\230|\342\205\251|\342\222\272|\305\232|\341\272\246|\307\250|\324\271|\303\215|\320\236|\360\220\220\211|\316\221|\342\205\252|\342\222\273|\322\254|\324\272|\303\216|\320\237|\360\220\220\212|\352\234\256|\316\222|\341\270\232|\342\205\253|\342\222\274|\305\234|\101|\341\272\250|\307\252|\324\273|\303\217|\320\240|\360\220\220\213|\316\223|\342\205\254|\342\222\275|\322\256|\102|\324\274|\303\220|\320\241|\360\220\220\214|\316\224|\341\270\234|\342\205\255|\342\222\276|\305\236|\103|\341\272\252|\307\254|\324\275|\303\221|\320\242|\360\220\220\215|\342\205\256|\342\222\277|\322\260|\104|\341\274\270|\316\225|\324\276|\303\222|\320\243|\360\220\220\216|\352\234\262|\341\270\236|\342\205\257|\342\223\200|\305\240|\105|\341\274\271|\316\226|\341\272\254|\307\256|\324\277|\303\223|\320\244|\360\220\220\217|\342\261\276|\342\223\201|\322\262|\106|\341\274\272|\316\227|\325\200|\303\224|\341\277\210|\320\245|\360\220\220\220|\352\234\264|\342\261\277|\341\270\240|\342\223\202|\305\242|\107|\341\274\273|\316\230|\341\272\256|\325\201|\303\225|\341\277\211|\320\246|\360\220\220\221|\342\262\200|\342\223\203|\322\264|\110|\341\274\274|\316\231|\307\261|\325\202|\303\226|\341\277\212|\320\247|\360\220\220\222|\352\234\266|\341\270\242|\342\223\204|\305\244|\111|\341\274\275|\316\232|\341\272\260|\307\262|\325\203|\341\277\213|\320\250|\36
0\220\220\223|\342\262\202|\342\223\205|\322\266|\112|\341\274\276|\316\233|\325\204|\303\230|\320\251|\360\220\220\224|\352\234\270|\341\270\244|\342\223\206|\305\246|\113|\341\274\277|\316\234|\341\272\262|\307\264|\325\205|\303\231|\320\252|\360\220\220\225|\342\262\204|\342\223\207|\322\270|\114|\316\235|\325\206|\303\232|\320\253|\360\220\220\226|\352\234\272|\341\270\246|\342\223\210|\305\250|\115|\316\236|\307\266|\325\207|\303\233|\320\254|\341\272\264|\360\220\220\227|\342\262\206|\342\223\211|\322\272|\116|\316\237|\307\267|\325\210|\303\234|\320\255|\360\220\220\230|\352\234\274|\341\270\250|\342\223\212|\305\252|\117|\316\240|\307\270|\325\211|\303\235|\320\256|\341\272\266|\360\220\220\231|\342\262\210|\342\223\213|\322\274|\120|\316\241|\325\212|\303\236|\320\257|\360\220\220\232|\352\234\276|\341\270\252|\342\223\214|\305\254|\121|\357\274\241|\307\272|\325\213|\341\272\270|\360\220\220\233|\342\262\212|\342\223\215|\322\276|\122|\316\243|\357\274\242|\325\214|\360\220\220\234|\352\235\200|\341\270\254|\342\223\216|\305\256|\123|\316\244|\357\274\243|\307\274|\325\215|\341\272\272|\360\220\220\235|\342\262\214|\342\223\217|\323\200|\124|\341\275\210|\316\245|\357\274\244|\325\216|\360\220\220\236|\352\235\202|\341\270\256|\305\260|\323\201|\125|\341\275\211|\316\246|\357\274\245|\342\260\200|\307\276|\325\217|\341\272\274|\360\220\220\237|\342\262\216|\126|\341\275\212|\316\247|\357\274\246|\342\260\201|\325\220|\341\277\230|\360\220\220\240|\352\235\204|\341\270\260|\305\262|\323\203|\127|\341\275\213|\316\250|\357\274\247|\310\200|\325\221|\341\277\231|\341\272\276|\342\260\202|\360\220\220\241|\342\262\220|\130|\341\275\214|\316\251|\357\274\250|\325\222|\341\277\232|\342\260\203|\360\220\220\242|\352\235\206|\342\206\203|\305\264|\323\205|\131|\341\275\215|\316\252|\341\270\262|\357\274\251|\310\202|\325\223|\341\277\233|\341\273\200|\342\260\204|\360\220\220\243|\342\262\222|\132|\316\253|\357\274\252|\325\224|\342\260\205|\360\220\220\244|\352\2
35\210|\305\266|\323\207|\341\270\264|\357\274\253|\310\204|\325\225|\341\273\202|\342\260\206|\360\220\220\245|\342\262\224|\357\274\254|\325\226|\342\260\207|\360\220\220\246|\352\235\212|\305\270|\323\211|\341\270\266|\357\274\255|\310\206|\341\273\204|\342\260\210|\360\220\220\247|\342\262\226|\305\271|\357\274\256|\342\260\211|\352\235\214|\323\213|\341\270\270|\357\274\257|\310\210|\341\273\206|\342\260\212|\342\262\230|\305\273|\357\274\260|\342\260\213|\352\235\216|\323\215|\341\270\272|\357\274\261|\310\212|\341\273\210|\342\260\214|\342\262\232|\305\275|\357\274\262|\342\260\215|\352\235\220|\341\270\274|\357\274\263|\341\273\212|\342\260\216|\310\214|\342\262\234|\305\277|\323\220|\357\274\264|\342\260\217|\352\235\222|\341\275\231|\341\270\276|\357\274\265|\341\273\214|\342\260\220|\310\216|\342\262\236|\306\201|\323\222|\357\274\266|\341\277\250|\342\260\221|\352\235\224|\306\202|\341\275\233|\341\271\200|\357\274\267|\341\277\251|\341\273\216|\342\260\222|\310\220|\342\262\240|\323\224|\357\274\270|\341\277\252|\342\260\223|\352\235\226|\306\204|\341\275\235|\341\271\202|\357\274\271|\341\277\253|\341\273\220|\342\260\224|\310\222|\342\262\242|\323\226|\357\274\272|\341\277\254|\342\260\225|\352\235\230|\306\206|\341\275\237|\341\271\204|\341\273\222|\342\260\226|\310\224|\342\262\244|\306\207|\323\230|\342\260\227|\352\235\232|\341\271\206|\341\273\224|\342\260\230|\310\226|\352\231\200|\342\262\246|\323\232|\306\211|\342\260\231|\352\235\234|\341\271\210|\306\212|\341\273\226|\342\260\232|\310\230|\352\231\202|\342\262\250|\306\213|\323\234|\342\260\233|\352\235\236|\317\202|\341\271\212|\341\273\230|\342\260\234|\310\232|\352\231\204|\342\262\252|\323\236|\304\200|\342\260\235|\352\235\240|\341\271\214|\306\216|\341\273\232|\342\260\236|\310\234|\352\231\206|\342\262\254|\341\275\250|\306\217|\323\240|\342\260\237|\304\202|\352\235\242|\341\275\251|\341\271\216|\306\220|\342\260\240|\341\273\234|\310\236|\352\231\210|\342\262\256|\341\275\252|\306\2
21|\323\242|\342\260\241|\304\204|\341\277\270|\352\235\244|\341\275\253|\341\271\220|\342\260\242|\341\277\271|\341\273\236|\310\240|\352\231\212|\342\262\260|\341\275\254|\306\223|\323\244|\342\260\243|\341\277\272|\304\206|\352\235\246|\341\275\255|\341\271\222|\306\224|\342\260\244|\341\273\240|\310\242|\341\277\273|\352\231\214|\342\262\262|\341\275\256|\323\246|\342\260\245|\304\210|\352\235\250|\341\275\257|\341\271\224|\306\226|\342\260\246|\341\273\242|\310\244|\352\231\216|\342\262\264|\306\227|\323\250|\342\260\247|\304\212|\352\235\252|\341\271\226|\306\230|\342\260\250|\341\273\244|\310\246|\352\231\220|\342\262\266|\317\217|\323\252|\342\260\251|\304\214|\352\235\254|\317\220|\341\271\230|\342\260\252|\341\273\246|\310\250|\352\231\222|\342\262\270|\317\221|\323\254|\342\260\253|\304\216|\352\235\256|\341\271\232|\306\234|\342\260\254|\341\273\250|\315\205|\310\252|\321\240|\352\231\224|\342\262\272|\306\235|\323\256|\342\260\255|\304\220|\341\271\234|\342\260\256|\341\273\252|\310\254|\321\242|\352\231\226|\342\262\274|\317\225|\306\237|\323\260|\304\222|\341\271\236|\306\240|\317\226|\341\273\254|\310\256|\321\244|\352\231\230|\342\262\276|\323\262|\304\224|\341\271\240|\306\242|\317\230|\341\273\256|\310\260|\321\246|\352\231\232|\342\263\200|\323\264|\304\226|\341\271\242|\306\244|\317\232|\341\273\260|\310\262|\342\204\246|\321\250|\352\231\234|\342\263\202|\323\266|\304\230|\341\271\244|\306\246|\317\234|\341\273\262|\321\252|\352\235\271|\352\231\236|\342\263\204|\306\247|\323\270|\304\232/
12
+ CASEFOLDING_REGEX_F = /\341\277\263|\341\276\224|\341\276\201|\326\207|\341\276\247|\357\254\225|\357\254\202|\341\277\227|\341\277\204|\316\220|\341\275\222|\341\276\236|\304\260|\341\276\213|\341\277\264|\341\276\202|\341\276\250|\341\272\236|\341\276\225|\357\254\226|\357\254\203|\341\276\237|\341\276\214|\341\276\262|\341\277\242|\341\276\274|\341\276\203|\341\276\251|\341\276\226|\357\254\227|\357\254\204|\341\277\206|\341\276\263|\341\272\226|\341\276\215|\307\260|\341\275\224|\341\276\240|\341\277\266|\341\277\243|\341\276\252|\341\276\227|\341\276\204|\357\254\205|\341\277\207|\341\276\264|\341\272\227|\341\276\216|\341\276\241|\341\277\267|\341\277\244|\316\260|\341\276\230|\341\276\205|\341\276\253|\357\254\206|\341\275\226|\341\276\242|\341\272\230|\341\276\217|\341\277\222|\341\276\231|\303\237|\341\276\206|\341\276\254|\341\276\266|\341\276\243|\341\272\231|\341\276\220|\341\277\246|\341\277\223|\341\276\207|\341\276\255|\341\276\232|\341\276\267|\305\211|\341\276\244|\341\272\232|\341\276\221|\341\277\247|\341\276\210|\341\276\256|\341\276\233|\341\276\222|\341\276\245|\357\254\223|\357\254\200|\341\277\202|\341\276\211|\341\276\257|\341\275\220|\341\276\234|\341\277\262|\341\277\214|\341\276\223|\341\276\200|\341\276\246|\357\254\224|\357\254\201|\341\277\274|\341\277\226|\341\277\203|\341\276\235|\341\276\212/
13
+ CASEFOLDING_REGEX_S = /\341\276\232|\341\276\217|\341\277\214|\341\276\253|\341\276\212|\341\276\274|\341\276\233|\341\272\236|\341\276\254|\341\276\213|\341\276\234|\341\276\255|\341\276\214|\341\276\250|\341\276\235|\341\276\256|\341\276\230|\341\276\215|\341\276\251|\341\276\236|\341\276\210|\341\277\274|\341\276\257|\341\276\231|\341\276\216|\341\276\252|\341\276\237|\341\276\211/
14
+ CASEFOLDING_REGEX_T = /\304\260|\111/
15
+
16
+ CASEFOLDING_HASH = {
17
+ 8065=>[7937, 953], 7782=>[7783], 990=>[991], 1415=>[1381, 1410], 7924=>[7925], 8490=>[107], 1132=>[1133], 8066=>[7938, 953],
18
+ 42875=>[42876], 425=>[643], 1274=>[1275], 42592=>[42593], 11462=>[11463], 8491=>[229], 284=>[285], 8067=>[7939, 953],
19
+ 7784=>[7785], 992=>[993], 1134=>[1135], 7926=>[7927], 8068=>[7940, 953], 42877=>[7545], 1276=>[1277], 42594=>[42595],
20
+ 11464=>[11465], 286=>[287], 8069=>[7941, 953], 7786=>[7787], 42878=>[42879], 428=>[429], 994=>[995], 570=>[11365],
21
+ 1136=>[1137], 7928=>[7929], 8070=>[7942, 953], 1278=>[1279], 42596=>[42597], 11466=>[11467], 571=>[572], 288=>[289],
22
+ 8071=>[7943, 953], 7788=>[7789], 42880=>[42881], 430=>[648], 996=>[997], 1138=>[1139], 7930=>[7931], 8072=>[8064],
23
+ 431=>[432], 1280=>[1281], 42598=>[42599], 11468=>[11469], 573=>[410], 290=>[291], 8073=>[8065], 7790=>[7791],
24
+ 42882=>[42883], 998=>[999], 574=>[11366], 8498=>[8526], 1140=>[1141], 7932=>[7933], 8074=>[8066], 433=>[650],
25
+ 1282=>[1283], 42600=>[42601], 11470=>[11471], 292=>[293], 8075=>[8067], 7792=>[7793], 42884=>[42885], 434=>[651],
26
+ 1000=>[1001], 1142=>[1143], 7934=>[7935], 8076=>[8068], 435=>[436], 1284=>[1285], 42602=>[42603], 11472=>[11473],
27
+ 4256=>[11520], 577=>[578], 294=>[295], 8077=>[8069], 42886=>[42887], 1002=>[1003], 7794=>[7795], 1144=>[1145],
28
+ 4257=>[11521], 8078=>[8070], 437=>[438], 1286=>[1287], 42604=>[42605], 11474=>[11475], 579=>[384], 296=>[297],
29
+ 4258=>[11522], 8079=>[8071], 1004=>[1005], 7796=>[7797], 580=>[649], 1146=>[1147], 4259=>[11523], 8080=>[7968, 953],
30
+ 439=>[658], 1288=>[1289], 11476=>[11477], 64256=>[102, 102], 581=>[652], 298=>[299], 4260=>[11524], 8081=>[7969, 953],
31
+ 440=>[441], 1006=>[1007], 7798=>[7799], 64257=>[102, 105], 582=>[583], 1148=>[1149], 4261=>[11525], 8082=>[7970, 953],
32
+ 42891=>[42892], 1290=>[1291], 11478=>[11479], 64258=>[102, 108], 300=>[301], 4262=>[11526], 8083=>[7971, 953], 1008=>[954],
33
+ 7800=>[7801], 64259=>[102, 102, 105], 584=>[585], 1150=>[1151], 4263=>[11527], 8084=>[7972, 953], 42893=>[613], 1292=>[1293],
34
+ 1009=>[961], 11480=>[11481], 64260=>[102, 102, 108], 302=>[303], 4264=>[11528], 8085=>[7973, 953], 444=>[445], 7802=>[7803],
35
+ 64261=>[115, 116], 586=>[587], 1152=>[1153], 7944=>[7936], 4265=>[11529], 8086=>[7974, 953], 1294=>[1295], 11482=>[11483],
36
+ 64262=>[115, 116], 304=>[105, 775], 7945=>[7937], 4266=>[11530], 8087=>[7975, 953], 42896=>[42897], 1012=>[952], 7804=>[7805],
37
+ 588=>[589], 7946=>[7938], 4267=>[11531], 8088=>[8080], 1296=>[1297], 1013=>[949], 11484=>[11485], 306=>[307],
38
+ 7947=>[7939], 4268=>[11532], 8089=>[8081], 42898=>[42899], 7806=>[7807], 7948=>[7940], 4269=>[11533], 590=>[591],
39
+ 8090=>[8082], 1298=>[1299], 1015=>[1016], 11486=>[11487], 308=>[309], 7949=>[7941], 4270=>[11534], 8091=>[8083],
40
+ 7808=>[7809], 7950=>[7942], 4271=>[11535], 8092=>[8084], 1300=>[1301], 1017=>[1010], 11488=>[11489], 310=>[311],
41
+ 7951=>[7943], 4272=>[11536], 8093=>[8085], 452=>[454], 1018=>[1019], 7810=>[7811], 4273=>[11537], 8094=>[8086],
42
+ 453=>[454], 1302=>[1303], 11490=>[11491], 4274=>[11538], 8095=>[8087], 7812=>[7813], 313=>[314], 1162=>[1163],
43
+ 4275=>[11539], 8096=>[8032, 953], 455=>[457], 1304=>[1305], 1021=>[891], 880=>[881], 4276=>[11540], 8097=>[8033, 953],
44
+ 456=>[457], 1022=>[892], 7814=>[7815], 315=>[316], 1164=>[1165], 4277=>[11541], 8098=>[8034, 953], 1306=>[1307],
45
+ 42624=>[42625], 1023=>[893], 882=>[883], 4278=>[11542], 8099=>[8035, 953], 1024=>[1104], 7816=>[7817], 458=>[460],
46
+ 64275=>[1396, 1398], 317=>[318], 1166=>[1167], 4279=>[11543], 8100=>[8036, 953], 1308=>[1309], 42626=>[42627], 1025=>[1105],
47
+ 459=>[460], 64276=>[1396, 1381], 4280=>[11544], 8101=>[8037, 953], 1026=>[1106], 7818=>[7819], 64277=>[1396, 1387], 319=>[320],
48
+ 1168=>[1169], 7960=>[7952], 4281=>[11545], 8102=>[8038, 953], 42628=>[42629], 1027=>[1107], 461=>[462], 1310=>[1311],
49
+ 64278=>[1406, 1398], 7961=>[7953], 886=>[887], 4282=>[11546], 8103=>[8039, 953], 1028=>[1108], 11499=>[11500], 7820=>[7821],
50
+ 42912=>[42913], 64279=>[1396, 1389], 321=>[322], 1170=>[1171], 7962=>[7954], 4283=>[11547], 8104=>[8096], 42630=>[42631],
51
+ 1029=>[1109], 463=>[464], 1312=>[1313], 7963=>[7955], 4284=>[11548], 7680=>[7681], 8105=>[8097], 181=>[956],
52
+ 1030=>[1110], 11501=>[11502], 7822=>[7823], 42914=>[42915], 323=>[324], 1172=>[1173], 7964=>[7956], 11360=>[11361],
53
+ 4285=>[11549], 8106=>[8098], 42632=>[42633], 1031=>[1111], 465=>[466], 1314=>[1315], 7965=>[7957], 4286=>[11550],
54
+ 7682=>[7683], 8107=>[8099], 1032=>[1112], 7824=>[7825], 42916=>[42917], 325=>[326], 1174=>[1175], 11362=>[619],
55
+ 4287=>[11551], 8108=>[8100], 42634=>[42635], 1033=>[1113], 467=>[468], 1316=>[1317], 11363=>[7549], 4288=>[11552],
56
+ 7684=>[7685], 8109=>[8101], 1034=>[1114], 7826=>[7827], 42918=>[42919], 1176=>[1177], 11364=>[637], 4289=>[11553],
57
+ 327=>[328], 8110=>[8102], 42636=>[42637], 1035=>[1115], 11506=>[11507], 469=>[470], 1318=>[1319], 4290=>[11554],
58
+ 7686=>[7687], 8111=>[8103], 1036=>[1116], 7828=>[7829], 42920=>[42921], 329=>[700, 110], 1178=>[1179], 4291=>[11555],
59
+ 42638=>[42639], 1037=>[1117], 471=>[472], 11367=>[11368], 4292=>[11556], 7688=>[7689], 330=>[331], 7830=>[104, 817],
60
+ 1038=>[1118], 42922=>[614], 4293=>[11557], 1180=>[1181], 7831=>[116, 776], 8114=>[8048, 953], 1039=>[1119], 473=>[474],
61
+ 42640=>[42641], 11369=>[11370], 7690=>[7691], 332=>[333], 7832=>[119, 778], 8115=>[945, 953], 1040=>[1072], 4295=>[11559],
62
+ 1182=>[1183], 7833=>[121, 778], 8116=>[940, 953], 192=>[224], 1041=>[1073], 475=>[476], 42642=>[42643], 11371=>[11372],
63
+ 7692=>[7693], 334=>[335], 7834=>[97, 702], 193=>[225], 1042=>[1074], 7976=>[7968], 1184=>[1185], 8118=>[945, 834],
64
+ 194=>[226], 1043=>[1075], 7835=>[7777], 42644=>[42645], 7977=>[7969], 902=>[940], 11373=>[593], 7694=>[7695],
65
+ 42786=>[42787], 336=>[337], 8119=>[945, 834, 953], 195=>[227], 1044=>[1076], 478=>[479], 7978=>[7970], 11374=>[625],
66
+ 8544=>[8560], 1186=>[1187], 8120=>[8112], 1045=>[1077], 42646=>[42647], 196=>[228], 7979=>[7971], 904=>[941],
67
+ 11375=>[592], 7696=>[7697], 8545=>[8561], 42788=>[42789], 66560=>[66600], 338=>[339], 7838=>[223], 8121=>[8113],
68
+ 1046=>[1078], 480=>[481], 1329=>[1377], 197=>[229], 7980=>[7972], 905=>[942], 11376=>[594], 4301=>[11565],
69
+ 8546=>[8562], 66561=>[66601], 1188=>[1189], 1047=>[1079], 1330=>[1378], 198=>[230], 8122=>[8048], 7981=>[7973],
70
+ 906=>[943], 7698=>[7699], 8547=>[8563], 42790=>[42791], 66562=>[66602], 340=>[341], 7840=>[7841], 482=>[483],
71
+ 1331=>[1379], 199=>[231], 8123=>[8049], 1048=>[1080], 7982=>[7974], 11378=>[11379], 8548=>[8564], 66563=>[66603],
72
+ 1190=>[1191], 8124=>[8115], 1332=>[1380], 200=>[232], 1049=>[1081], 7983=>[7975], 908=>[972], 7700=>[7701],
73
+ 8549=>[8565], 9398=>[9424], 42792=>[42793], 66564=>[66604], 342=>[343], 7842=>[7843], 484=>[485], 1333=>[1381],
74
+ 201=>[233], 1050=>[1082], 8550=>[8566], 9399=>[9425], 66565=>[66605], 1192=>[1193], 1334=>[1382], 202=>[234],
75
+ 8126=>[953], 1051=>[1083], 910=>[973], 11381=>[11382], 7702=>[7703], 8551=>[8567], 9400=>[9426], 42794=>[42795],
76
+ 66566=>[66606], 344=>[345], 7844=>[7845], 486=>[487], 1335=>[1383], 203=>[235], 1052=>[1084], 911=>[974],
77
+ 8552=>[8568], 9401=>[9427], 66567=>[66607], 1194=>[1195], 1336=>[1384], 204=>[236], 1053=>[1085], 912=>[953, 776, 769],
78
+ 7704=>[7705], 8553=>[8569], 9402=>[9428], 42796=>[42797], 66568=>[66608], 346=>[347], 7846=>[7847], 488=>[489],
79
+ 1337=>[1385], 205=>[237], 1054=>[1086], 913=>[945], 8554=>[8570], 9403=>[9429], 66569=>[66609], 1196=>[1197],
80
+ 8130=>[8052, 953], 1338=>[1386], 206=>[238], 1055=>[1087], 914=>[946], 7706=>[7707], 8555=>[8571], 9404=>[9430],
81
+ 42798=>[42799], 66570=>[66610], 348=>[349], 65=>[97], 8131=>[951, 953], 7848=>[7849], 490=>[491], 1339=>[1387],
82
+ 207=>[239], 1056=>[1088], 915=>[947], 8556=>[8572], 9405=>[9431], 66571=>[66611], 1198=>[1199], 66=>[98],
83
+ 8132=>[942, 953], 1340=>[1388], 208=>[240], 1057=>[1089], 916=>[948], 7708=>[7709], 8557=>[8573], 9406=>[9432],
84
+ 66572=>[66612], 350=>[351], 67=>[99], 7850=>[7851], 492=>[493], 1341=>[1389], 209=>[241], 1058=>[1090],
85
+ 8558=>[8574], 9407=>[9433], 66573=>[66613], 1200=>[1201], 68=>[100], 7992=>[7984], 917=>[949], 8134=>[951, 834],
86
+ 1342=>[1390], 210=>[242], 1059=>[1091], 7710=>[7711], 8559=>[8575], 9408=>[9434], 42802=>[42803], 66574=>[66614],
87
+ 352=>[353], 69=>[101], 7993=>[7985], 918=>[950], 8135=>[951, 834, 953], 7852=>[7853], 494=>[495], 1343=>[1391],
88
+ 211=>[243], 1060=>[1092], 11390=>[575], 9409=>[9435], 66575=>[66615], 1202=>[1203], 70=>[102], 7994=>[7986],
89
+ 919=>[951], 1344=>[1392], 212=>[244], 8136=>[8050], 1061=>[1093], 11391=>[576], 7712=>[7713], 9410=>[9436],
90
+ 42804=>[42805], 66576=>[66616], 354=>[355], 71=>[103], 7995=>[7987], 920=>[952], 496=>[106, 780], 7854=>[7855],
91
+ 1345=>[1393], 213=>[245], 8137=>[8051], 1062=>[1094], 11392=>[11393], 9411=>[9437], 66577=>[66617], 1204=>[1205],
92
+ 72=>[104], 7996=>[7988], 921=>[953], 497=>[499], 1346=>[1394], 214=>[246], 8138=>[8052], 1063=>[1095],
93
+ 7714=>[7715], 9412=>[9438], 42806=>[42807], 66578=>[66618], 356=>[357], 73=>[105], 7997=>[7989], 922=>[954],
94
+ 7856=>[7857], 498=>[499], 1347=>[1395], 8139=>[8053], 1064=>[1096], 11394=>[11395], 9413=>[9439], 66579=>[66619],
95
+ 1206=>[1207], 74=>[106], 7998=>[7990], 923=>[955], 8140=>[8131], 1348=>[1396], 216=>[248], 1065=>[1097],
96
+ 7716=>[7717], 9414=>[9440], 42808=>[42809], 66580=>[66620], 358=>[359], 75=>[107], 7999=>[7991], 924=>[956],
97
+ 7858=>[7859], 500=>[501], 1349=>[1397], 217=>[249], 1066=>[1098], 11396=>[11397], 9415=>[9441], 66581=>[66621],
98
+ 1208=>[1209], 76=>[108], 925=>[957], 1350=>[1398], 218=>[250], 1067=>[1099], 7718=>[7719], 9416=>[9442],
99
+ 42810=>[42811], 66582=>[66622], 360=>[361], 77=>[109], 926=>[958], 502=>[405], 1351=>[1399], 219=>[251],
100
+ 1068=>[1100], 7860=>[7861], 9417=>[9443], 66583=>[66623], 1210=>[1211], 78=>[110], 927=>[959], 11398=>[11399],
101
+ 503=>[447], 1352=>[1400], 220=>[252], 1069=>[1101], 7720=>[7721], 9418=>[9444], 42812=>[42813], 66584=>[66624],
102
+ 362=>[363], 79=>[111], 928=>[960], 504=>[505], 1353=>[1401], 221=>[253], 1070=>[1102], 7862=>[7863],
103
+ 9419=>[9445], 66585=>[66625], 1212=>[1213], 80=>[112], 929=>[961], 11400=>[11401], 8146=>[953, 776, 768], 1354=>[1402],
104
+ 222=>[254], 1071=>[1103], 7722=>[7723], 9420=>[9446], 42814=>[42815], 66586=>[66626], 364=>[365], 81=>[113],
105
+ 8147=>[953, 776, 769], 223=>[115, 115], 506=>[507], 1355=>[1403], 65313=>[65345], 7864=>[7865], 9421=>[9447], 66587=>[66627],
106
+ 1214=>[1215], 82=>[114], 931=>[963], 11402=>[11403], 1356=>[1404], 65314=>[65346], 7724=>[7725], 9422=>[9448],
107
+ 42816=>[42817], 66588=>[66628], 366=>[367], 83=>[115], 932=>[964], 508=>[509], 1357=>[1405], 65315=>[65347],
108
+ 7866=>[7867], 9423=>[9449], 66589=>[66629], 1216=>[1231], 84=>[116], 8008=>[8000], 933=>[965], 11404=>[11405],
109
+ 8150=>[953, 834], 1358=>[1406], 65316=>[65348], 7726=>[7727], 42818=>[42819], 66590=>[66630], 368=>[369], 1217=>[1218],
110
+ 85=>[117], 8009=>[8001], 934=>[966], 8151=>[953, 776, 834], 11264=>[11312], 510=>[511], 1359=>[1407], 65317=>[65349],
111
+ 7868=>[7869], 66591=>[66631], 86=>[118], 8010=>[8002], 935=>[967], 11406=>[11407], 11265=>[11313], 1360=>[1408],
112
+ 8152=>[8144], 65318=>[65350], 7728=>[7729], 42820=>[42821], 66592=>[66632], 370=>[371], 1219=>[1220], 87=>[119],
113
+ 8011=>[8003], 936=>[968], 512=>[513], 1361=>[1409], 8153=>[8145], 65319=>[65351], 7870=>[7871], 11266=>[11314],
114
+ 66593=>[66633], 88=>[120], 8012=>[8004], 937=>[969], 11408=>[11409], 1362=>[1410], 8154=>[8054], 65320=>[65352],
115
+ 11267=>[11315], 8579=>[8580], 42822=>[42823], 66594=>[66634], 372=>[373], 1221=>[1222], 89=>[121], 8013=>[8005],
116
+ 938=>[970], 7730=>[7731], 514=>[515], 1363=>[1411], 8155=>[8055], 65321=>[65353], 7872=>[7873], 11268=>[11316],
117
+ 66595=>[66635], 90=>[122], 939=>[971], 11410=>[11411], 1364=>[1412], 65322=>[65354], 11269=>[11317], 42824=>[42825],
118
+ 66596=>[66636], 374=>[375], 1223=>[1224], 7732=>[7733], 516=>[517], 1365=>[1413], 65323=>[65355], 7874=>[7875],
119
+ 11270=>[11318], 8016=>[965, 787], 66597=>[66637], 11412=>[11413], 1366=>[1414], 65324=>[65356], 11271=>[11319], 42826=>[42827],
120
+ 66598=>[66638], 376=>[255], 1225=>[1226], 7734=>[7735], 518=>[519], 65325=>[65357], 7876=>[7877], 11272=>[11320],
121
+ 8018=>[965, 787, 768], 66599=>[66639], 377=>[378], 11414=>[11415], 65326=>[65358], 11273=>[11321], 944=>[965, 776, 769], 42828=>[42829],
122
+ 1227=>[1228], 7736=>[7737], 520=>[521], 65327=>[65359], 7878=>[7879], 11274=>[11322], 8020=>[965, 787, 769], 379=>[380],
123
+ 11416=>[11417], 8162=>[965, 776, 768], 65328=>[65360], 11275=>[11323], 42830=>[42831], 1229=>[1230], 7738=>[7739], 8163=>[965, 776, 769],
124
+ 522=>[523], 65329=>[65361], 7880=>[7881], 11276=>[11324], 8022=>[965, 787, 834], 381=>[382], 11418=>[11419], 8164=>[961, 787],
125
+ 65330=>[65362], 11277=>[11325], 42832=>[42833], 7740=>[7741], 65331=>[65363], 7882=>[7883], 11278=>[11326], 524=>[525],
126
+ 383=>[115], 1232=>[1233], 11420=>[11421], 8166=>[965, 834], 65332=>[65364], 11279=>[11327], 42834=>[42835], 8025=>[8017],
127
+ 7742=>[7743], 8167=>[965, 776, 834], 526=>[527], 11280=>[11328], 7884=>[7885], 65333=>[65365], 1234=>[1235], 385=>[595],
128
+ 11422=>[11423], 11281=>[11329], 8168=>[8160], 65334=>[65366], 7744=>[7745], 8027=>[8019], 386=>[387], 42836=>[42837],
129
+ 528=>[529], 11282=>[11330], 7886=>[7887], 8169=>[8161], 65335=>[65367], 1236=>[1237], 11424=>[11425], 11283=>[11331],
130
+ 8170=>[8058], 65336=>[65368], 7746=>[7747], 8029=>[8021], 388=>[389], 42838=>[42839], 530=>[531], 11284=>[11332],
131
+ 7888=>[7889], 8171=>[8059], 65337=>[65369], 1238=>[1239], 11426=>[11427], 11285=>[11333], 8172=>[8165], 65338=>[65370],
132
+ 7748=>[7749], 8031=>[8023], 390=>[596], 42840=>[42841], 532=>[533], 11286=>[11334], 7890=>[7891], 1240=>[1241],
133
+ 391=>[392], 11428=>[11429], 11287=>[11335], 7750=>[7751], 42842=>[42843], 534=>[535], 11288=>[11336], 7892=>[7893],
134
+ 393=>[598], 1242=>[1243], 11430=>[11431], 42560=>[42561], 11289=>[11337], 394=>[599], 7752=>[7753], 42844=>[42845],
135
+ 536=>[537], 11290=>[11338], 7894=>[7895], 1244=>[1245], 395=>[396], 11432=>[11433], 42562=>[42563], 8178=>[8060, 953],
136
+ 11291=>[11339], 7754=>[7755], 962=>[963], 42846=>[42847], 8179=>[969, 953], 538=>[539], 11292=>[11340], 7896=>[7897],
137
+ 1246=>[1247], 11434=>[11435], 42564=>[42565], 8180=>[974, 953], 11293=>[11341], 256=>[257], 398=>[477], 7756=>[7757],
138
+ 42848=>[42849], 540=>[541], 11294=>[11342], 7898=>[7899], 1248=>[1249], 399=>[601], 8040=>[8032], 11436=>[11437],
139
+ 42566=>[42567], 8182=>[969, 834], 258=>[259], 11295=>[11343], 400=>[603], 7758=>[7759], 8041=>[8033], 42850=>[42851],
140
+ 8183=>[969, 834, 953], 542=>[543], 7900=>[7901], 11296=>[11344], 1250=>[1251], 401=>[402], 8042=>[8034], 11438=>[11439],
141
+ 42568=>[42569], 8184=>[8056], 260=>[261], 11297=>[11345], 7760=>[7761], 8043=>[8035], 42852=>[42853], 544=>[414],
142
+ 7902=>[7903], 8185=>[8057], 11298=>[11346], 1252=>[1253], 403=>[608], 8044=>[8036], 11440=>[11441], 42570=>[42571],
143
+ 262=>[263], 8186=>[8060], 11299=>[11347], 404=>[611], 7762=>[7763], 8045=>[8037], 42854=>[42855], 8187=>[8061],
144
+ 546=>[547], 7904=>[7905], 11300=>[11348], 1254=>[1255], 8046=>[8038], 11442=>[11443], 42572=>[42573], 8188=>[8179],
145
+ 264=>[265], 11301=>[11349], 406=>[617], 7764=>[7765], 8047=>[8039], 42856=>[42857], 548=>[549], 7906=>[7907],
146
+ 11302=>[11350], 1256=>[1257], 407=>[616], 11444=>[11445], 42574=>[42575], 266=>[267], 11303=>[11351], 408=>[409],
147
+ 7766=>[7767], 42858=>[42859], 550=>[551], 7908=>[7909], 11304=>[11352], 1258=>[1259], 975=>[983], 11446=>[11447],
148
+ 42576=>[42577], 268=>[269], 11305=>[11353], 7768=>[7769], 976=>[946], 42860=>[42861], 552=>[553], 7910=>[7911],
149
+ 11306=>[11354], 1260=>[1261], 977=>[952], 11448=>[11449], 42578=>[42579], 270=>[271], 11307=>[11355], 412=>[623],
150
+ 7770=>[7771], 42862=>[42863], 1120=>[1121], 554=>[555], 837=>[953], 7912=>[7913], 11308=>[11356], 1262=>[1263],
151
+ 413=>[626], 11450=>[11451], 42580=>[42581], 272=>[273], 11309=>[11357], 7772=>[7773], 1122=>[1123], 556=>[557],
152
+ 7914=>[7915], 11310=>[11358], 1264=>[1265], 415=>[629], 981=>[966], 11452=>[11453], 42582=>[42583], 274=>[275],
153
+ 982=>[960], 416=>[417], 7774=>[7775], 1124=>[1125], 558=>[559], 7916=>[7917], 1266=>[1267], 11454=>[11455],
154
+ 42584=>[42585], 276=>[277], 984=>[985], 418=>[419], 7776=>[7777], 1126=>[1127], 560=>[561], 7918=>[7919],
155
+ 1268=>[1269], 11456=>[11457], 42586=>[42587], 278=>[279], 986=>[987], 420=>[421], 7778=>[7779], 1128=>[1129],
156
+ 8486=>[969], 562=>[563], 7920=>[7921], 1270=>[1271], 11458=>[11459], 42588=>[42589], 280=>[281], 988=>[989],
157
+ 422=>[640], 7780=>[7781], 1130=>[1131], 7922=>[7923], 8064=>[7936, 953], 1272=>[1273], 423=>[424], 11460=>[11461],
158
+ 42590=>[42591], 42873=>[42874], 282=>[283]
159
+ }
160
+ CASEFOLDING_HASH_T = CASEFOLDING_HASH.merge(
161
+ {
162
+ 73=>[305], 304=>[105]
163
+ }
164
+ )
165
+
166
+ def simple_casefold(str, t = false)
167
+ perform_casefold(str, simple_casefold_regex, t)
168
+ end
169
+
170
+ def full_casefold(str, t = false)
171
+ perform_casefold(str, full_casefold_regex, t)
172
+ end
173
+
174
+ alias :casefold :full_casefold
175
+
176
+ def common_casefold(str)
177
+ perform_casefold(str, CASEFOLDING_REGEX_C, false)
178
+ end
179
+
180
+ private
181
+
182
+ def perform_casefold(str, regex, t)
183
+ regex = regex_with_t(regex) if t
184
+ casefolding_hash = t ? CASEFOLDING_HASH_T : CASEFOLDING_HASH
185
+
186
+ str.gsub(regex) do |s|
187
+ s.unpack("U*").inject([]) do |ret, ss|
188
+ ret + casefolding_hash[ss]
189
+ end.pack("U*")
190
+ end
191
+ end
192
+
193
+ def simple_casefold_regex
194
+ @simple_casefold_regex ||= Regexp.union(CASEFOLDING_REGEX_C, CASEFOLDING_REGEX_S)
195
+ end
196
+
197
+ def full_casefold_regex
198
+ @full_casefold_regex ||= Regexp.union(CASEFOLDING_REGEX_C, CASEFOLDING_REGEX_F)
199
+ end
200
+
201
+ def regex_with_t(regex)
202
+ @regex_with_t_cache ||= {}
203
+ @regex_with_t_cache[regex.source] ||=
204
+ Regexp.union(regex, CASEFOLDING_REGEX_T)
205
+ end
206
+
207
+ end
208
+ end
209
+ end
210
+ end