twitter_cldr 4.0.0 → 4.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (308) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +3 -2
  3. data/README.md +18 -2
  4. data/Rakefile +39 -122
  5. data/lib/twitter_cldr.rb +3 -0
  6. data/lib/twitter_cldr/formatters/numbers/rbnf.rb +5 -1
  7. data/lib/twitter_cldr/resources.rb +86 -5
  8. data/lib/twitter_cldr/resources/bidi_test_importer.rb +50 -44
  9. data/lib/twitter_cldr/resources/casefolder_class_generator.rb +22 -13
  10. data/lib/twitter_cldr/resources/collation_tries_importer.rb +44 -0
  11. data/lib/twitter_cldr/resources/hyphenation_importer.rb +16 -42
  12. data/lib/twitter_cldr/resources/import_resolver.rb +71 -0
  13. data/lib/twitter_cldr/resources/importer.rb +107 -0
  14. data/lib/twitter_cldr/resources/language_codes_importer.rb +35 -38
  15. data/lib/twitter_cldr/resources/loader.rb +3 -3
  16. data/lib/twitter_cldr/resources/locales_resources_importer.rb +48 -35
  17. data/lib/twitter_cldr/resources/phone_codes_importer.rb +24 -23
  18. data/lib/twitter_cldr/resources/postal_codes_importer.rb +10 -11
  19. data/lib/twitter_cldr/resources/properties.rb +0 -4
  20. data/lib/twitter_cldr/resources/properties/age_property_importer.rb +13 -9
  21. data/lib/twitter_cldr/resources/properties/arabic_shaping_property_importer.rb +9 -11
  22. data/lib/twitter_cldr/resources/properties/bidi_brackets_property_importer.rb +11 -9
  23. data/lib/twitter_cldr/resources/properties/blocks_property_importer.rb +13 -9
  24. data/lib/twitter_cldr/resources/properties/derived_core_properties_importer.rb +9 -11
  25. data/lib/twitter_cldr/resources/properties/east_asian_width_property_importer.rb +13 -9
  26. data/lib/twitter_cldr/resources/properties/grapheme_break_property_importer.rb +13 -9
  27. data/lib/twitter_cldr/resources/properties/hangul_syllable_type_property_importer.rb +13 -9
  28. data/lib/twitter_cldr/resources/properties/indic_positional_category_property_importer.rb +13 -9
  29. data/lib/twitter_cldr/resources/properties/indic_syllabic_category_property_importer.rb +13 -9
  30. data/lib/twitter_cldr/resources/properties/jamo_property_importer.rb +13 -9
  31. data/lib/twitter_cldr/resources/properties/line_break_property_importer.rb +13 -9
  32. data/lib/twitter_cldr/resources/properties/prop_list_importer.rb +9 -11
  33. data/lib/twitter_cldr/resources/properties/property_importer.rb +13 -22
  34. data/lib/twitter_cldr/resources/properties/script_extensions_property_importer.rb +12 -10
  35. data/lib/twitter_cldr/resources/properties/script_property_importer.rb +13 -9
  36. data/lib/twitter_cldr/resources/properties/sentence_break_property_importer.rb +13 -9
  37. data/lib/twitter_cldr/resources/properties/unicode_data_properties_importer.rb +11 -9
  38. data/lib/twitter_cldr/resources/properties/word_break_property_importer.rb +13 -9
  39. data/lib/twitter_cldr/resources/rbnf_test_importer.rb +41 -38
  40. data/lib/twitter_cldr/resources/readme_renderer.rb +1 -2
  41. data/lib/twitter_cldr/resources/requirements.rb +18 -0
  42. data/lib/twitter_cldr/resources/requirements/cldr_requirement.rb +66 -0
  43. data/lib/twitter_cldr/resources/requirements/dependency_requirement.rb +23 -0
  44. data/lib/twitter_cldr/resources/requirements/git_requirement.rb +66 -0
  45. data/lib/twitter_cldr/resources/requirements/icu_requirement.rb +111 -0
  46. data/lib/twitter_cldr/resources/requirements/unicode_requirement.rb +51 -0
  47. data/lib/twitter_cldr/resources/segment_tests_importer.rb +15 -30
  48. data/lib/twitter_cldr/resources/tailoring_importer.rb +33 -26
  49. data/lib/twitter_cldr/resources/transform_test_importer.rb +15 -17
  50. data/lib/twitter_cldr/resources/uli/segment_exceptions_importer.rb +29 -17
  51. data/lib/twitter_cldr/resources/unicode_data_importer.rb +38 -31
  52. data/lib/twitter_cldr/resources/unicode_file_parser.rb +37 -0
  53. data/lib/twitter_cldr/resources/unicode_property_aliases_importer.rb +23 -27
  54. data/lib/twitter_cldr/shared/casefolder.rb +139 -115
  55. data/lib/twitter_cldr/version.rb +1 -1
  56. data/lib/twitter_cldr/versions.rb +0 -4
  57. data/resources/collation/tailoring/bo.yml +4 -0
  58. data/resources/collation/tries/bo.dump +0 -0
  59. data/resources/locales/bo/calendars.yml +247 -0
  60. data/resources/locales/bo/currencies.yml +208 -0
  61. data/resources/locales/bo/fields.yml +31 -0
  62. data/resources/locales/bo/languages.yml +24 -0
  63. data/resources/locales/bo/layout.yml +5 -0
  64. data/resources/locales/bo/lists.yml +12 -0
  65. data/resources/locales/bo/numbers.yml +111 -0
  66. data/resources/locales/bo/plural_rules.yml +6 -0
  67. data/resources/locales/bo/plurals.yml +12 -0
  68. data/resources/locales/bo/territories.yml +14 -0
  69. data/resources/locales/bo/units.yml +283 -0
  70. data/resources/shared/transforms/Arab-Latn.yml +109 -0
  71. data/resources/shared/transforms/Beng-Deva.yml +13 -0
  72. data/resources/shared/transforms/Beng-Gujr.yml +13 -0
  73. data/resources/shared/transforms/Beng-Guru.yml +13 -0
  74. data/resources/shared/transforms/Beng-Knda.yml +13 -0
  75. data/resources/shared/transforms/Beng-Latn.yml +13 -0
  76. data/resources/shared/transforms/Beng-Mlym.yml +13 -0
  77. data/resources/shared/transforms/Beng-Orya.yml +13 -0
  78. data/resources/shared/transforms/Beng-Taml.yml +13 -0
  79. data/resources/shared/transforms/Beng-Telu.yml +13 -0
  80. data/resources/shared/transforms/Cyrl-Latn.yml +128 -0
  81. data/resources/shared/transforms/Deva-Beng.yml +13 -0
  82. data/resources/shared/transforms/Deva-Gujr.yml +13 -0
  83. data/resources/shared/transforms/Deva-Guru.yml +13 -0
  84. data/resources/shared/transforms/Deva-Knda.yml +13 -0
  85. data/resources/shared/transforms/Deva-Latn.yml +13 -0
  86. data/resources/shared/transforms/Deva-Mlym.yml +13 -0
  87. data/resources/shared/transforms/Deva-Orya.yml +13 -0
  88. data/resources/shared/transforms/Deva-Taml.yml +13 -0
  89. data/resources/shared/transforms/Deva-Telu.yml +13 -0
  90. data/resources/shared/transforms/Geor-Latn.yml +43 -0
  91. data/resources/shared/transforms/Grek-Latn-UNGEGN.yml +160 -0
  92. data/resources/shared/transforms/Grek-Latn.yml +206 -0
  93. data/resources/shared/transforms/Gujr-Beng.yml +13 -0
  94. data/resources/shared/transforms/Gujr-Deva.yml +13 -0
  95. data/resources/shared/transforms/Gujr-Guru.yml +13 -0
  96. data/resources/shared/transforms/Gujr-Knda.yml +13 -0
  97. data/resources/shared/transforms/Gujr-Latn.yml +13 -0
  98. data/resources/shared/transforms/Gujr-Mlym.yml +13 -0
  99. data/resources/shared/transforms/Gujr-Orya.yml +13 -0
  100. data/resources/shared/transforms/Gujr-Taml.yml +13 -0
  101. data/resources/shared/transforms/Gujr-Telu.yml +13 -0
  102. data/resources/shared/transforms/Guru-Beng.yml +13 -0
  103. data/resources/shared/transforms/Guru-Deva.yml +13 -0
  104. data/resources/shared/transforms/Guru-Gujr.yml +13 -0
  105. data/resources/shared/transforms/Guru-Knda.yml +13 -0
  106. data/resources/shared/transforms/Guru-Latn.yml +13 -0
  107. data/resources/shared/transforms/Guru-Mlym.yml +13 -0
  108. data/resources/shared/transforms/Guru-Orya.yml +13 -0
  109. data/resources/shared/transforms/Guru-Taml.yml +13 -0
  110. data/resources/shared/transforms/Guru-Telu.yml +13 -0
  111. data/resources/shared/transforms/Han-Spacedhan.yml +1 -1
  112. data/resources/shared/transforms/Hang-Latn.yml +12 -0
  113. data/resources/shared/transforms/Hani-Latn.yml +1605 -0
  114. data/resources/shared/transforms/Hans-Hant.yml +3982 -0
  115. data/resources/shared/transforms/Hebr-Latn.yml +72 -0
  116. data/resources/shared/transforms/Hira-Kana.yml +114 -0
  117. data/resources/shared/transforms/Hira-Latn.yml +15 -0
  118. data/resources/shared/transforms/InterIndic-Latin.yml +2 -2
  119. data/resources/shared/transforms/Jamo-Latn.yml +12 -0
  120. data/resources/shared/transforms/Knda-Beng.yml +13 -0
  121. data/resources/shared/transforms/Knda-Deva.yml +13 -0
  122. data/resources/shared/transforms/Knda-Gujr.yml +13 -0
  123. data/resources/shared/transforms/Knda-Guru.yml +13 -0
  124. data/resources/shared/transforms/Knda-Latn.yml +13 -0
  125. data/resources/shared/transforms/Knda-Mlym.yml +13 -0
  126. data/resources/shared/transforms/Knda-Orya.yml +13 -0
  127. data/resources/shared/transforms/Knda-Taml.yml +13 -0
  128. data/resources/shared/transforms/Knda-Telu.yml +13 -0
  129. data/resources/shared/transforms/Latin-ASCII.yml +16 -1
  130. data/resources/shared/transforms/Latin-InterIndic.yml +2 -2
  131. data/resources/shared/transforms/Latn-Armn.yml +90 -0
  132. data/resources/shared/transforms/Latn-Beng.yml +14 -0
  133. data/resources/shared/transforms/Latn-Bopo.yml +1336 -0
  134. data/resources/shared/transforms/Latn-Cans.yml +190 -0
  135. data/resources/shared/transforms/Latn-Deva.yml +14 -0
  136. data/resources/shared/transforms/Latn-Ethi.yml +278 -0
  137. data/resources/shared/transforms/Latn-Gujr.yml +14 -0
  138. data/resources/shared/transforms/Latn-Guru.yml +14 -0
  139. data/resources/shared/transforms/Latn-Hang.yml +13 -0
  140. data/resources/shared/transforms/Latn-Jamo.yml +13 -0
  141. data/resources/shared/transforms/Latn-Kana.yml +274 -0
  142. data/resources/shared/transforms/Latn-Knda.yml +14 -0
  143. data/resources/shared/transforms/Latn-Mlym.yml +14 -0
  144. data/resources/shared/transforms/Latn-Orya.yml +14 -0
  145. data/resources/shared/transforms/Latn-Taml.yml +14 -0
  146. data/resources/shared/transforms/Latn-Telu.yml +14 -0
  147. data/resources/shared/transforms/Latn-Thaa.yml +439 -0
  148. data/resources/shared/transforms/Latn-Thai.yml +13 -0
  149. data/resources/shared/transforms/Mlym-Beng.yml +13 -0
  150. data/resources/shared/transforms/Mlym-Deva.yml +13 -0
  151. data/resources/shared/transforms/Mlym-Gujr.yml +13 -0
  152. data/resources/shared/transforms/Mlym-Guru.yml +13 -0
  153. data/resources/shared/transforms/Mlym-Knda.yml +13 -0
  154. data/resources/shared/transforms/Mlym-Latn.yml +13 -0
  155. data/resources/shared/transforms/Mlym-Orya.yml +13 -0
  156. data/resources/shared/transforms/Mlym-Taml.yml +13 -0
  157. data/resources/shared/transforms/Mlym-Telu.yml +13 -0
  158. data/resources/shared/transforms/Orya-Beng.yml +13 -0
  159. data/resources/shared/transforms/Orya-Deva.yml +13 -0
  160. data/resources/shared/transforms/Orya-Gujr.yml +13 -0
  161. data/resources/shared/transforms/Orya-Guru.yml +13 -0
  162. data/resources/shared/transforms/Orya-Knda.yml +13 -0
  163. data/resources/shared/transforms/Orya-Latn.yml +13 -0
  164. data/resources/shared/transforms/Orya-Mlym.yml +13 -0
  165. data/resources/shared/transforms/Orya-Taml.yml +13 -0
  166. data/resources/shared/transforms/Orya-Telu.yml +13 -0
  167. data/resources/shared/transforms/Syrc-Latn.yml +55 -0
  168. data/resources/shared/transforms/Taml-Beng.yml +13 -0
  169. data/resources/shared/transforms/Taml-Deva.yml +13 -0
  170. data/resources/shared/transforms/Taml-Gujr.yml +13 -0
  171. data/resources/shared/transforms/Taml-Guru.yml +13 -0
  172. data/resources/shared/transforms/Taml-Knda.yml +13 -0
  173. data/resources/shared/transforms/Taml-Latn.yml +13 -0
  174. data/resources/shared/transforms/Taml-Mlym.yml +13 -0
  175. data/resources/shared/transforms/Taml-Orya.yml +13 -0
  176. data/resources/shared/transforms/Taml-Telu.yml +13 -0
  177. data/resources/shared/transforms/Telu-Beng.yml +13 -0
  178. data/resources/shared/transforms/Telu-Deva.yml +13 -0
  179. data/resources/shared/transforms/Telu-Gujr.yml +13 -0
  180. data/resources/shared/transforms/Telu-Guru.yml +13 -0
  181. data/resources/shared/transforms/Telu-Knda.yml +13 -0
  182. data/resources/shared/transforms/Telu-Latn.yml +13 -0
  183. data/resources/shared/transforms/Telu-Mlym.yml +13 -0
  184. data/resources/shared/transforms/Telu-Orya.yml +13 -0
  185. data/resources/shared/transforms/Telu-Taml.yml +13 -0
  186. data/resources/shared/transforms/Thai-Latn.yml +15 -0
  187. data/resources/shared/transforms/am-am_FONIPA.yml +609 -0
  188. data/resources/shared/transforms/am-am_Latn-BGN.yml +336 -0
  189. data/resources/shared/transforms/am-ar.yml +11 -0
  190. data/resources/shared/transforms/am-fa.yml +10 -0
  191. data/resources/shared/transforms/ar-ar_Latn-BGN.yml +122 -0
  192. data/resources/shared/transforms/az_Cyrl-az-BGN.yml +93 -0
  193. data/resources/shared/transforms/be-be_Latn-BGN.yml +108 -0
  194. data/resources/shared/transforms/bg-bg_Latn-BGN.yml +99 -0
  195. data/resources/shared/transforms/ch-am.yml +10 -0
  196. data/resources/shared/transforms/ch-ar.yml +10 -0
  197. data/resources/shared/transforms/ch-ch_FONIPA.yml +0 -8
  198. data/resources/shared/transforms/ch-fa.yml +10 -0
  199. data/resources/shared/transforms/cs-am.yml +10 -0
  200. data/resources/shared/transforms/cs-ar.yml +10 -0
  201. data/resources/shared/transforms/cs-fa.yml +10 -0
  202. data/resources/shared/transforms/dsb-dsb_FONIPA.yml +0 -5
  203. data/resources/shared/transforms/dv-dv_Latn-BGN.yml +112 -0
  204. data/resources/shared/transforms/el-el_Latn-BGN.yml +208 -0
  205. data/resources/shared/transforms/eo-am.yml +10 -0
  206. data/resources/shared/transforms/eo-ar.yml +10 -0
  207. data/resources/shared/transforms/eo-eo_FONIPA.yml +52 -0
  208. data/resources/shared/transforms/eo-fa.yml +10 -0
  209. data/resources/shared/transforms/es-ar.yml +13 -0
  210. data/resources/shared/transforms/es-fa.yml +13 -0
  211. data/resources/shared/transforms/es_419-am.yml +11 -0
  212. data/resources/shared/transforms/es_419-ar.yml +14 -0
  213. data/resources/shared/transforms/es_419-fa.yml +14 -0
  214. data/resources/shared/transforms/fa-fa_Latn-BGN.yml +123 -0
  215. data/resources/shared/transforms/he-he_Latn-BGN.yml +62 -0
  216. data/resources/shared/transforms/hy-am.yml +10 -0
  217. data/resources/shared/transforms/hy-ar.yml +10 -0
  218. data/resources/shared/transforms/hy-fa.yml +10 -0
  219. data/resources/shared/transforms/hy-hy_FONIPA.yml +56 -0
  220. data/resources/shared/transforms/hy-hy_Latn-BGN.yml +133 -0
  221. data/resources/shared/transforms/hy_AREVMDA-am.yml +10 -0
  222. data/resources/shared/transforms/hy_AREVMDA-ar.yml +10 -0
  223. data/resources/shared/transforms/hy_AREVMDA-fa.yml +10 -0
  224. data/resources/shared/transforms/hy_AREVMDA-hy_AREVMDA_FONIPA.yml +82 -0
  225. data/resources/shared/transforms/ia-am.yml +10 -0
  226. data/resources/shared/transforms/ia-ar.yml +10 -0
  227. data/resources/shared/transforms/ia-fa.yml +10 -0
  228. data/resources/shared/transforms/ia-ia_FONIPA.yml +69 -0
  229. data/resources/shared/transforms/ja_Hrkt-ja_Latn-BGN.yml +310 -0
  230. data/resources/shared/transforms/ka-ka_Latn-BGN.yml +44 -0
  231. data/resources/shared/transforms/kk-am.yml +10 -0
  232. data/resources/shared/transforms/kk-ar.yml +10 -0
  233. data/resources/shared/transforms/kk-fa.yml +10 -0
  234. data/resources/shared/transforms/kk-kk_FONIPA.yml +53 -0
  235. data/resources/shared/transforms/kk-kk_Latn-BGN.yml +136 -0
  236. data/resources/shared/transforms/ko-ko_Latn-BGN.yml +282 -0
  237. data/resources/shared/transforms/ky-am.yml +10 -0
  238. data/resources/shared/transforms/ky-ar.yml +10 -0
  239. data/resources/shared/transforms/ky-fa.yml +10 -0
  240. data/resources/shared/transforms/ky-ky_FONIPA.yml +73 -0
  241. data/resources/shared/transforms/ky-ky_Latn-BGN.yml +107 -0
  242. data/resources/shared/transforms/la-la_FONIPA.yml +0 -8
  243. data/resources/shared/transforms/mk-mk_Latn-BGN.yml +89 -0
  244. data/resources/shared/transforms/mn-mn_Latn-BGN.yml +101 -0
  245. data/resources/shared/transforms/mn-mn_Latn-MNS.yml +89 -0
  246. data/resources/shared/transforms/my-am.yml +10 -0
  247. data/resources/shared/transforms/my-ar.yml +10 -0
  248. data/resources/shared/transforms/my-fa.yml +10 -0
  249. data/resources/shared/transforms/my-my_FONIPA.yml +260 -0
  250. data/resources/shared/transforms/pl-am.yml +10 -0
  251. data/resources/shared/transforms/pl-ar.yml +10 -0
  252. data/resources/shared/transforms/pl-fa.yml +10 -0
  253. data/resources/shared/transforms/ps-ps_Latn-BGN.yml +151 -0
  254. data/resources/shared/transforms/rm_SURSILV-am.yml +10 -0
  255. data/resources/shared/transforms/rm_SURSILV-ar.yml +10 -0
  256. data/resources/shared/transforms/rm_SURSILV-fa.yml +10 -0
  257. data/resources/shared/transforms/rm_SURSILV-rm_FONIPA_SURSILV.yml +84 -0
  258. data/resources/shared/transforms/ro-am.yml +10 -0
  259. data/resources/shared/transforms/ro-ar.yml +10 -0
  260. data/resources/shared/transforms/ro-fa.yml +10 -0
  261. data/resources/shared/transforms/ro-ro_FONIPA.yml +38 -6
  262. data/resources/shared/transforms/ro_FONIPA-ja.yml +1 -0
  263. data/resources/shared/transforms/ru-ru_Latn-BGN.yml +121 -0
  264. data/resources/shared/transforms/ru_Latn-ru-BGN.yml +101 -0
  265. data/resources/shared/transforms/sat-am.yml +10 -0
  266. data/resources/shared/transforms/sat-ar.yml +10 -0
  267. data/resources/shared/transforms/sat-fa.yml +10 -0
  268. data/resources/shared/transforms/sat_Olck-sat_FONIPA.yml +132 -0
  269. data/resources/shared/transforms/si-am.yml +10 -0
  270. data/resources/shared/transforms/si-ar.yml +10 -0
  271. data/resources/shared/transforms/si-fa.yml +10 -0
  272. data/resources/shared/transforms/si-si_FONIPA.yml +128 -0
  273. data/resources/shared/transforms/si-si_Latn.yml +96 -0
  274. data/resources/shared/transforms/sk-am.yml +10 -0
  275. data/resources/shared/transforms/sk-ar.yml +10 -0
  276. data/resources/shared/transforms/sk-fa.yml +10 -0
  277. data/resources/shared/transforms/sk-sk_FONIPA.yml +18 -2
  278. data/resources/shared/transforms/sk_FONIPA-ja.yml +2 -0
  279. data/resources/shared/transforms/sr-sr_Latn-BGN.yml +81 -0
  280. data/resources/shared/transforms/tk_Cyrl-tk-BGN.yml +122 -0
  281. data/resources/shared/transforms/tlh-am.yml +10 -0
  282. data/resources/shared/transforms/tlh-ar.yml +10 -0
  283. data/resources/shared/transforms/tlh-fa.yml +10 -0
  284. data/resources/shared/transforms/tlh-tlh_FONIPA.yml +0 -8
  285. data/resources/shared/transforms/uk-uk_Latn-BGN.yml +115 -0
  286. data/resources/shared/transforms/und_FONIPA-ar.yml +96 -0
  287. data/resources/shared/transforms/und_FONIPA-fa.yml +88 -0
  288. data/resources/shared/transforms/und_FONIPA-und_FONXSAMP.yml +198 -0
  289. data/resources/shared/transforms/uz_Cyrl-uz-BGN.yml +117 -0
  290. data/resources/shared/transforms/xh-am.yml +10 -0
  291. data/resources/shared/transforms/xh-ar.yml +10 -0
  292. data/resources/shared/transforms/xh-fa.yml +10 -0
  293. data/resources/shared/transforms/xh-xh_FONIPA.yml +71 -0
  294. data/resources/shared/transforms/zu-am.yml +10 -0
  295. data/resources/shared/transforms/zu-ar.yml +10 -0
  296. data/resources/shared/transforms/zu-fa.yml +10 -0
  297. data/resources/shared/transforms/zu-zu_FONIPA.yml +58 -0
  298. data/spec/formatters/numbers/rbnf/rbnf_spec.rb +3 -1
  299. data/spec/resources/loader_spec.rb +12 -5
  300. data/spec/spec_helper.rb +1 -1
  301. metadata +242 -10
  302. data/History.txt +0 -282
  303. data/lib/twitter_cldr/resources/collation_tries_dumper.rb +0 -43
  304. data/lib/twitter_cldr/resources/custom_locales_resources_importer.rb +0 -80
  305. data/lib/twitter_cldr/resources/download.rb +0 -64
  306. data/lib/twitter_cldr/resources/icu_based_importer.rb +0 -18
  307. data/lib/twitter_cldr/resources/properties/properties_importer.rb +0 -59
  308. data/lib/twitter_cldr/resources/unicode_importer.rb +0 -37
@@ -3,29 +3,22 @@
3
3
  # Copyright 2012 Twitter, Inc
4
4
  # http://www.apache.org/licenses/LICENSE-2.0
5
5
 
6
- require 'twitter_cldr/resources/download'
7
6
  require 'fileutils'
8
7
 
9
8
  module TwitterCldr
10
9
  module Resources
11
- class SegmentTestsImporter < UnicodeImporter
10
+ class SegmentTestsImporter < Importer
12
11
 
13
- URL_ROOT = "ucd/auxiliary"
14
12
  TEST_FILES = [
15
- 'WordBreakTest.txt', 'SentenceBreakTest.txt'
13
+ 'ucd/auxiliary/WordBreakTest.txt',
14
+ 'ucd/auxiliary/SentenceBreakTest.txt'
16
15
  ]
17
16
 
18
- attr_reader :input_path, :output_path
19
-
20
- def initialize(input_path, output_path)
21
- @input_path = input_path
22
- @output_path = output_path
23
- end
24
-
25
- def import
26
- FileUtils.mkdir_p(input_path)
27
- FileUtils.mkdir_p(output_path)
17
+ requirement :unicode, Versions.unicode_version, TEST_FILES
18
+ output_path 'shared/segments/tests'
19
+ ruby_engine :mri
28
20
 
21
+ def execute
29
22
  TEST_FILES.each do |test_file|
30
23
  import_test_file(test_file)
31
24
  end
@@ -34,27 +27,19 @@ module TwitterCldr
34
27
  private
35
28
 
36
29
  def import_test_file(test_file)
37
- url = "#{URL_ROOT}/#{test_file}"
38
- input_file = input_file_for(test_file)
39
- output_file = output_path_for(test_file)
40
- download(input_file, url)
41
- result = parse_standard_file(input_file).map(&:first)
42
- File.write(output_file, YAML.dump(result))
30
+ source_file = source_path_for(test_file)
31
+ FileUtils.mkdir_p(File.dirname(source_file))
32
+ result = UnicodeFileParser.parse_standard_file(source_file).map(&:first)
33
+ File.write(output_path_for(test_file), YAML.dump(result))
43
34
  end
44
35
 
45
- def input_file_for(test_file)
46
- File.join(input_path, test_file)
36
+ def source_path_for(test_file)
37
+ requirements[:unicode].source_path_for(test_file)
47
38
  end
48
39
 
49
40
  def output_path_for(test_file)
50
- base = underscore(test_file.chomp(File.extname(test_file)))
51
- File.join(output_path, "#{base}.yml")
52
- end
53
-
54
- def download(input_file, url)
55
- TwitterCldr::Resources.download_unicode_data_if_necessary(
56
- input_file, url
57
- )
41
+ file = underscore(File.basename(test_file).chomp(File.extname(test_file)))
42
+ File.join(params.fetch(:output_path), "#{file}.yml")
58
43
  end
59
44
 
60
45
  def underscore(str)
@@ -4,16 +4,19 @@
4
4
  # http://www.apache.org/licenses/LICENSE-2.0
5
5
 
6
6
  require 'nokogiri'
7
- require 'java'
8
-
9
- require 'twitter_cldr/resources/download'
10
7
 
11
8
  module TwitterCldr
12
9
  module Resources
13
10
  # This class should be used with JRuby 1.7 in 1.9 mode, ICU4J version >= 49.1,
14
11
  # and CLDR version <= 23 (v24 syntax is not supported yet).
15
12
  #
16
- class TailoringImporter < IcuBasedImporter
13
+ class TailoringImporter < Importer
14
+
15
+ requirement :icu, '51.2'
16
+ requirement :cldr, '23.1'
17
+ output_path 'collation/tailoring'
18
+ locales TwitterCldr.supported_locales
19
+ ruby_engine :jruby
17
20
 
18
21
  SUPPORTED_RULES = %w[p s t i pc sc tc ic x]
19
22
  SIMPLE_RULES = %w[p s t i]
@@ -40,26 +43,12 @@ module TwitterCldr
40
43
 
41
44
  class ImportError < RuntimeError; end
42
45
 
43
- # Arguments:
44
- #
45
- # input_path - path to a directory containing CLDR data
46
- # output_path - output directory for imported YAML files
47
- # icu4j_path - path to ICU4J jar file
48
- #
49
- def initialize(input_path, output_path, icu4j_path)
50
- require_icu4j(icu4j_path)
51
-
52
- @input_path = input_path
53
- @output_path = output_path
54
- end
46
+ private
55
47
 
56
- def import(locales)
57
- TwitterCldr::Resources.download_cldr_if_necessary(@input_path)
58
- locales.each { |locale| import_locale(locale) }
48
+ def execute
49
+ params[:locales].each { |locale| import_locale(locale) }
59
50
  end
60
51
 
61
- private
62
-
63
52
  def import_locale(locale)
64
53
  print "Importing %8s\t--\t" % locale
65
54
 
@@ -87,11 +76,13 @@ module TwitterCldr
87
76
  end
88
77
 
89
78
  def locale_file_path(locale)
90
- File.join(@input_path, 'common', 'collation', "#{translated_locale(locale)}.xml")
79
+ File.join(
80
+ requirements[:cldr].common_path, 'collation', "#{translated_locale(locale)}.xml"
81
+ )
91
82
  end
92
83
 
93
84
  def resource_file_path(locale)
94
- File.join(@output_path, "#{locale}.yml")
85
+ File.join(params[:output_path], "#{locale}.yml")
95
86
  end
96
87
 
97
88
  def tailoring_data(locale)
@@ -136,12 +127,28 @@ module TwitterCldr
136
127
  default_type_node && default_type_node.attr('type')
137
128
  end
138
129
 
130
+ def get_class(name)
131
+ requirements[:icu].get_class(name)
132
+ end
133
+
134
+ def collator_class
135
+ @collator_class ||= get_class('com.ibm.icu.text.Collator')
136
+ end
137
+
138
+ def unicode_set_class
139
+ @unicode_set_class ||= get_class('com.ibm.icu.text.UnicodeSet')
140
+ end
141
+
142
+ def collation_element_iterator_class
143
+ @collation_element_iterator_class ||= get_class('com.ibm.icu.text.CollationElementIterator')
144
+ end
145
+
139
146
  def parse_tailorings(data, locale)
140
147
  rules = data && data.at_xpath('rules')
141
148
 
142
149
  return '' unless rules
143
150
 
144
- collator = Java::ComIbmIcuText::Collator.get_instance(Java::JavaUtil::Locale.new(locale.to_s))
151
+ collator = collator_class.get_instance(Java::JavaUtil::Locale.new(locale.to_s))
145
152
 
146
153
  rules.children.map do |child|
147
154
  validate_tailoring_rule(child)
@@ -183,7 +190,7 @@ module TwitterCldr
183
190
 
184
191
  def parse_suppressed_contractions(data)
185
192
  node = data && data.at_xpath('suppress_contractions')
186
- node ? Java::ComIbmIcuText::UnicodeSet.to_array(Java::ComIbmIcuText::UnicodeSet.new(node.text)).to_a.join : ''
193
+ node ? unicode_set_class.to_array(unicode_set_class.new(node.text)).to_a.join : ''
187
194
  end
188
195
 
189
196
  def parse_collator_options(data)
@@ -209,7 +216,7 @@ module TwitterCldr
209
216
  collation_elements = []
210
217
  ce = iter.next
211
218
 
212
- while ce != Java::ComIbmIcuText::CollationElementIterator::NULLORDER
219
+ while ce != collation_element_iterator_class::NULLORDER
213
220
  p1 = (ce >> 24) & LAST_BYTE_MASK
214
221
  p2 = (ce >> 16) & LAST_BYTE_MASK
215
222
 
@@ -3,17 +3,13 @@
3
3
  # Copyright 2012 Twitter, Inc
4
4
  # http://www.apache.org/licenses/LICENSE-2.0
5
5
 
6
- require 'java'
7
6
  require 'fileutils'
8
7
 
9
8
  module TwitterCldr
10
9
  module Resources
11
10
 
12
11
  # This class should be used with JRuby in 1.9 mode
13
- class TransformTestImporter < IcuBasedImporter
14
-
15
- attr_reader :output_file, :icu4j_path
16
-
12
+ class TransformTestImporter < Importer
17
13
  # most of these were taken from wikipedia, lol
18
14
  TEXT_SAMPLES = {
19
15
  latin: ["From today's featured article"], # @TODO test capital letters,
@@ -23,7 +19,7 @@ module TwitterCldr
23
19
  gurmukhi: ["ਅੱਜ ਇਤਿਹਾਸ ਵਿੱਚ"],
24
20
  gujarati: ["આ માસનો ઉમદા લેખ"],
25
21
  bengali: ["নির্বাচিত নিবন্ধ"],
26
- hangul: ["김창옥"],
22
+ hangul: ["김창옥", '모든 사용자는 위키백과에 직접 참여해 확인 가능'],
27
23
  arabic: ["مقالة اليوم المختارة"],
28
24
  han: ["因此只有两场风暴因造成"],
29
25
  hiragana: ["くろねこさま"],
@@ -37,7 +33,6 @@ module TwitterCldr
37
33
  malayalam: ['ഇടുക്കിയിലെ സൂര്യനെല്ലി സ്വദേശിനിയായ'],
38
34
  tamil: ['சென்னையில் வாழும் உலோ.செந்தமிழ்க்கோதை'],
39
35
  interindic: ['  '],
40
- hangul: ['모든 사용자는 위키백과에 직접 참여해 확인 가능'],
41
36
  hebrew: ['על שמן של המיילדות במצרים, שפרה ופועה, נקראו'],
42
37
  simplified: ['系统源于墨西哥以西的扰动天气区,并且位于更大规模的天气系统以内'],
43
38
  traditional: ['系統源於墨西哥以西的擾動天氣區,並且位於更大規模的天氣系統以內'],
@@ -50,15 +45,12 @@ module TwitterCldr
50
45
 
51
46
  BGN_SAMPLES = [:armenian, :katakana, :korean]
52
47
 
53
- def initialize(output_file, icu4j_path)
54
- @output_file = output_file
55
- @icu4j_path = icu4j_path
56
- end
48
+ requirement :icu, Versions.icu_version
49
+ output_path File.join(TwitterCldr::SPEC_DIR, 'transforms', 'test_data.yml')
50
+ ruby_engine :jruby
57
51
 
58
- def import
59
- require_icu4j(icu4j_path)
60
-
61
- File.open(output_file, 'w+') do |f|
52
+ def execute
53
+ File.open(params.fetch(:output_path), 'w+') do |f|
62
54
  f.write(
63
55
  YAML.dump(
64
56
  generate_test_data(transformer.each_transform)
@@ -67,6 +59,8 @@ module TwitterCldr
67
59
  end
68
60
  end
69
61
 
62
+ private
63
+
70
64
  def generate_test_data(transforms)
71
65
  transforms.each_with_object([]) do |transform_id_str, ret|
72
66
  forward_id = transform_id.parse(transform_id_str)
@@ -104,8 +98,12 @@ module TwitterCldr
104
98
  TwitterCldr::Transforms::Transformer.exists?(id)
105
99
  end
106
100
 
101
+ def transliterator_class
102
+ @transliterator_class ||= requirements[:icu].get_class('com.ibm.icu.text.Transliterator')
103
+ end
104
+
107
105
  def generate_transform_samples(id, samples)
108
- trans = com.ibm.icu.text.Transliterator.getInstance(id.to_s)
106
+ trans = transliterator_class.getInstance(id.to_s)
109
107
  samples.each_with_object({}) do |sample, ret|
110
108
  ret[sample] = trans.transliterate(sample)
111
109
  end
@@ -131,7 +129,7 @@ module TwitterCldr
131
129
  def transform_id
132
130
  TwitterCldr::Transforms::TransformId
133
131
  end
134
-
135
132
  end
133
+
136
134
  end
137
135
  end
@@ -3,41 +3,42 @@
3
3
  # Copyright 2012 Twitter, Inc
4
4
  # http://www.apache.org/licenses/LICENSE-2.0
5
5
 
6
- require 'twitter_cldr/resources/download'
7
6
  require 'fileutils'
7
+ require 'open-uri'
8
8
  require 'json'
9
9
 
10
10
  module TwitterCldr
11
11
  module Resources
12
12
  module Uli
13
- class SegmentExceptionsImporter
13
+ class SegmentExceptionsImporter < Resources::Importer
14
14
 
15
15
  URL = "http://unicode.org/uli/trac/export/58/trunk/abbrs/json/%{locale}.json"
16
+ LOCALES = [:de, :en, :es, :fr, :it, :pt, :ru] # these are the only locales ULI supports at the moment
16
17
 
17
- attr_reader :input_path, :output_path
18
+ output_path 'uli/segments'
19
+ ruby_engine :mri
18
20
 
19
- def initialize(input_path, output_path)
20
- @input_path = input_path
21
- @output_path = output_path
22
- end
23
-
24
- def import(locales)
21
+ def execute
25
22
  FileUtils.mkdir_p(input_path)
26
23
  FileUtils.mkdir_p(output_path)
27
- locales.each { |locale| import_locale(locale) }
24
+ LOCALES.each { |locale| import_locale(locale) }
28
25
  end
29
26
 
30
27
  private
31
28
 
29
+ def output_path
30
+ params.fetch(:output_path)
31
+ end
32
+
32
33
  def import_locale(locale)
33
34
  if input_file = download_resource_for(locale)
34
35
  output_file = File.join(output_path, "#{locale}.yml")
35
36
  exceptions = JSON.parse(File.read(input_file))
36
37
 
37
- File.open(output_file, "w+") do |f|
38
+ File.open(output_file, 'w+') do |f|
38
39
  YAML.dump({
39
40
  locale => {
40
- exceptions: exceptions["data"]["abbrs"]
41
+ exceptions: exceptions['data']['abbrs']
41
42
  }
42
43
  }, f)
43
44
  end
@@ -45,14 +46,25 @@ module TwitterCldr
45
46
  end
46
47
 
47
48
  def download_resource_for(locale)
48
- input_file = input_path_for(locale)
49
- TwitterCldr::Resources.download_if_necessary(
50
- input_file, URL.gsub("%{locale}", locale.to_s)
51
- )
49
+ input_file = input_file_for(locale)
50
+ url = URL % { locale: locale }
51
+
52
+ unless File.file?(input_file)
53
+ STDOUT.write("Downloading #{url}... ")
54
+ open(input_file, 'wb') { |file| file << open(url).read }
55
+ puts 'done'
56
+ end
57
+
52
58
  input_file
53
59
  end
54
60
 
55
- def input_path_for(locale)
61
+ def input_path
62
+ @input_path ||= File.join(
63
+ TwitterCldr::VENDOR_DIR, 'uli', 'segments'
64
+ )
65
+ end
66
+
67
+ def input_file_for(locale)
56
68
  File.join(input_path, "#{locale}.json")
57
69
  end
58
70
 
@@ -3,52 +3,53 @@
3
3
  # Copyright 2012 Twitter, Inc
4
4
  # http://www.apache.org/licenses/LICENSE-2.0
5
5
 
6
- require 'twitter_cldr/resources/download'
7
-
8
6
  module TwitterCldr
9
7
  module Resources
10
8
 
11
- class UnicodeDataImporter < UnicodeImporter
9
+ class UnicodeDataImporter < Importer
12
10
 
13
- BLOCKS_URL = 'ucd/Blocks.txt'
14
- UNICODE_DATA_URL = 'ucd/UnicodeData.txt'
15
- CASEFOLDING_DATA_URL = 'ucd/CaseFolding.txt'
11
+ BLOCKS_FILE = 'ucd/Blocks.txt'
12
+ UNICODE_DATA_FILE = 'ucd/UnicodeData.txt'
13
+ CASEFOLDING_DATA_FILE = 'ucd/CaseFolding.txt'
16
14
 
17
- # Arguments:
18
- #
19
- # input_path - path to a directory containing Blocks.txt and UnicodeData.txt
20
- # output_path - output directory for imported YAML files
21
- #
22
- def initialize(input_path, output_path)
23
- @input_path = input_path
24
- @output_path = output_path
25
- end
15
+ requirement :unicode, Versions.unicode_version, [BLOCKS_FILE, UNICODE_DATA_FILE, CASEFOLDING_DATA_FILE]
16
+ output_path 'unicode_data'
17
+ ruby_engine :mri
26
18
 
27
- def import
19
+ def execute
28
20
  blocks = import_blocks
29
21
  unicode_data = import_unicode_data(blocks)
30
22
  casefolding_data = import_casefolding_data
31
23
 
32
- File.open(File.join(@output_path, 'blocks.yml'), 'w') do |output|
24
+ STDOUT.write('Writing data to disk... ')
25
+
26
+ File.open(File.join(output_path, 'blocks.yml'), 'w') do |output|
33
27
  YAML.dump(blocks, output)
34
28
  end
35
29
 
36
- FileUtils.mkdir_p(File.join(@output_path, 'blocks'))
30
+ FileUtils.mkdir_p(File.join(output_path, 'blocks'))
37
31
 
38
32
  unicode_data.each do |block_name, code_points|
39
- File.open(File.join(@output_path, 'blocks', "#{block_name}.yml"), 'w') do |output|
33
+ File.open(File.join(output_path, 'blocks', "#{block_name}.yml"), 'w') do |output|
40
34
  YAML.dump(code_points, output)
41
35
  end
42
36
  end
43
37
 
44
- File.open(File.join(@output_path, 'casefolding.yml'), 'w') do |output|
38
+ File.open(File.join(output_path, 'casefolding.yml'), 'w') do |output|
45
39
  YAML.dump(casefolding_data, output)
46
40
  end
41
+
42
+ puts 'done'
47
43
  end
48
44
 
49
45
  private
50
46
 
47
+ def output_path
48
+ params.fetch(:output_path)
49
+ end
50
+
51
51
  def import_blocks
52
+ STDOUT.write('Importing blocks... ')
52
53
  blocks = {}
53
54
 
54
55
  File.open(blocks_file) do |input|
@@ -62,48 +63,54 @@ module TwitterCldr
62
63
  end
63
64
  end
64
65
 
66
+ puts 'done'
65
67
  blocks
66
68
  end
67
69
 
70
+ def parse_file(file, &block)
71
+ UnicodeFileParser.parse_standard_file(file, &block)
72
+ end
73
+
68
74
  def import_unicode_data(blocks)
75
+ STDOUT.write('Importing Unicode data... ')
69
76
  unicode_data = Hash.new do |hash, key|
70
77
  hash[key] = Hash.new { |h, k| h[k] = {} }
71
78
  end
72
79
 
73
- parse_standard_file(unicode_data_file) do |data|
80
+ parse_file(unicode_data_file) do |data|
74
81
  data[0] = data[0].hex
75
82
  unicode_data[find_block(blocks, data[0]).first][data[0]] = data
76
83
  end
77
84
 
85
+ puts 'done'
78
86
  unicode_data
79
87
  end
80
88
 
81
89
  def import_casefolding_data
82
- parse_standard_file(casefold_data_file).map do |data|
90
+ STDOUT.write('Importing casefolding data... ')
91
+
92
+ casefolding_data = parse_file(casefold_data_file).map do |data|
83
93
  {
84
94
  source: data[0].hex,
85
95
  target: data[2].split(" ").map(&:hex),
86
96
  status: data[1]
87
97
  }
88
98
  end
99
+
100
+ puts 'done'
101
+ casefolding_data
89
102
  end
90
103
 
91
104
  def casefold_data_file
92
- TwitterCldr::Resources.download_unicode_data_if_necessary(
93
- File.join(@input_path, 'CaseFolding.txt'), CASEFOLDING_DATA_URL
94
- )
105
+ requirements[:unicode].source_path_for(CASEFOLDING_DATA_FILE)
95
106
  end
96
107
 
97
108
  def unicode_data_file
98
- TwitterCldr::Resources.download_unicode_data_if_necessary(
99
- File.join(@input_path, 'UnicodeData.txt'), UNICODE_DATA_URL
100
- )
109
+ requirements[:unicode].source_path_for(UNICODE_DATA_FILE)
101
110
  end
102
111
 
103
112
  def blocks_file
104
- TwitterCldr::Resources.download_unicode_data_if_necessary(
105
- File.join(@input_path, 'Blocks.txt'), BLOCKS_URL
106
- )
113
+ requirements[:unicode].source_path_for(BLOCKS_FILE)
107
114
  end
108
115
 
109
116
  def find_block(blocks, code_point)