twitter_cldr 1.0.1 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (303) hide show
  1. data/NOTICE +95 -1
  2. data/README.md +4 -4
  3. data/Rakefile +18 -28
  4. data/lib/ext/calendars/date.rb +3 -0
  5. data/lib/ext/calendars/datetime.rb +3 -0
  6. data/lib/ext/calendars/time.rb +3 -0
  7. data/lib/ext/localized_object.rb +3 -0
  8. data/lib/ext/numbers/bignum.rb +3 -0
  9. data/lib/ext/numbers/fixnum.rb +3 -0
  10. data/lib/ext/numbers/float.rb +3 -0
  11. data/lib/ext/numbers/localized_number.rb +3 -0
  12. data/lib/ext/strings/string.rb +31 -0
  13. data/lib/ext/strings/symbol.rb +3 -0
  14. data/lib/formatters/base.rb +3 -0
  15. data/lib/formatters/calendars/date_formatter.rb +3 -0
  16. data/lib/formatters/calendars/datetime_formatter.rb +3 -0
  17. data/lib/formatters/calendars/time_formatter.rb +3 -0
  18. data/lib/formatters/numbers/currency_formatter.rb +3 -0
  19. data/lib/formatters/numbers/decimal_formatter.rb +3 -0
  20. data/lib/formatters/numbers/helpers/base.rb +3 -0
  21. data/lib/formatters/numbers/helpers/fraction.rb +3 -0
  22. data/lib/formatters/numbers/helpers/integer.rb +3 -0
  23. data/lib/formatters/numbers/number_formatter.rb +3 -0
  24. data/lib/formatters/numbers/percent_formatter.rb +3 -0
  25. data/lib/formatters/plurals/plural_formatter.rb +141 -0
  26. data/lib/formatters/plurals/rules.rb +4 -1
  27. data/lib/normalizers/base.rb +17 -0
  28. data/lib/normalizers/canonical/nfd.rb +81 -0
  29. data/lib/shared/currencies.rb +4 -1
  30. data/lib/shared/languages.rb +4 -1
  31. data/lib/shared/resources.rb +8 -28
  32. data/lib/shared/timezones.rb +3 -0
  33. data/lib/shared/unicode_data.rb +44 -0
  34. data/lib/tokenizers/base.rb +3 -0
  35. data/lib/tokenizers/calendars/date_tokenizer.rb +3 -0
  36. data/lib/tokenizers/calendars/datetime_tokenizer.rb +4 -1
  37. data/lib/tokenizers/calendars/time_tokenizer.rb +3 -0
  38. data/lib/tokenizers/key_path.rb +3 -0
  39. data/lib/tokenizers/numbers/number_tokenizer.rb +4 -1
  40. data/lib/tokenizers/token.rb +3 -0
  41. data/lib/twitter_cldr.rb +52 -29
  42. data/lib/utils/interpolation.rb +105 -0
  43. data/lib/utils.rb +28 -0
  44. data/lib/version.rb +6 -1
  45. data/resources/unicode_data/aegean_numbers.yml +913 -0
  46. data/resources/unicode_data/alchemical_symbols.yml +1857 -0
  47. data/resources/unicode_data/alphabetic_presentation_forms.yml +929 -0
  48. data/resources/unicode_data/ancient_greek_musical_notation.yml +1121 -0
  49. data/resources/unicode_data/ancient_greek_numbers.yml +1201 -0
  50. data/resources/unicode_data/ancient_symbols.yml +193 -0
  51. data/resources/unicode_data/arabic.yml +4049 -0
  52. data/resources/unicode_data/arabic_extended_a.yml +625 -0
  53. data/resources/unicode_data/arabic_mathematical_alphabetic_symbols.yml +2289 -0
  54. data/resources/unicode_data/arabic_presentation_forms_a.yml +9777 -0
  55. data/resources/unicode_data/arabic_presentation_forms_b.yml +2257 -0
  56. data/resources/unicode_data/arabic_supplement.yml +769 -0
  57. data/resources/unicode_data/armenian.yml +1393 -0
  58. data/resources/unicode_data/arrows.yml +1793 -0
  59. data/resources/unicode_data/avestan.yml +977 -0
  60. data/resources/unicode_data/balinese.yml +1937 -0
  61. data/resources/unicode_data/bamum.yml +1409 -0
  62. data/resources/unicode_data/bamum_supplement.yml +9105 -0
  63. data/resources/unicode_data/basic_latin.yml +2049 -0
  64. data/resources/unicode_data/batak.yml +897 -0
  65. data/resources/unicode_data/bengali.yml +1473 -0
  66. data/resources/unicode_data/block_elements.yml +513 -0
  67. data/resources/unicode_data/blocks.yml +881 -0
  68. data/resources/unicode_data/bopomofo.yml +657 -0
  69. data/resources/unicode_data/bopomofo_extended.yml +433 -0
  70. data/resources/unicode_data/box_drawing.yml +2049 -0
  71. data/resources/unicode_data/brahmi.yml +1729 -0
  72. data/resources/unicode_data/braille_patterns.yml +4097 -0
  73. data/resources/unicode_data/buginese.yml +481 -0
  74. data/resources/unicode_data/buhid.yml +321 -0
  75. data/resources/unicode_data/byzantine_musical_symbols.yml +3937 -0
  76. data/resources/unicode_data/carian.yml +785 -0
  77. data/resources/unicode_data/chakma.yml +1073 -0
  78. data/resources/unicode_data/cham.yml +1329 -0
  79. data/resources/unicode_data/cherokee.yml +1361 -0
  80. data/resources/unicode_data/cjk_compatibility.yml +4097 -0
  81. data/resources/unicode_data/cjk_compatibility_forms.yml +513 -0
  82. data/resources/unicode_data/cjk_compatibility_ideographs.yml +7553 -0
  83. data/resources/unicode_data/cjk_compatibility_ideographs_supplement.yml +8673 -0
  84. data/resources/unicode_data/cjk_radicals_supplement.yml +1841 -0
  85. data/resources/unicode_data/cjk_strokes.yml +577 -0
  86. data/resources/unicode_data/cjk_symbols_and_punctuation.yml +1025 -0
  87. data/resources/unicode_data/cjk_unified_ideographs.yml +33 -0
  88. data/resources/unicode_data/cjk_unified_ideographs_extension_a.yml +33 -0
  89. data/resources/unicode_data/cjk_unified_ideographs_extension_b.yml +33 -0
  90. data/resources/unicode_data/cjk_unified_ideographs_extension_c.yml +33 -0
  91. data/resources/unicode_data/cjk_unified_ideographs_extension_d.yml +33 -0
  92. data/resources/unicode_data/combining_diacritical_marks.yml +1793 -0
  93. data/resources/unicode_data/combining_diacritical_marks_for_symbols.yml +529 -0
  94. data/resources/unicode_data/combining_diacritical_marks_supplement.yml +689 -0
  95. data/resources/unicode_data/combining_half_marks.yml +113 -0
  96. data/resources/unicode_data/common_indic_number_forms.yml +161 -0
  97. data/resources/unicode_data/control_pictures.yml +625 -0
  98. data/resources/unicode_data/coptic.yml +1969 -0
  99. data/resources/unicode_data/counting_rod_numerals.yml +289 -0
  100. data/resources/unicode_data/cuneiform.yml +14065 -0
  101. data/resources/unicode_data/cuneiform_numbers_and_punctuation.yml +1649 -0
  102. data/resources/unicode_data/currency_symbols.yml +417 -0
  103. data/resources/unicode_data/cypriot_syllabary.yml +881 -0
  104. data/resources/unicode_data/cyrillic.yml +4097 -0
  105. data/resources/unicode_data/cyrillic_extended_a.yml +513 -0
  106. data/resources/unicode_data/cyrillic_extended_b.yml +1425 -0
  107. data/resources/unicode_data/cyrillic_supplement.yml +641 -0
  108. data/resources/unicode_data/deseret.yml +1281 -0
  109. data/resources/unicode_data/devanagari.yml +2033 -0
  110. data/resources/unicode_data/devanagari_extended.yml +449 -0
  111. data/resources/unicode_data/dingbats.yml +3057 -0
  112. data/resources/unicode_data/domino_tiles.yml +1601 -0
  113. data/resources/unicode_data/egyptian_hieroglyphs.yml +17137 -0
  114. data/resources/unicode_data/emoticons.yml +1217 -0
  115. data/resources/unicode_data/enclosed_alphanumeric_supplement.yml +2737 -0
  116. data/resources/unicode_data/enclosed_alphanumerics.yml +2561 -0
  117. data/resources/unicode_data/enclosed_cjk_letters_and_months.yml +4065 -0
  118. data/resources/unicode_data/enclosed_ideographic_supplement.yml +913 -0
  119. data/resources/unicode_data/ethiopic.yml +5729 -0
  120. data/resources/unicode_data/ethiopic_extended.yml +1265 -0
  121. data/resources/unicode_data/ethiopic_extended_a.yml +513 -0
  122. data/resources/unicode_data/ethiopic_supplement.yml +417 -0
  123. data/resources/unicode_data/general_punctuation.yml +1713 -0
  124. data/resources/unicode_data/geometric_shapes.yml +1537 -0
  125. data/resources/unicode_data/georgian.yml +1409 -0
  126. data/resources/unicode_data/georgian_supplement.yml +641 -0
  127. data/resources/unicode_data/glagolitic.yml +1505 -0
  128. data/resources/unicode_data/gothic.yml +433 -0
  129. data/resources/unicode_data/greek_and_coptic.yml +2145 -0
  130. data/resources/unicode_data/greek_extended.yml +3729 -0
  131. data/resources/unicode_data/gujarati.yml +1345 -0
  132. data/resources/unicode_data/gurmukhi.yml +1265 -0
  133. data/resources/unicode_data/halfwidth_and_fullwidth_forms.yml +3601 -0
  134. data/resources/unicode_data/hangul_compatibility_jamo.yml +1505 -0
  135. data/resources/unicode_data/hangul_jamo.yml +4097 -0
  136. data/resources/unicode_data/hangul_jamo_extended_a.yml +465 -0
  137. data/resources/unicode_data/hangul_jamo_extended_b.yml +1153 -0
  138. data/resources/unicode_data/hangul_syllables.yml +33 -0
  139. data/resources/unicode_data/hanunoo.yml +369 -0
  140. data/resources/unicode_data/hebrew.yml +1393 -0
  141. data/resources/unicode_data/high_private_use_surrogates.yml +33 -0
  142. data/resources/unicode_data/high_surrogates.yml +33 -0
  143. data/resources/unicode_data/hiragana.yml +1489 -0
  144. data/resources/unicode_data/ideographic_description_characters.yml +193 -0
  145. data/resources/unicode_data/imperial_aramaic.yml +497 -0
  146. data/resources/unicode_data/inscriptional_pahlavi.yml +433 -0
  147. data/resources/unicode_data/inscriptional_parthian.yml +481 -0
  148. data/resources/unicode_data/ipa_extensions.yml +1537 -0
  149. data/resources/unicode_data/javanese.yml +1457 -0
  150. data/resources/unicode_data/kaithi.yml +1057 -0
  151. data/resources/unicode_data/kana_supplement.yml +33 -0
  152. data/resources/unicode_data/kanbun.yml +257 -0
  153. data/resources/unicode_data/kangxi_radicals.yml +3425 -0
  154. data/resources/unicode_data/kannada.yml +1377 -0
  155. data/resources/unicode_data/katakana.yml +1537 -0
  156. data/resources/unicode_data/katakana_phonetic_extensions.yml +257 -0
  157. data/resources/unicode_data/kayah_li.yml +769 -0
  158. data/resources/unicode_data/kharoshthi.yml +1041 -0
  159. data/resources/unicode_data/khmer.yml +1825 -0
  160. data/resources/unicode_data/khmer_symbols.yml +513 -0
  161. data/resources/unicode_data/lao.yml +1073 -0
  162. data/resources/unicode_data/latin_1_supplement.yml +2049 -0
  163. data/resources/unicode_data/latin_extended_a.yml +2049 -0
  164. data/resources/unicode_data/latin_extended_additional.yml +4097 -0
  165. data/resources/unicode_data/latin_extended_b.yml +3329 -0
  166. data/resources/unicode_data/latin_extended_c.yml +513 -0
  167. data/resources/unicode_data/latin_extended_d.yml +2145 -0
  168. data/resources/unicode_data/lepcha.yml +1185 -0
  169. data/resources/unicode_data/letterlike_symbols.yml +1281 -0
  170. data/resources/unicode_data/limbu.yml +1057 -0
  171. data/resources/unicode_data/linear_b_ideograms.yml +1969 -0
  172. data/resources/unicode_data/linear_b_syllabary.yml +1409 -0
  173. data/resources/unicode_data/lisu.yml +769 -0
  174. data/resources/unicode_data/low_surrogates.yml +33 -0
  175. data/resources/unicode_data/lycian.yml +465 -0
  176. data/resources/unicode_data/lydian.yml +433 -0
  177. data/resources/unicode_data/mahjong_tiles.yml +705 -0
  178. data/resources/unicode_data/malayalam.yml +1569 -0
  179. data/resources/unicode_data/mandaic.yml +465 -0
  180. data/resources/unicode_data/mathematical_alphanumeric_symbols.yml +15937 -0
  181. data/resources/unicode_data/mathematical_operators.yml +4097 -0
  182. data/resources/unicode_data/meetei_mayek.yml +897 -0
  183. data/resources/unicode_data/meetei_mayek_extensions.yml +369 -0
  184. data/resources/unicode_data/meroitic_cursive.yml +417 -0
  185. data/resources/unicode_data/meroitic_hieroglyphs.yml +513 -0
  186. data/resources/unicode_data/miao.yml +2129 -0
  187. data/resources/unicode_data/miscellaneous_mathematical_symbols_a.yml +769 -0
  188. data/resources/unicode_data/miscellaneous_mathematical_symbols_b.yml +2049 -0
  189. data/resources/unicode_data/miscellaneous_symbols.yml +4097 -0
  190. data/resources/unicode_data/miscellaneous_symbols_and_arrows.yml +1393 -0
  191. data/resources/unicode_data/miscellaneous_symbols_and_pictographs.yml +8529 -0
  192. data/resources/unicode_data/miscellaneous_technical.yml +3905 -0
  193. data/resources/unicode_data/modifier_tone_letters.yml +513 -0
  194. data/resources/unicode_data/mongolian.yml +2497 -0
  195. data/resources/unicode_data/musical_symbols.yml +3521 -0
  196. data/resources/unicode_data/myanmar.yml +2561 -0
  197. data/resources/unicode_data/myanmar_extended_a.yml +449 -0
  198. data/resources/unicode_data/new_tai_lue.yml +1329 -0
  199. data/resources/unicode_data/nko.yml +945 -0
  200. data/resources/unicode_data/number_forms.yml +929 -0
  201. data/resources/unicode_data/ogham.yml +465 -0
  202. data/resources/unicode_data/ol_chiki.yml +769 -0
  203. data/resources/unicode_data/old_italic.yml +561 -0
  204. data/resources/unicode_data/old_persian.yml +801 -0
  205. data/resources/unicode_data/old_south_arabian.yml +513 -0
  206. data/resources/unicode_data/old_turkic.yml +1169 -0
  207. data/resources/unicode_data/optical_character_recognition.yml +177 -0
  208. data/resources/unicode_data/oriya.yml +1441 -0
  209. data/resources/unicode_data/osmanya.yml +641 -0
  210. data/resources/unicode_data/phags_pa.yml +897 -0
  211. data/resources/unicode_data/phaistos_disc.yml +737 -0
  212. data/resources/unicode_data/phoenician.yml +465 -0
  213. data/resources/unicode_data/phonetic_extensions.yml +2049 -0
  214. data/resources/unicode_data/phonetic_extensions_supplement.yml +1025 -0
  215. data/resources/unicode_data/playing_cards.yml +945 -0
  216. data/resources/unicode_data/private_use_area.yml +33 -0
  217. data/resources/unicode_data/rejang.yml +593 -0
  218. data/resources/unicode_data/rumi_numeral_symbols.yml +497 -0
  219. data/resources/unicode_data/runic.yml +1297 -0
  220. data/resources/unicode_data/samaritan.yml +977 -0
  221. data/resources/unicode_data/saurashtra.yml +1297 -0
  222. data/resources/unicode_data/sharada.yml +1329 -0
  223. data/resources/unicode_data/shavian.yml +769 -0
  224. data/resources/unicode_data/sinhala.yml +1281 -0
  225. data/resources/unicode_data/small_form_variants.yml +417 -0
  226. data/resources/unicode_data/sora_sompeng.yml +561 -0
  227. data/resources/unicode_data/spacing_modifier_letters.yml +1281 -0
  228. data/resources/unicode_data/specials.yml +81 -0
  229. data/resources/unicode_data/sundanese.yml +1025 -0
  230. data/resources/unicode_data/sundanese_supplement.yml +129 -0
  231. data/resources/unicode_data/superscripts_and_subscripts.yml +673 -0
  232. data/resources/unicode_data/supplemental_arrows_a.yml +257 -0
  233. data/resources/unicode_data/supplemental_arrows_b.yml +2049 -0
  234. data/resources/unicode_data/supplemental_mathematical_operators.yml +4097 -0
  235. data/resources/unicode_data/supplemental_punctuation.yml +961 -0
  236. data/resources/unicode_data/supplementary_private_use_area_a.yml +33 -0
  237. data/resources/unicode_data/supplementary_private_use_area_b.yml +33 -0
  238. data/resources/unicode_data/syloti_nagri.yml +705 -0
  239. data/resources/unicode_data/syriac.yml +1233 -0
  240. data/resources/unicode_data/tagalog.yml +321 -0
  241. data/resources/unicode_data/tagbanwa.yml +289 -0
  242. data/resources/unicode_data/tags.yml +1553 -0
  243. data/resources/unicode_data/tai_le.yml +561 -0
  244. data/resources/unicode_data/tai_tham.yml +2033 -0
  245. data/resources/unicode_data/tai_viet.yml +1153 -0
  246. data/resources/unicode_data/tai_xuan_jing_symbols.yml +1393 -0
  247. data/resources/unicode_data/takri.yml +1057 -0
  248. data/resources/unicode_data/tamil.yml +1153 -0
  249. data/resources/unicode_data/telugu.yml +1489 -0
  250. data/resources/unicode_data/thaana.yml +801 -0
  251. data/resources/unicode_data/thai.yml +1393 -0
  252. data/resources/unicode_data/tibetan.yml +3377 -0
  253. data/resources/unicode_data/tifinagh.yml +945 -0
  254. data/resources/unicode_data/transport_and_map_symbols.yml +1121 -0
  255. data/resources/unicode_data/ugaritic.yml +497 -0
  256. data/resources/unicode_data/unified_canadian_aboriginal_syllabics.yml +10241 -0
  257. data/resources/unicode_data/unified_canadian_aboriginal_syllabics_extended.yml +1121 -0
  258. data/resources/unicode_data/vai.yml +4801 -0
  259. data/resources/unicode_data/variation_selectors.yml +257 -0
  260. data/resources/unicode_data/variation_selectors_supplement.yml +3841 -0
  261. data/resources/unicode_data/vedic_extensions.yml +625 -0
  262. data/resources/unicode_data/vertical_forms.yml +161 -0
  263. data/resources/unicode_data/yi_radicals.yml +881 -0
  264. data/resources/unicode_data/yi_syllables.yml +18641 -0
  265. data/resources/unicode_data/yijing_hexagram_symbols.yml +1025 -0
  266. data/spec/ext/calendars/date_spec.rb +5 -1
  267. data/spec/ext/calendars/datetime_spec.rb +5 -1
  268. data/spec/ext/calendars/time_spec.rb +5 -1
  269. data/spec/ext/numbers/bignum_spec.rb +5 -1
  270. data/spec/ext/numbers/fixnum_spec.rb +5 -1
  271. data/spec/ext/numbers/float_spec.rb +5 -1
  272. data/spec/ext/numbers/localized_number_spec.rb +5 -1
  273. data/spec/ext/strings/string_spec.rb +102 -0
  274. data/spec/ext/strings/symbol_spec.rb +5 -1
  275. data/spec/formatters/base_spec.rb +5 -1
  276. data/spec/formatters/calendars/datetime_formatter_spec.rb +5 -1
  277. data/spec/formatters/numbers/currency_formatter_spec.rb +5 -1
  278. data/spec/formatters/numbers/decimal_formatter_spec.rb +5 -1
  279. data/spec/formatters/numbers/helpers/fraction_spec.rb +5 -1
  280. data/spec/formatters/numbers/helpers/integer_spec.rb +5 -1
  281. data/spec/formatters/numbers/number_formatter_spec.rb +6 -2
  282. data/spec/formatters/numbers/percent_formatter_spec.rb +5 -1
  283. data/spec/formatters/plurals/plural_formatter_spec.rb +205 -0
  284. data/spec/formatters/plurals/rules_spec.rb +28 -28
  285. data/spec/normalizers/NormalizationTest.txt +602 -0
  286. data/spec/normalizers/base_spec.rb +16 -0
  287. data/spec/normalizers/canonical/nfd_spec.rb +50 -0
  288. data/spec/shared/currencies_spec.rb +5 -1
  289. data/spec/shared/languages_spec.rb +5 -1
  290. data/spec/shared/resources_spec.rb +5 -18
  291. data/spec/shared/unicode_data_spec.rb +51 -0
  292. data/spec/spec_helper.rb +6 -3
  293. data/spec/tokenizers/base_spec.rb +3 -0
  294. data/spec/tokenizers/calendars/date_tokenizer_spec.rb +5 -1
  295. data/spec/tokenizers/calendars/datetime_tokenizer_spec.rb +5 -1
  296. data/spec/tokenizers/calendars/time_tokenizer_spec.rb +5 -1
  297. data/spec/tokenizers/key_path_spec.rb +3 -0
  298. data/spec/tokenizers/numbers/number_tokenizer_spec.rb +5 -1
  299. data/spec/tokenizers/token_spec.rb +5 -1
  300. data/spec/twitter_cldr_spec.rb +23 -1
  301. data/spec/utils/interpolation_spec.rb +124 -0
  302. data/spec/utils_spec.rb +32 -0
  303. metadata +285 -21
@@ -0,0 +1,16 @@
1
+ # encoding: UTF-8
2
+
3
+ require 'spec_helper'
4
+
5
+ describe Base do
6
+ describe "#code_point_to_char" do
7
+ it "converts unicode code points to the actual character" do
8
+ TwitterCldr::Normalizers::Base.code_point_to_char("221E").should == "∞"
9
+ end
10
+ end
11
+ describe "#char_to_code_point" do
12
+ it "converts a character to a unicode code point" do
13
+ TwitterCldr::Normalizers::Base.char_to_code_point("∞").should == "221E"
14
+ end
15
+ end
16
+ end
@@ -0,0 +1,50 @@
1
+ # encoding: UTF-8
2
+
3
+ require 'spec_helper'
4
+
5
+ include TwitterCldr::Normalizers
6
+
7
+ describe NFD do
8
+ describe "#normalize" do
9
+ NFD.normalize("庠摪饢鼢豦樄澸脧鱵礩翜艰").should == "庠摪饢鼢豦樄澸脧鱵礩翜艰"
10
+ NFD.normalize("䷙䷿").should == "䷙䷿"
11
+ NFD.normalize("ᎿᎲᎪᏨᎨᏪᎧᎵᏥ").should == "ᎿᎲᎪᏨᎨᏪᎧᎵᏥ"
12
+ NFD.normalize("ᆙᅓᆼᄋᇶ").should == "ᆙᅓᆼᄋᇶ"
13
+ NFD.normalize("…‾⁋
⁒‒′‾⁖").should == "…‾⁋
⁒‒′‾⁖"
14
+ NFD.normalize("ⶾⷕⶱⷀ").should == "ⶾⷕⶱⷀ"
15
+ end
16
+
17
+ describe "#decompose" do
18
+ it "does not decompose a character with no decomposition mapping" do
19
+ code_points = ["0EB8", "041F", "0066", "1F52C", "A2D6"]
20
+ code_points.each do |code_point|
21
+ NFD.decompose(code_point).should == code_point
22
+ end
23
+ end
24
+
25
+ it "does not decompose a character with compatibility decomposition mapping" do
26
+ code_points = ["A770", "FB02", "FC35", "FD20", "00BC"]
27
+ code_points.each do |code_point|
28
+ NFD.decompose(code_point).should == code_point
29
+ end
30
+ end
31
+ end
32
+
33
+ describe "#normalize_code_points" do
34
+ it "passes all the tests in NormalizersTest.txt" do
35
+ normalization_test_file = File.join(File.dirname(File.dirname(__FILE__)), "NormalizationTest.txt")
36
+ File.open(normalization_test_file, "r:UTF-8") do |file|
37
+ while line = file.gets
38
+ unless line[0,1] =~ /(@|#)/ || line.empty?
39
+ c1, c2, c3, c4, c5 = line.split(';')[0...5].map { |cps| cps.split }
40
+ NFD.normalize_code_points(c1).should == c3
41
+ NFD.normalize_code_points(c2).should == c3
42
+ NFD.normalize_code_points(c3).should == c3
43
+ NFD.normalize_code_points(c4).should == c5
44
+ NFD.normalize_code_points(c5).should == c5
45
+ end
46
+ end
47
+ end
48
+ end
49
+ end
50
+ end
@@ -1,6 +1,10 @@
1
1
  # encoding: UTF-8
2
2
 
3
- require File.join(File.dirname(__FILE__), %w[.. spec_helper])
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
6
+ require 'spec_helper'
7
+
4
8
  include TwitterCldr::Shared
5
9
 
6
10
  TEST_COUNTRIES = ["Australia", "Thailand", "Russia", "China", "Japan", "Peru", "South Africa", "India", "South Korea", "United Kingdom"]
@@ -1,6 +1,10 @@
1
1
  # encoding: UTF-8
2
2
 
3
- require File.join(File.dirname(__FILE__), %w[.. spec_helper])
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
6
+ require 'spec_helper'
7
+
4
8
  include TwitterCldr::Shared
5
9
 
6
10
  describe Languages do
@@ -1,6 +1,10 @@
1
1
  # encoding: UTF-8
2
2
 
3
- require File.join(File.dirname(__FILE__), %w[.. spec_helper])
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
6
+ require 'spec_helper'
7
+
4
8
  include TwitterCldr::Shared
5
9
 
6
10
  describe Resources do
@@ -26,21 +30,4 @@ describe Resources do
26
30
  @resource.resource_for("de", "racehorse").should == { :key => "value" }
27
31
  end
28
32
  end
29
-
30
- describe "#deep_symbolize_keys" do
31
- it "should work with a regular hash" do
32
- result = @resource.send(:deep_symbolize_keys, { "twitter" => "rocks", "my" => "socks" })
33
- result.should == { :twitter => "rocks", :my => "socks"}
34
- end
35
-
36
- it "should work with nested hashes" do
37
- result = @resource.send(:deep_symbolize_keys, { "twitter" => { "rocks" => "my socks" } })
38
- result.should == { :twitter => { :rocks => "my socks" } }
39
- end
40
-
41
- it "should work with nested hashes and arrays" do
42
- result = @resource.send(:deep_symbolize_keys, { "twitter" => { "rocks_my" => [{ "socks" => "and mind" }, { "hard" => "core" }] } })
43
- result.should == { :twitter => { :rocks_my => [{ :socks => "and mind" }, { :hard => "core" }] } }
44
- end
45
- end
46
33
  end
@@ -0,0 +1,51 @@
1
+ # encoding: UTF-8
2
+
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
6
+ require 'spec_helper'
7
+
8
+ include TwitterCldr::Shared
9
+
10
+ describe UnicodeData do
11
+ describe "#for_code_point" do
12
+ it "should retrieve information for any valid code point" do
13
+ data = UnicodeData.for_code_point('0301')
14
+ data.should be_a(Struct)
15
+ data.length.should == 15
16
+ end
17
+
18
+ it "should return nil for invalid code points" do
19
+ UnicodeData.for_code_point('abcd').should be_nil
20
+ UnicodeData.for_code_point('FFFFFFF').should be_nil
21
+ UnicodeData.for_code_point('uytukhil123').should be_nil
22
+ end
23
+
24
+ it "fetches valid information for the specified code point" do
25
+ test_data = {
26
+ '17D1' => ['17D1','KHMER SIGN VIRIAM','Mn','0','NSM',"","","","",'N',"","","","",""],
27
+ 'FE91' => ['FE91','ARABIC LETTER BEH INITIAL FORM','Lo','0','AL','<initial> 0628',"","","",'N','GLYPH FOR INITIAL ARABIC BAA',"","","",""],
28
+ '24B5' => ['24B5','PARENTHESIZED LATIN SMALL LETTER Z','So','0','L','<compat> 0028 007A 0029',"","","",'N',"","","","",""],
29
+ '2128' => ['2128','BLACK-LETTER CAPITAL Z','Lu','0','L','<font> 005A',"","","",'N','BLACK-LETTER Z',"","","",""],
30
+ '1F241'=> ['1F241','TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-4E09','So','0','L','<compat> 3014 4E09 3015',"","","",'N',"","","","",""]
31
+ }
32
+ test_data.each_pair do |code_point, data|
33
+ UnicodeData.for_code_point(code_point).values.should == data
34
+ end
35
+ end
36
+
37
+ it "fetches valid information for a code point within a range" do
38
+ test_data = {
39
+ '4E11' => ["4E11","<CJK Ideograph>","Lo","0","L","","","","","N","","","","",""],
40
+ 'AC55' => ["AC55","<Hangul Syllable>","Lo","0","L","","","","","N","","","","",""],
41
+ 'D7A1' => ["D7A1","<Hangul Syllable>","Lo","0","L","","","","","N","","","","",""],
42
+ 'DAAA' => ["DAAA","<Non Private Use High Surrogate>","Cs","0","L","","","","","N","","","","",""],
43
+ 'F8FE' => ["F8FE","<Private Use>","Co","0","L","","","","","N","","","","",""]
44
+ }
45
+
46
+ test_data.each_pair do |code_point, data|
47
+ UnicodeData.for_code_point(code_point).values.should == data
48
+ end
49
+ end
50
+ end
51
+ end
data/spec/spec_helper.rb CHANGED
@@ -1,7 +1,10 @@
1
1
  # encoding: UTF-8
2
2
 
3
- require File.expand_path(File.join(File.dirname(__FILE__), %w[.. lib twitter_cldr]))
4
- FIXTURE_DIR = File.expand_path(File.join(File.dirname(__FILE__), %w[fixtures]))
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
6
+ require 'rspec'
7
+ require 'twitter_cldr'
5
8
 
6
9
  class FastGettext
7
10
  class << self
@@ -15,7 +18,7 @@ class FastGettext
15
18
  end
16
19
  end
17
20
 
18
- Spec::Runner.configure do |config|
21
+ RSpec.configure do |config|
19
22
  config.mock_with :rr
20
23
 
21
24
  config.before(:each) do
@@ -1,5 +1,8 @@
1
1
  # encoding: UTF-8
2
2
 
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
3
6
  require File.join(File.dirname(File.dirname(__FILE__)), "spec_helper")
4
7
  include TwitterCldr::Tokenizers
5
8
 
@@ -1,6 +1,10 @@
1
1
  # encoding: UTF-8
2
2
 
3
- require File.join(File.dirname(File.dirname(File.dirname(__FILE__))), "spec_helper")
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
6
+ require 'spec_helper'
7
+
4
8
  include TwitterCldr::Tokenizers
5
9
 
6
10
  describe DateTokenizer do
@@ -1,6 +1,10 @@
1
1
  # encoding: UTF-8
2
2
 
3
- require File.join(File.dirname(File.dirname(File.dirname(__FILE__))), "spec_helper")
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
6
+ require 'spec_helper'
7
+
4
8
  include TwitterCldr::Tokenizers
5
9
 
6
10
  describe DateTimeTokenizer do
@@ -1,6 +1,10 @@
1
1
  # encoding: UTF-8
2
2
 
3
- require File.join(File.dirname(File.dirname(File.dirname(__FILE__))), "spec_helper")
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
6
+ require 'spec_helper'
7
+
4
8
  include TwitterCldr::Tokenizers
5
9
 
6
10
  describe DateTokenizer do
@@ -1,5 +1,8 @@
1
1
  # encoding: UTF-8
2
2
 
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
3
6
  require File.join(File.dirname(File.dirname(__FILE__)), "spec_helper")
4
7
  include TwitterCldr::Tokenizers
5
8
 
@@ -1,6 +1,10 @@
1
1
  # encoding: UTF-8
2
2
 
3
- require File.join(File.dirname(File.dirname(File.dirname(__FILE__))), "spec_helper")
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
6
+ require 'spec_helper'
7
+
4
8
  include TwitterCldr::Tokenizers
5
9
 
6
10
  describe NumberTokenizer do
@@ -1,6 +1,10 @@
1
1
  # encoding: UTF-8
2
2
 
3
- require File.join(File.dirname(File.dirname(__FILE__)), "spec_helper")
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
6
+ require 'spec_helper'
7
+
4
8
  include TwitterCldr::Tokenizers
5
9
 
6
10
  describe Token do
@@ -1,6 +1,9 @@
1
1
  # encoding: UTF-8
2
2
 
3
- require File.join(File.dirname(__FILE__), "spec_helper")
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
6
+ require 'spec_helper'
4
7
 
5
8
  describe TwitterCldr do
6
9
  describe "#supported_locale?" do
@@ -52,4 +55,23 @@ describe TwitterCldr do
52
55
  end
53
56
  end
54
57
  end
58
+
59
+ describe '#resources' do
60
+ it 'returns @@resources' do
61
+ resources = TwitterCldr::Shared::Resources.new
62
+ TwitterCldr.send :class_variable_set, :@@resources, resources
63
+
64
+ TwitterCldr.resources.should == resources
65
+ end
66
+ end
67
+
68
+ describe '#get_resource' do
69
+ it 'delegates to @@resources' do
70
+ resources = TwitterCldr::Shared::Resources.new
71
+ mock(resources).resource_for('locale', 'resource') { 'result' }
72
+ TwitterCldr.send :class_variable_set, :@@resources, resources
73
+
74
+ TwitterCldr.get_resource('locale', 'resource').should == 'result'
75
+ end
76
+ end
55
77
  end
@@ -0,0 +1,124 @@
1
+ # encoding: UTF-8
2
+
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
6
+ # Some test cases were taken from i18n (https://github.com/svenfuchs/i18n/blob/89ea337f48562370988421e50caa7c2fe89452c7/test/core_ext/string/interpolate_test.rb)
7
+ # and gettext (https://github.com/mutoh/gettext/blob/11b8c1525ba9f00afb1942f7ebf34bec12f7558b/test/test_string.rb) gems.
8
+ #
9
+ # See NOTICE file for corresponding license agreements.
10
+
11
+
12
+ require 'spec_helper'
13
+
14
+ describe TwitterCldr::Utils do
15
+ describe '#interpolate' do
16
+
17
+ context 'when argument is a Hash' do
18
+ it 'does nothing if no placeholder give' do
19
+ TwitterCldr::Utils.interpolate('foo', :foo => 'bar').should == 'foo'
20
+ end
21
+
22
+ it 'interpolates named placeholders' do
23
+ TwitterCldr::Utils.interpolate('%{digit} %{sign} %{digit}', :digit => 2, :sign => '+').should == '2 + 2'
24
+ end
25
+
26
+ it 'interpolates named placeholders with formatting' do
27
+ TwitterCldr::Utils.interpolate(
28
+ '%<as_integer>d %<as_float>.2f', :as_integer => 3.14, :as_float => 15
29
+ ).should == '3 15.00'
30
+ end
31
+
32
+ it 'interpolates mixed placeholders' do
33
+ TwitterCldr::Utils.interpolate(
34
+ '%{regular} is approx. %<pi>.4f', :regular => 'pi', :pi => 3.141592
35
+ ).should == 'pi is approx. 3.1416'
36
+ end
37
+
38
+ it 'does not recurse' do
39
+ TwitterCldr::Utils.interpolate(
40
+ '%{top_level}', :top_level => '%<second_level>', :second_level => 'unexpected'
41
+ ).should == '%<second_level>'
42
+ end
43
+
44
+ it 'treats % before placeholder as escape character' do
45
+ TwitterCldr::Utils.interpolate(
46
+ '%%{foo} = %{foo}, %%<bar>d = %<bar>d', :foo => 1, :bar => 2.3
47
+ ).should == '%{foo} = 1, %<bar>d = 2'
48
+ end
49
+
50
+ it 'interpolates formatted placeholders as Ruby 1.9' do
51
+ TwitterCldr::Utils.interpolate('%<msg>s', :msg => 'foo').should == 'foo'
52
+ TwitterCldr::Utils.interpolate('%<num>d', :num => 1 ).should == '1'
53
+ TwitterCldr::Utils.interpolate('%<num>f', :num => 1.0 ).should == '1.000000'
54
+ TwitterCldr::Utils.interpolate('%<num>3.0f', :num => 1.0 ).should == ' 1'
55
+ TwitterCldr::Utils.interpolate('%<num>2.2f', :num => 100.0).should == '100.00'
56
+ TwitterCldr::Utils.interpolate('%<num>#b', :num => 1 ).should == '0b1'
57
+ TwitterCldr::Utils.interpolate('%<num>#x', :num => 100.0).should == '0x64'
58
+ end
59
+
60
+ it 'ignores extra values' do
61
+ TwitterCldr::Utils.interpolate('%{msg}', :msg => 'hello', :extra => 'extra').should == 'hello'
62
+ end
63
+
64
+ it 'raises ArgumentError if formatted placeholder is malformed' do
65
+ lambda { TwitterCldr::Utils.interpolate('%<num>,d', :num => 100) }.should raise_error(ArgumentError)
66
+ lambda { TwitterCldr::Utils.interpolate('%<num>/d', :num => 100) }.should raise_error(ArgumentError)
67
+ end
68
+
69
+ it 'raises KeyError when the value is missing' do
70
+ lambda { TwitterCldr::Utils.interpolate('%{msg}', {}) }.should raise_error(KeyError)
71
+ end
72
+ end
73
+
74
+ context 'when argument is an Array' do
75
+ it 'does nothing if no placeholder give' do
76
+ TwitterCldr::Utils.interpolate('foo', [111]).should == 'foo'
77
+ end
78
+
79
+ it 'interpolates all placeholders' do
80
+ TwitterCldr::Utils.interpolate('%d %s', [12, 'monkeys']).should == '12 monkeys'
81
+ end
82
+
83
+ it 'interpolates all placeholders with formatting' do
84
+ TwitterCldr::Utils.interpolate('%d %.3f %#b', [3.1415, 92, 6]).should == '3 92.000 0b110'
85
+ end
86
+
87
+ it 'formats positional arguments' do
88
+ TwitterCldr::Utils.interpolate('%1$*2$s %2$d %1$s', ['hello', 8]).should == ' hello 8 hello'
89
+ end
90
+
91
+ it 'treats % as escape character' do
92
+ TwitterCldr::Utils.interpolate('%s: %+.2f±%.2f%%', ['total', 3.14159, 2.6535]).should == 'total: +3.14±2.65%'
93
+ end
94
+
95
+ it 'ignores extra values' do
96
+ TwitterCldr::Utils.interpolate('%d', [2, 1]).should == '2'
97
+ end
98
+
99
+ it 'raises ArgumentError when given not enough values' do
100
+ lambda { TwitterCldr::Utils.interpolate('%d %d', [1]) }.should raise_error(ArgumentError)
101
+ end
102
+
103
+ it 'raises ArgumentError if the string contains named placeholders' do
104
+ lambda { TwitterCldr::Utils.interpolate('%{name} %d', [1, 2]) }.should raise_error(ArgumentError)
105
+ end
106
+ end
107
+
108
+ context 'when argument is a single value' do
109
+ it 'formats a string' do
110
+ TwitterCldr::Utils.interpolate('a string: %s', 'string').should == 'a string: string'
111
+ end
112
+
113
+ it 'formats a number' do
114
+ TwitterCldr::Utils.interpolate('a number: %4.1f', 3.1415).should == 'a number: 3.1'
115
+ end
116
+
117
+ it 'raises ArgumentError if the string contains named placeholders' do
118
+ lambda { TwitterCldr::Utils.interpolate('%{name}', 'must be hash') }.should raise_error(ArgumentError)
119
+ end
120
+ end
121
+
122
+ end
123
+
124
+ end
@@ -0,0 +1,32 @@
1
+ # encoding: UTF-8
2
+
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
6
+ require 'spec_helper'
7
+
8
+ describe TwitterCldr::Utils do
9
+ describe '#deep_symbolize_keys' do
10
+
11
+ let(:hash) { { 'foo' => { 'bar' => { 'baz' => 'woot' }, :ar => [1, 2] }, 42 => { 'baz' => 'wat' } } }
12
+
13
+ let(:symbolized_hash) { { :foo => { :bar => { :baz => 'woot' }, :ar => [1, 2] }, 42 => { :baz => 'wat' } } }
14
+
15
+ it 'symbolizes string keys of a hash' do
16
+ TwitterCldr::Utils.deep_symbolize_keys(hash).should == symbolized_hash
17
+ end
18
+
19
+ it 'deeply symbolizes elements of an array' do
20
+ TwitterCldr::Utils.deep_symbolize_keys([1, hash, 'foo', :bar]).should == [1, symbolized_hash, 'foo', :bar]
21
+ end
22
+
23
+ it 'deeply symbolizes elements of an array nested in a hash' do
24
+ TwitterCldr::Utils.deep_symbolize_keys({ 'foo' => [1, hash] }).should == { :foo => [1, symbolized_hash] }
25
+ end
26
+
27
+ it 'leaves arguments of other types alone' do
28
+ ['foo', :bar, 42].each { |arg| TwitterCldr::Utils.deep_symbolize_keys(arg).should == arg }
29
+ end
30
+
31
+ end
32
+ end