twitter_cldr 1.6.2 → 1.7.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +3 -1
- data/History.txt +8 -0
- data/README.md +64 -14
- data/Rakefile +57 -7
- data/js/lib/compiler.rb +3 -1
- data/js/lib/mustache/bundle.coffee +5 -5
- data/js/lib/mustache/numbers/numbers.coffee +179 -0
- data/js/lib/mustache/shared/currencies.coffee +27 -0
- data/js/lib/renderers/numbers/numbers_renderer.rb +31 -0
- data/js/lib/renderers/plurals/rules/plural_rules_compiler.rb +5 -1
- data/js/lib/renderers/shared/currencies_renderer.rb +23 -0
- data/js/lib/twitter_cldr_js.rb +2 -0
- data/js/spec/js/calendars/timespan_spec.js +2 -2
- data/js/spec/js/numbers/currency_spec.js +34 -0
- data/js/spec/js/numbers/decimal_spec.js +24 -0
- data/js/spec/js/numbers/helpers/fraction_spec.js +23 -0
- data/js/spec/js/numbers/helpers/integer_spec.js +100 -0
- data/js/spec/js/numbers/number_spec.js +70 -0
- data/js/spec/js/numbers/percent_spec.js +22 -0
- data/js/spec/rb/renderers/plurals/plural_rules_compiler_spec.rb +4 -0
- data/lib/twitter_cldr/collation/collator.rb +14 -20
- data/lib/twitter_cldr/collation/trie_builder.rb +3 -3
- data/lib/twitter_cldr/formatters/numbers/decimal_formatter.rb +0 -6
- data/lib/twitter_cldr/formatters/numbers/helpers/base.rb +1 -1
- data/lib/twitter_cldr/formatters/numbers/number_formatter.rb +6 -2
- data/lib/twitter_cldr/normalization/base.rb +7 -1
- data/lib/twitter_cldr/normalization/nfd.rb +2 -6
- data/lib/twitter_cldr/normalization/nfkc.rb +4 -10
- data/lib/twitter_cldr/normalization/nfkd.rb +12 -37
- data/lib/twitter_cldr/resources.rb +10 -3
- data/lib/twitter_cldr/resources/canonical_compositions_updater.rb +51 -0
- data/lib/twitter_cldr/resources/composition_exclusions_importer.rb +62 -0
- data/lib/twitter_cldr/resources/custom_locales_resources_importer.rb +80 -0
- data/lib/twitter_cldr/resources/download.rb +41 -0
- data/lib/twitter_cldr/resources/loader.rb +1 -1
- data/lib/twitter_cldr/resources/locales_resources_importer.rb +105 -0
- data/lib/twitter_cldr/resources/phone_codes_importer.rb +48 -0
- data/lib/twitter_cldr/resources/postal_codes_importer.rb +44 -0
- data/lib/twitter_cldr/resources/tailoring_importer.rb +18 -16
- data/lib/twitter_cldr/resources/unicode_data_importer.rb +90 -0
- data/lib/twitter_cldr/shared.rb +3 -1
- data/lib/twitter_cldr/shared/code_point.rb +47 -36
- data/lib/twitter_cldr/shared/currencies.rb +12 -15
- data/lib/twitter_cldr/shared/phone_codes.rb +30 -0
- data/lib/twitter_cldr/shared/postal_codes.rb +35 -0
- data/lib/twitter_cldr/tokenizers/calendars/timespan_tokenizer.rb +11 -3
- data/lib/twitter_cldr/tokenizers/numbers/number_tokenizer.rb +8 -1
- data/lib/twitter_cldr/utils/code_points.rb +2 -2
- data/lib/twitter_cldr/version.rb +1 -1
- data/resources/collation/tailoring/af.yml +3 -3
- data/resources/collation/tailoring/ar.yml +3 -3
- data/resources/collation/tailoring/ca.yml +3 -3
- data/resources/collation/tailoring/cs.yml +3 -3
- data/resources/collation/tailoring/da.yml +4 -4
- data/resources/collation/tailoring/de.yml +3 -3
- data/resources/collation/tailoring/el.yml +3 -3
- data/resources/collation/tailoring/en.yml +3 -3
- data/resources/collation/tailoring/es.yml +3 -3
- data/resources/collation/tailoring/eu.yml +3 -3
- data/resources/collation/tailoring/fa.yml +3 -3
- data/resources/collation/tailoring/fi.yml +3 -3
- data/resources/collation/tailoring/fil.yml +3 -3
- data/resources/collation/tailoring/fr.yml +3 -3
- data/resources/collation/tailoring/he.yml +3 -3
- data/resources/collation/tailoring/hi.yml +3 -3
- data/resources/collation/tailoring/hu.yml +3 -3
- data/resources/collation/tailoring/id.yml +3 -3
- data/resources/collation/tailoring/it.yml +3 -3
- data/resources/collation/tailoring/ja.yml +3 -3
- data/resources/collation/tailoring/ko.yml +3 -3
- data/resources/collation/tailoring/ms.yml +3 -3
- data/resources/collation/tailoring/nb.yml +3 -3
- data/resources/collation/tailoring/nl.yml +3 -3
- data/resources/collation/tailoring/pl.yml +3 -3
- data/resources/collation/tailoring/pt.yml +3 -3
- data/resources/collation/tailoring/ru.yml +3 -3
- data/resources/collation/tailoring/sv.yml +3 -3
- data/resources/collation/tailoring/th.yml +3 -3
- data/resources/collation/tailoring/tr.yml +3 -3
- data/resources/collation/tailoring/uk.yml +3 -3
- data/resources/collation/tailoring/ur.yml +3 -3
- data/resources/collation/tailoring/zh-Hant.yml +3 -3
- data/resources/collation/tailoring/zh.yml +3 -3
- data/resources/custom/locales/af/units.yml +19 -19
- data/resources/custom/locales/ar/units.yml +35 -35
- data/resources/custom/locales/ca/units.yml +19 -19
- data/resources/custom/locales/cs/units.yml +23 -23
- data/resources/custom/locales/da/units.yml +19 -19
- data/resources/custom/locales/de/units.yml +19 -19
- data/resources/custom/locales/el/units.yml +19 -19
- data/resources/custom/locales/en/units.yml +10 -10
- data/resources/custom/locales/es/units.yml +19 -19
- data/resources/custom/locales/eu/units.yml +19 -19
- data/resources/custom/locales/fa/units.yml +15 -15
- data/resources/custom/locales/fi/units.yml +19 -19
- data/resources/custom/locales/fil/units.yml +19 -19
- data/resources/custom/locales/fr/units.yml +19 -19
- data/resources/custom/locales/he/units.yml +19 -19
- data/resources/custom/locales/hi/units.yml +19 -19
- data/resources/custom/locales/hu/units.yml +15 -15
- data/resources/custom/locales/id/units.yml +15 -15
- data/resources/custom/locales/it/units.yml +19 -19
- data/resources/custom/locales/ja/units.yml +15 -15
- data/resources/custom/locales/ko/units.yml +15 -15
- data/resources/custom/locales/ms/units.yml +15 -15
- data/resources/custom/locales/nb/units.yml +19 -19
- data/resources/custom/locales/nl/units.yml +19 -19
- data/resources/custom/locales/pl/units.yml +27 -23
- data/resources/custom/locales/pt/units.yml +19 -19
- data/resources/custom/locales/ru/units.yml +27 -27
- data/resources/custom/locales/sv/units.yml +19 -19
- data/resources/custom/locales/th/units.yml +15 -15
- data/resources/custom/locales/tr/units.yml +15 -15
- data/resources/custom/locales/uk/units.yml +27 -27
- data/resources/custom/locales/ur/units.yml +19 -19
- data/resources/custom/locales/zh-Hant/units.yml +15 -15
- data/resources/custom/locales/zh/units.yml +15 -15
- data/resources/locales/af/calendars.yml +114 -113
- data/resources/locales/af/languages.yml +174 -173
- data/resources/locales/af/numbers.yml +43 -42
- data/resources/locales/af/plurals.yml +3 -2
- data/resources/locales/af/units.yml +136 -135
- data/resources/locales/ar/calendars.yml +121 -120
- data/resources/locales/ar/languages.yml +501 -500
- data/resources/locales/ar/numbers.yml +36 -35
- data/resources/locales/ar/plurals.yml +9 -1
- data/resources/locales/ar/units.yml +220 -219
- data/resources/locales/ca/calendars.yml +157 -156
- data/resources/locales/ca/languages.yml +511 -510
- data/resources/locales/ca/numbers.yml +44 -43
- data/resources/locales/ca/plurals.yml +3 -2
- data/resources/locales/ca/units.yml +136 -135
- data/resources/locales/cs/calendars.yml +153 -152
- data/resources/locales/cs/languages.yml +472 -471
- data/resources/locales/cs/numbers.yml +45 -44
- data/resources/locales/cs/plurals.yml +3 -2
- data/resources/locales/cs/units.yml +164 -163
- data/resources/locales/da/calendars.yml +117 -116
- data/resources/locales/da/languages.yml +515 -514
- data/resources/locales/da/numbers.yml +44 -43
- data/resources/locales/da/plurals.yml +3 -1
- data/resources/locales/da/units.yml +122 -121
- data/resources/locales/de/calendars.yml +136 -135
- data/resources/locales/de/languages.yml +514 -513
- data/resources/locales/de/numbers.yml +44 -43
- data/resources/locales/de/plurals.yml +3 -1
- data/resources/locales/de/units.yml +136 -135
- data/resources/locales/el/calendars.yml +138 -137
- data/resources/locales/el/languages.yml +520 -519
- data/resources/locales/el/numbers.yml +43 -42
- data/resources/locales/el/plurals.yml +3 -2
- data/resources/locales/el/units.yml +143 -142
- data/resources/locales/en/calendars.yml +117 -116
- data/resources/locales/en/languages.yml +559 -558
- data/resources/locales/en/numbers.yml +32 -31
- data/resources/locales/en/plurals.yml +3 -1
- data/resources/locales/en/units.yml +108 -107
- data/resources/locales/es/calendars.yml +118 -117
- data/resources/locales/es/languages.yml +511 -510
- data/resources/locales/es/numbers.yml +42 -41
- data/resources/locales/es/plurals.yml +3 -1
- data/resources/locales/es/units.yml +136 -135
- data/resources/locales/eu/calendars.yml +124 -123
- data/resources/locales/eu/languages.yml +162 -161
- data/resources/locales/eu/numbers.yml +44 -43
- data/resources/locales/eu/plurals.yml +3 -2
- data/resources/locales/eu/units.yml +129 -128
- data/resources/locales/fa/calendars.yml +137 -136
- data/resources/locales/fa/languages.yml +489 -488
- data/resources/locales/fa/numbers.yml +31 -30
- data/resources/locales/fa/plurals.yml +3 -1
- data/resources/locales/fa/units.yml +122 -121
- data/resources/locales/fi/calendars.yml +153 -152
- data/resources/locales/fi/languages.yml +520 -519
- data/resources/locales/fi/numbers.yml +44 -43
- data/resources/locales/fi/plurals.yml +3 -1
- data/resources/locales/fi/units.yml +136 -135
- data/resources/locales/fil/calendars.yml +123 -122
- data/resources/locales/fil/languages.yml +169 -168
- data/resources/locales/fil/numbers.yml +32 -31
- data/resources/locales/fil/plurals.yml +3 -1
- data/resources/locales/fil/units.yml +122 -121
- data/resources/locales/fr/calendars.yml +144 -143
- data/resources/locales/fr/languages.yml +512 -511
- data/resources/locales/fr/numbers.yml +44 -43
- data/resources/locales/fr/plurals.yml +3 -1
- data/resources/locales/fr/units.yml +136 -135
- data/resources/locales/he/calendars.yml +120 -119
- data/resources/locales/he/languages.yml +282 -281
- data/resources/locales/he/numbers.yml +32 -31
- data/resources/locales/he/plurals.yml +3 -1
- data/resources/locales/he/units.yml +122 -121
- data/resources/locales/hi/calendars.yml +112 -111
- data/resources/locales/hi/languages.yml +511 -510
- data/resources/locales/hi/numbers.yml +32 -31
- data/resources/locales/hi/plurals.yml +3 -1
- data/resources/locales/hi/units.yml +122 -121
- data/resources/locales/hu/calendars.yml +142 -141
- data/resources/locales/hu/languages.yml +520 -519
- data/resources/locales/hu/numbers.yml +43 -42
- data/resources/locales/hu/plurals.yml +3 -1
- data/resources/locales/hu/units.yml +108 -107
- data/resources/locales/id/calendars.yml +117 -116
- data/resources/locales/id/languages.yml +514 -513
- data/resources/locales/id/numbers.yml +43 -42
- data/resources/locales/id/plurals.yml +3 -1
- data/resources/locales/id/units.yml +108 -107
- data/resources/locales/it/calendars.yml +123 -122
- data/resources/locales/it/languages.yml +504 -503
- data/resources/locales/it/numbers.yml +44 -43
- data/resources/locales/it/plurals.yml +3 -1
- data/resources/locales/it/units.yml +122 -121
- data/resources/locales/ja/calendars.yml +109 -108
- data/resources/locales/ja/languages.yml +516 -515
- data/resources/locales/ja/numbers.yml +35 -34
- data/resources/locales/ja/plurals.yml +3 -1
- data/resources/locales/ja/units.yml +108 -107
- data/resources/locales/ko/calendars.yml +112 -111
- data/resources/locales/ko/languages.yml +509 -508
- data/resources/locales/ko/numbers.yml +32 -31
- data/resources/locales/ko/plurals.yml +3 -1
- data/resources/locales/ko/units.yml +108 -107
- data/resources/locales/ms/calendars.yml +134 -133
- data/resources/locales/ms/languages.yml +158 -157
- data/resources/locales/ms/numbers.yml +31 -30
- data/resources/locales/ms/plurals.yml +3 -1
- data/resources/locales/ms/units.yml +136 -135
- data/resources/locales/nb/calendars.yml +143 -142
- data/resources/locales/nb/languages.yml +530 -529
- data/resources/locales/nb/numbers.yml +44 -43
- data/resources/locales/nb/plurals.yml +3 -2
- data/resources/locales/nb/units.yml +130 -129
- data/resources/locales/nl/calendars.yml +124 -123
- data/resources/locales/nl/languages.yml +516 -515
- data/resources/locales/nl/numbers.yml +44 -43
- data/resources/locales/nl/plurals.yml +3 -1
- data/resources/locales/nl/units.yml +122 -121
- data/resources/locales/pl/calendars.yml +148 -147
- data/resources/locales/pl/languages.yml +505 -504
- data/resources/locales/pl/numbers.yml +46 -45
- data/resources/locales/pl/plurals.yml +5 -1
- data/resources/locales/pl/units.yml +205 -204
- data/resources/locales/pt/calendars.yml +131 -130
- data/resources/locales/pt/languages.yml +517 -516
- data/resources/locales/pt/numbers.yml +44 -43
- data/resources/locales/pt/plurals.yml +3 -1
- data/resources/locales/pt/units.yml +136 -135
- data/resources/locales/ru/calendars.yml +143 -142
- data/resources/locales/ru/languages.yml +511 -510
- data/resources/locales/ru/numbers.yml +43 -42
- data/resources/locales/ru/plurals.yml +5 -1
- data/resources/locales/ru/units.yml +192 -191
- data/resources/locales/sv/calendars.yml +151 -150
- data/resources/locales/sv/languages.yml +531 -530
- data/resources/locales/sv/numbers.yml +44 -43
- data/resources/locales/sv/plurals.yml +3 -1
- data/resources/locales/sv/units.yml +136 -135
- data/resources/locales/th/calendars.yml +125 -124
- data/resources/locales/th/languages.yml +510 -509
- data/resources/locales/th/numbers.yml +41 -40
- data/resources/locales/th/plurals.yml +3 -1
- data/resources/locales/th/units.yml +108 -107
- data/resources/locales/tr/calendars.yml +139 -138
- data/resources/locales/tr/languages.yml +511 -510
- data/resources/locales/tr/numbers.yml +43 -42
- data/resources/locales/tr/plurals.yml +3 -1
- data/resources/locales/tr/units.yml +108 -107
- data/resources/locales/uk/calendars.yml +131 -130
- data/resources/locales/uk/languages.yml +520 -519
- data/resources/locales/uk/numbers.yml +46 -45
- data/resources/locales/uk/plurals.yml +5 -2
- data/resources/locales/uk/units.yml +192 -191
- data/resources/locales/ur/calendars.yml +111 -110
- data/resources/locales/ur/languages.yml +164 -163
- data/resources/locales/ur/numbers.yml +32 -31
- data/resources/locales/ur/plurals.yml +3 -1
- data/resources/locales/ur/units.yml +136 -135
- data/resources/locales/zh-Hant/calendars.yml +116 -115
- data/resources/locales/zh-Hant/languages.yml +510 -509
- data/resources/locales/zh-Hant/numbers.yml +35 -34
- data/resources/locales/zh-Hant/plurals.yml +3 -2
- data/resources/locales/zh-Hant/units.yml +108 -107
- data/resources/locales/zh/calendars.yml +138 -137
- data/resources/locales/zh/languages.yml +506 -505
- data/resources/locales/zh/numbers.yml +35 -34
- data/resources/locales/zh/plurals.yml +3 -1
- data/resources/locales/zh/units.yml +80 -79
- data/resources/shared/currencies.yml +448 -451
- data/resources/shared/phone_codes.yml +241 -0
- data/resources/shared/postal_codes.yml +160 -0
- data/resources/unicode_data/blocks.yml +221 -221
- data/resources/unicode_data/blocks/aegean_numbers.yml +913 -0
- data/resources/unicode_data/blocks/alchemical_symbols.yml +1857 -0
- data/resources/unicode_data/blocks/alphabetic_presentation_forms.yml +929 -0
- data/resources/unicode_data/blocks/ancient_greek_musical_notation.yml +1121 -0
- data/resources/unicode_data/blocks/ancient_greek_numbers.yml +1201 -0
- data/resources/unicode_data/blocks/ancient_symbols.yml +193 -0
- data/resources/unicode_data/blocks/arabic.yml +4049 -0
- data/resources/unicode_data/blocks/arabic_extended_a.yml +625 -0
- data/resources/unicode_data/blocks/arabic_mathematical_alphabetic_symbols.yml +2289 -0
- data/resources/unicode_data/{arabic_presentation_forms_a.yml → blocks/arabic_presentation_forms_a.yml} +7369 -7368
- data/resources/unicode_data/{arabic_presentation_forms_b.yml → blocks/arabic_presentation_forms_b.yml} +1414 -1414
- data/resources/unicode_data/blocks/arabic_supplement.yml +769 -0
- data/resources/unicode_data/blocks/armenian.yml +1393 -0
- data/resources/unicode_data/blocks/arrows.yml +1793 -0
- data/resources/unicode_data/blocks/avestan.yml +977 -0
- data/resources/unicode_data/blocks/balinese.yml +1937 -0
- data/resources/unicode_data/blocks/bamum.yml +1409 -0
- data/resources/unicode_data/blocks/bamum_supplement.yml +9105 -0
- data/resources/unicode_data/blocks/basic_latin.yml +2049 -0
- data/resources/unicode_data/blocks/batak.yml +897 -0
- data/resources/unicode_data/blocks/bengali.yml +1473 -0
- data/resources/unicode_data/blocks/block_elements.yml +513 -0
- data/resources/unicode_data/blocks/bopomofo.yml +657 -0
- data/resources/unicode_data/blocks/bopomofo_extended.yml +433 -0
- data/resources/unicode_data/{box_drawing.yml → blocks/box_drawing.yml} +1537 -1537
- data/resources/unicode_data/blocks/brahmi.yml +1729 -0
- data/resources/unicode_data/blocks/braille_patterns.yml +4097 -0
- data/resources/unicode_data/blocks/buginese.yml +481 -0
- data/resources/unicode_data/blocks/buhid.yml +321 -0
- data/resources/unicode_data/blocks/byzantine_musical_symbols.yml +3937 -0
- data/resources/unicode_data/blocks/carian.yml +785 -0
- data/resources/unicode_data/blocks/chakma.yml +1073 -0
- data/resources/unicode_data/blocks/cham.yml +1329 -0
- data/resources/unicode_data/blocks/cherokee.yml +1361 -0
- data/resources/unicode_data/{cjk_compatibility.yml → blocks/cjk_compatibility.yml} +2706 -2706
- data/resources/unicode_data/{cjk_compatibility_forms.yml → blocks/cjk_compatibility_forms.yml} +363 -363
- data/resources/unicode_data/blocks/cjk_compatibility_ideographs.yml +7553 -0
- data/resources/unicode_data/blocks/cjk_compatibility_ideographs_supplement.yml +8673 -0
- data/resources/unicode_data/blocks/cjk_radicals_supplement.yml +1841 -0
- data/resources/unicode_data/blocks/cjk_strokes.yml +577 -0
- data/resources/unicode_data/blocks/cjk_symbols_and_punctuation.yml +1025 -0
- data/resources/unicode_data/blocks/cjk_unified_ideographs.yml +33 -0
- data/resources/unicode_data/blocks/cjk_unified_ideographs_extension_a.yml +33 -0
- data/resources/unicode_data/blocks/cjk_unified_ideographs_extension_b.yml +33 -0
- data/resources/unicode_data/blocks/cjk_unified_ideographs_extension_c.yml +33 -0
- data/resources/unicode_data/blocks/cjk_unified_ideographs_extension_d.yml +33 -0
- data/resources/unicode_data/blocks/combining_diacritical_marks.yml +1793 -0
- data/resources/unicode_data/{combining_diacritical_marks_for_symbols.yml → blocks/combining_diacritical_marks_for_symbols.yml} +409 -409
- data/resources/unicode_data/blocks/combining_diacritical_marks_supplement.yml +689 -0
- data/resources/unicode_data/blocks/combining_half_marks.yml +113 -0
- data/resources/unicode_data/blocks/common_indic_number_forms.yml +161 -0
- data/resources/unicode_data/blocks/control_pictures.yml +625 -0
- data/resources/unicode_data/blocks/coptic.yml +1969 -0
- data/resources/unicode_data/blocks/counting_rod_numerals.yml +289 -0
- data/resources/unicode_data/blocks/cuneiform.yml +14065 -0
- data/resources/unicode_data/blocks/cuneiform_numbers_and_punctuation.yml +1649 -0
- data/resources/unicode_data/blocks/currency_symbols.yml +417 -0
- data/resources/unicode_data/blocks/cypriot_syllabary.yml +881 -0
- data/resources/unicode_data/{cyrillic.yml → blocks/cyrillic.yml} +2765 -2765
- data/resources/unicode_data/blocks/cyrillic_extended_a.yml +513 -0
- data/resources/unicode_data/blocks/cyrillic_extended_b.yml +1425 -0
- data/resources/unicode_data/blocks/cyrillic_supplement.yml +641 -0
- data/resources/unicode_data/blocks/deseret.yml +1281 -0
- data/resources/unicode_data/blocks/devanagari.yml +2033 -0
- data/resources/unicode_data/blocks/devanagari_extended.yml +449 -0
- data/resources/unicode_data/blocks/dingbats.yml +3057 -0
- data/resources/unicode_data/blocks/domino_tiles.yml +1601 -0
- data/resources/unicode_data/blocks/egyptian_hieroglyphs.yml +17137 -0
- data/resources/unicode_data/blocks/emoticons.yml +1217 -0
- data/resources/unicode_data/blocks/enclosed_alphanumeric_supplement.yml +2737 -0
- data/resources/unicode_data/blocks/enclosed_alphanumerics.yml +2561 -0
- data/resources/unicode_data/{enclosed_cjk_letters_and_months.yml → blocks/enclosed_cjk_letters_and_months.yml} +3067 -3067
- data/resources/unicode_data/{enclosed_ideographic_supplement.yml → blocks/enclosed_ideographic_supplement.yml} +685 -685
- data/resources/unicode_data/blocks/ethiopic.yml +5729 -0
- data/resources/unicode_data/blocks/ethiopic_extended.yml +1265 -0
- data/resources/unicode_data/blocks/ethiopic_extended_a.yml +513 -0
- data/resources/unicode_data/blocks/ethiopic_supplement.yml +417 -0
- data/resources/unicode_data/blocks/general_punctuation.yml +1713 -0
- data/resources/unicode_data/blocks/geometric_shapes.yml +1537 -0
- data/resources/unicode_data/blocks/georgian.yml +1409 -0
- data/resources/unicode_data/blocks/georgian_supplement.yml +641 -0
- data/resources/unicode_data/blocks/glagolitic.yml +1505 -0
- data/resources/unicode_data/blocks/gothic.yml +433 -0
- data/resources/unicode_data/{greek_and_coptic.yml → blocks/greek_and_coptic.yml} +1360 -1360
- data/resources/unicode_data/{greek_extended.yml → blocks/greek_extended.yml} +2330 -2330
- data/resources/unicode_data/blocks/gujarati.yml +1345 -0
- data/resources/unicode_data/blocks/gurmukhi.yml +1265 -0
- data/resources/unicode_data/{halfwidth_and_fullwidth_forms.yml → blocks/halfwidth_and_fullwidth_forms.yml} +2517 -2517
- data/resources/unicode_data/{hangul_compatibility_jamo.yml → blocks/hangul_compatibility_jamo.yml} +993 -993
- data/resources/unicode_data/blocks/hangul_jamo.yml +4097 -0
- data/resources/unicode_data/blocks/hangul_jamo_extended_a.yml +465 -0
- data/resources/unicode_data/blocks/hangul_jamo_extended_b.yml +1153 -0
- data/resources/unicode_data/blocks/hangul_syllables.yml +33 -0
- data/resources/unicode_data/blocks/hanunoo.yml +369 -0
- data/resources/unicode_data/blocks/hebrew.yml +1393 -0
- data/resources/unicode_data/blocks/high_private_use_surrogates.yml +33 -0
- data/resources/unicode_data/blocks/high_surrogates.yml +33 -0
- data/resources/unicode_data/blocks/hiragana.yml +1489 -0
- data/resources/unicode_data/blocks/ideographic_description_characters.yml +193 -0
- data/resources/unicode_data/blocks/imperial_aramaic.yml +497 -0
- data/resources/unicode_data/blocks/inscriptional_pahlavi.yml +433 -0
- data/resources/unicode_data/blocks/inscriptional_parthian.yml +481 -0
- data/resources/unicode_data/{ipa_extensions.yml → blocks/ipa_extensions.yml} +1050 -1050
- data/resources/unicode_data/blocks/javanese.yml +1457 -0
- data/resources/unicode_data/blocks/kaithi.yml +1057 -0
- data/resources/unicode_data/blocks/kana_supplement.yml +33 -0
- data/resources/unicode_data/{kanbun.yml → blocks/kanbun.yml} +167 -167
- data/resources/unicode_data/blocks/kangxi_radicals.yml +3425 -0
- data/resources/unicode_data/blocks/kannada.yml +1377 -0
- data/resources/unicode_data/blocks/katakana.yml +1537 -0
- data/resources/unicode_data/blocks/katakana_phonetic_extensions.yml +257 -0
- data/resources/unicode_data/blocks/kayah_li.yml +769 -0
- data/resources/unicode_data/blocks/kharoshthi.yml +1041 -0
- data/resources/unicode_data/blocks/khmer.yml +1825 -0
- data/resources/unicode_data/blocks/khmer_symbols.yml +513 -0
- data/resources/unicode_data/blocks/lao.yml +1073 -0
- data/resources/unicode_data/{latin_1_supplement.yml → blocks/latin_1_supplement.yml} +1319 -1319
- data/resources/unicode_data/{latin_extended_a.yml → blocks/latin_extended_a.yml} +1210 -1210
- data/resources/unicode_data/{latin_extended_additional.yml → blocks/latin_extended_additional.yml} +2460 -2460
- data/resources/unicode_data/{latin_extended_b.yml → blocks/latin_extended_b.yml} +2096 -2096
- data/resources/unicode_data/blocks/latin_extended_c.yml +513 -0
- data/resources/unicode_data/blocks/latin_extended_d.yml +2145 -0
- data/resources/unicode_data/blocks/lepcha.yml +1185 -0
- data/resources/unicode_data/blocks/letterlike_symbols.yml +1281 -0
- data/resources/unicode_data/blocks/limbu.yml +1057 -0
- data/resources/unicode_data/blocks/linear_b_ideograms.yml +1969 -0
- data/resources/unicode_data/blocks/linear_b_syllabary.yml +1409 -0
- data/resources/unicode_data/blocks/lisu.yml +769 -0
- data/resources/unicode_data/blocks/low_surrogates.yml +33 -0
- data/resources/unicode_data/blocks/lycian.yml +465 -0
- data/resources/unicode_data/blocks/lydian.yml +433 -0
- data/resources/unicode_data/blocks/mahjong_tiles.yml +705 -0
- data/resources/unicode_data/blocks/malayalam.yml +1569 -0
- data/resources/unicode_data/blocks/mandaic.yml +465 -0
- data/resources/unicode_data/{mathematical_alphanumeric_symbols.yml → blocks/mathematical_alphanumeric_symbols.yml} +11953 -11953
- data/resources/unicode_data/blocks/mathematical_operators.yml +4097 -0
- data/resources/unicode_data/blocks/meetei_mayek.yml +897 -0
- data/resources/unicode_data/blocks/meetei_mayek_extensions.yml +369 -0
- data/resources/unicode_data/blocks/meroitic_cursive.yml +417 -0
- data/resources/unicode_data/blocks/meroitic_hieroglyphs.yml +513 -0
- data/resources/unicode_data/blocks/miao.yml +2129 -0
- data/resources/unicode_data/blocks/miscellaneous_mathematical_symbols_a.yml +769 -0
- data/resources/unicode_data/blocks/miscellaneous_mathematical_symbols_b.yml +2049 -0
- data/resources/unicode_data/blocks/miscellaneous_symbols.yml +4097 -0
- data/resources/unicode_data/blocks/miscellaneous_symbols_and_arrows.yml +1393 -0
- data/resources/unicode_data/blocks/miscellaneous_symbols_and_pictographs.yml +8529 -0
- data/resources/unicode_data/blocks/miscellaneous_technical.yml +3905 -0
- data/resources/unicode_data/blocks/modifier_tone_letters.yml +513 -0
- data/resources/unicode_data/blocks/mongolian.yml +2497 -0
- data/resources/unicode_data/blocks/musical_symbols.yml +3521 -0
- data/resources/unicode_data/blocks/myanmar.yml +2561 -0
- data/resources/unicode_data/blocks/myanmar_extended_a.yml +449 -0
- data/resources/unicode_data/blocks/new_tai_lue.yml +1329 -0
- data/resources/unicode_data/blocks/nko.yml +945 -0
- data/resources/unicode_data/blocks/number_forms.yml +929 -0
- data/resources/unicode_data/blocks/ogham.yml +465 -0
- data/resources/unicode_data/blocks/ol_chiki.yml +769 -0
- data/resources/unicode_data/blocks/old_italic.yml +561 -0
- data/resources/unicode_data/blocks/old_persian.yml +801 -0
- data/resources/unicode_data/blocks/old_south_arabian.yml +513 -0
- data/resources/unicode_data/blocks/old_turkic.yml +1169 -0
- data/resources/unicode_data/blocks/optical_character_recognition.yml +177 -0
- data/resources/unicode_data/blocks/oriya.yml +1441 -0
- data/resources/unicode_data/blocks/osmanya.yml +641 -0
- data/resources/unicode_data/blocks/phags_pa.yml +897 -0
- data/resources/unicode_data/blocks/phaistos_disc.yml +737 -0
- data/resources/unicode_data/blocks/phoenician.yml +465 -0
- data/resources/unicode_data/blocks/phonetic_extensions.yml +2049 -0
- data/resources/unicode_data/blocks/phonetic_extensions_supplement.yml +1025 -0
- data/resources/unicode_data/blocks/playing_cards.yml +945 -0
- data/resources/unicode_data/blocks/private_use_area.yml +33 -0
- data/resources/unicode_data/blocks/rejang.yml +593 -0
- data/resources/unicode_data/blocks/rumi_numeral_symbols.yml +497 -0
- data/resources/unicode_data/blocks/runic.yml +1297 -0
- data/resources/unicode_data/blocks/samaritan.yml +977 -0
- data/resources/unicode_data/blocks/saurashtra.yml +1297 -0
- data/resources/unicode_data/blocks/sharada.yml +1329 -0
- data/resources/unicode_data/blocks/shavian.yml +769 -0
- data/resources/unicode_data/blocks/sinhala.yml +1281 -0
- data/resources/unicode_data/blocks/small_form_variants.yml +417 -0
- data/resources/unicode_data/blocks/sora_sompeng.yml +561 -0
- data/resources/unicode_data/blocks/spacing_modifier_letters.yml +1281 -0
- data/resources/unicode_data/blocks/specials.yml +81 -0
- data/resources/unicode_data/blocks/sundanese.yml +1025 -0
- data/resources/unicode_data/blocks/sundanese_supplement.yml +129 -0
- data/resources/unicode_data/blocks/superscripts_and_subscripts.yml +673 -0
- data/resources/unicode_data/blocks/supplemental_arrows_a.yml +257 -0
- data/resources/unicode_data/blocks/supplemental_arrows_b.yml +2049 -0
- data/resources/unicode_data/blocks/supplemental_mathematical_operators.yml +4097 -0
- data/resources/unicode_data/blocks/supplemental_punctuation.yml +961 -0
- data/resources/unicode_data/blocks/supplementary_private_use_area_a.yml +33 -0
- data/resources/unicode_data/blocks/supplementary_private_use_area_b.yml +33 -0
- data/resources/unicode_data/blocks/syloti_nagri.yml +705 -0
- data/resources/unicode_data/blocks/syriac.yml +1233 -0
- data/resources/unicode_data/blocks/tagalog.yml +321 -0
- data/resources/unicode_data/blocks/tagbanwa.yml +289 -0
- data/resources/unicode_data/blocks/tags.yml +1553 -0
- data/resources/unicode_data/blocks/tai_le.yml +561 -0
- data/resources/unicode_data/blocks/tai_tham.yml +2033 -0
- data/resources/unicode_data/blocks/tai_viet.yml +1153 -0
- data/resources/unicode_data/blocks/tai_xuan_jing_symbols.yml +1393 -0
- data/resources/unicode_data/blocks/takri.yml +1057 -0
- data/resources/unicode_data/blocks/tamil.yml +1153 -0
- data/resources/unicode_data/blocks/telugu.yml +1489 -0
- data/resources/unicode_data/blocks/thaana.yml +801 -0
- data/resources/unicode_data/blocks/thai.yml +1393 -0
- data/resources/unicode_data/blocks/tibetan.yml +3377 -0
- data/resources/unicode_data/blocks/tifinagh.yml +945 -0
- data/resources/unicode_data/blocks/transport_and_map_symbols.yml +1121 -0
- data/resources/unicode_data/blocks/ugaritic.yml +497 -0
- data/resources/unicode_data/blocks/unified_canadian_aboriginal_syllabics.yml +10241 -0
- data/resources/unicode_data/blocks/unified_canadian_aboriginal_syllabics_extended.yml +1121 -0
- data/resources/unicode_data/blocks/vai.yml +4801 -0
- data/resources/unicode_data/blocks/variation_selectors.yml +257 -0
- data/resources/unicode_data/blocks/variation_selectors_supplement.yml +3841 -0
- data/resources/unicode_data/blocks/vedic_extensions.yml +625 -0
- data/resources/unicode_data/{vertical_forms.yml → blocks/vertical_forms.yml} +121 -121
- data/resources/unicode_data/blocks/yi_radicals.yml +881 -0
- data/resources/unicode_data/blocks/yi_syllables.yml +18641 -0
- data/resources/unicode_data/blocks/yijing_hexagram_symbols.yml +1025 -0
- data/resources/unicode_data/canonical_compositions.yml +4925 -0
- data/resources/unicode_data/composition_exclusions.yml +78 -74
- data/resources/unicode_data/hangul_blocks.yml +9 -9
- data/spec/collation/collation_spec.rb +6 -6
- data/spec/collation/collator_spec.rb +18 -19
- data/spec/collation/trie_builder_spec.rb +6 -9
- data/spec/core_ext/array_spec.rb +1 -1
- data/spec/core_ext/string_spec.rb +1 -1
- data/spec/formatters/numbers/currency_formatter_spec.rb +5 -0
- data/spec/formatters/numbers/decimal_formatter_spec.rb +4 -0
- data/spec/formatters/numbers/number_formatter_spec.rb +4 -4
- data/spec/formatters/numbers/percent_formatter_spec.rb +8 -0
- data/spec/normalization/base_spec.rb +2 -2
- data/spec/normalization/normalization_spec.rb +7 -3
- data/spec/readme_spec.rb +9 -9
- data/spec/resources/loader_spec.rb +4 -4
- data/spec/shared/code_point_spec.rb +102 -62
- data/spec/shared/currencies_spec.rb +17 -19
- data/spec/shared/phone_codes_spec.rb +49 -0
- data/spec/shared/postal_codes_spec.rb +68 -0
- data/spec/utils/code_points_spec.rb +6 -6
- metadata +264 -224
- data/resources/unicode_data/aegean_numbers.yml +0 -913
- data/resources/unicode_data/alchemical_symbols.yml +0 -1857
- data/resources/unicode_data/alphabetic_presentation_forms.yml +0 -929
- data/resources/unicode_data/ancient_greek_musical_notation.yml +0 -1121
- data/resources/unicode_data/ancient_greek_numbers.yml +0 -1201
- data/resources/unicode_data/ancient_symbols.yml +0 -193
- data/resources/unicode_data/arabic.yml +0 -4049
- data/resources/unicode_data/arabic_extended_a.yml +0 -625
- data/resources/unicode_data/arabic_mathematical_alphabetic_symbols.yml +0 -2289
- data/resources/unicode_data/arabic_supplement.yml +0 -769
- data/resources/unicode_data/armenian.yml +0 -1393
- data/resources/unicode_data/arrows.yml +0 -1793
- data/resources/unicode_data/avestan.yml +0 -977
- data/resources/unicode_data/balinese.yml +0 -1937
- data/resources/unicode_data/bamum.yml +0 -1409
- data/resources/unicode_data/bamum_supplement.yml +0 -9105
- data/resources/unicode_data/basic_latin.yml +0 -2049
- data/resources/unicode_data/batak.yml +0 -897
- data/resources/unicode_data/bengali.yml +0 -1473
- data/resources/unicode_data/block_elements.yml +0 -513
- data/resources/unicode_data/bopomofo.yml +0 -657
- data/resources/unicode_data/bopomofo_extended.yml +0 -433
- data/resources/unicode_data/brahmi.yml +0 -1729
- data/resources/unicode_data/braille_patterns.yml +0 -4097
- data/resources/unicode_data/buginese.yml +0 -481
- data/resources/unicode_data/buhid.yml +0 -321
- data/resources/unicode_data/byzantine_musical_symbols.yml +0 -3937
- data/resources/unicode_data/carian.yml +0 -785
- data/resources/unicode_data/chakma.yml +0 -1073
- data/resources/unicode_data/cham.yml +0 -1329
- data/resources/unicode_data/cherokee.yml +0 -1361
- data/resources/unicode_data/cjk_compatibility_ideographs.yml +0 -7553
- data/resources/unicode_data/cjk_compatibility_ideographs_supplement.yml +0 -8673
- data/resources/unicode_data/cjk_radicals_supplement.yml +0 -1841
- data/resources/unicode_data/cjk_strokes.yml +0 -577
- data/resources/unicode_data/cjk_symbols_and_punctuation.yml +0 -1025
- data/resources/unicode_data/cjk_unified_ideographs.yml +0 -33
- data/resources/unicode_data/cjk_unified_ideographs_extension_a.yml +0 -33
- data/resources/unicode_data/cjk_unified_ideographs_extension_b.yml +0 -33
- data/resources/unicode_data/cjk_unified_ideographs_extension_c.yml +0 -33
- data/resources/unicode_data/cjk_unified_ideographs_extension_d.yml +0 -33
- data/resources/unicode_data/combining_diacritical_marks.yml +0 -1793
- data/resources/unicode_data/combining_diacritical_marks_supplement.yml +0 -689
- data/resources/unicode_data/combining_half_marks.yml +0 -113
- data/resources/unicode_data/common_indic_number_forms.yml +0 -161
- data/resources/unicode_data/control_pictures.yml +0 -625
- data/resources/unicode_data/coptic.yml +0 -1969
- data/resources/unicode_data/counting_rod_numerals.yml +0 -289
- data/resources/unicode_data/cuneiform.yml +0 -14065
- data/resources/unicode_data/cuneiform_numbers_and_punctuation.yml +0 -1649
- data/resources/unicode_data/currency_symbols.yml +0 -417
- data/resources/unicode_data/cypriot_syllabary.yml +0 -881
- data/resources/unicode_data/cyrillic_extended_a.yml +0 -513
- data/resources/unicode_data/cyrillic_extended_b.yml +0 -1425
- data/resources/unicode_data/cyrillic_supplement.yml +0 -641
- data/resources/unicode_data/decomposition_map.yml +0 -4565
- data/resources/unicode_data/deseret.yml +0 -1281
- data/resources/unicode_data/devanagari.yml +0 -2033
- data/resources/unicode_data/devanagari_extended.yml +0 -449
- data/resources/unicode_data/dingbats.yml +0 -3057
- data/resources/unicode_data/domino_tiles.yml +0 -1601
- data/resources/unicode_data/egyptian_hieroglyphs.yml +0 -17137
- data/resources/unicode_data/emoticons.yml +0 -1217
- data/resources/unicode_data/enclosed_alphanumeric_supplement.yml +0 -2737
- data/resources/unicode_data/enclosed_alphanumerics.yml +0 -2561
- data/resources/unicode_data/ethiopic.yml +0 -5729
- data/resources/unicode_data/ethiopic_extended.yml +0 -1265
- data/resources/unicode_data/ethiopic_extended_a.yml +0 -513
- data/resources/unicode_data/ethiopic_supplement.yml +0 -417
- data/resources/unicode_data/general_punctuation.yml +0 -1713
- data/resources/unicode_data/geometric_shapes.yml +0 -1537
- data/resources/unicode_data/georgian.yml +0 -1409
- data/resources/unicode_data/georgian_supplement.yml +0 -641
- data/resources/unicode_data/glagolitic.yml +0 -1505
- data/resources/unicode_data/gothic.yml +0 -433
- data/resources/unicode_data/gujarati.yml +0 -1345
- data/resources/unicode_data/gurmukhi.yml +0 -1265
- data/resources/unicode_data/hangul_jamo.yml +0 -4097
- data/resources/unicode_data/hangul_jamo_extended_a.yml +0 -465
- data/resources/unicode_data/hangul_jamo_extended_b.yml +0 -1153
- data/resources/unicode_data/hangul_syllables.yml +0 -33
- data/resources/unicode_data/hanunoo.yml +0 -369
- data/resources/unicode_data/hebrew.yml +0 -1393
- data/resources/unicode_data/high_private_use_surrogates.yml +0 -33
- data/resources/unicode_data/high_surrogates.yml +0 -33
- data/resources/unicode_data/hiragana.yml +0 -1489
- data/resources/unicode_data/ideographic_description_characters.yml +0 -193
- data/resources/unicode_data/imperial_aramaic.yml +0 -497
- data/resources/unicode_data/inscriptional_pahlavi.yml +0 -433
- data/resources/unicode_data/inscriptional_parthian.yml +0 -481
- data/resources/unicode_data/javanese.yml +0 -1457
- data/resources/unicode_data/kaithi.yml +0 -1057
- data/resources/unicode_data/kana_supplement.yml +0 -33
- data/resources/unicode_data/kangxi_radicals.yml +0 -3425
- data/resources/unicode_data/kannada.yml +0 -1377
- data/resources/unicode_data/katakana.yml +0 -1537
- data/resources/unicode_data/katakana_phonetic_extensions.yml +0 -257
- data/resources/unicode_data/kayah_li.yml +0 -769
- data/resources/unicode_data/kharoshthi.yml +0 -1041
- data/resources/unicode_data/khmer.yml +0 -1825
- data/resources/unicode_data/khmer_symbols.yml +0 -513
- data/resources/unicode_data/lao.yml +0 -1073
- data/resources/unicode_data/latin_extended_c.yml +0 -513
- data/resources/unicode_data/latin_extended_d.yml +0 -2145
- data/resources/unicode_data/lepcha.yml +0 -1185
- data/resources/unicode_data/letterlike_symbols.yml +0 -1281
- data/resources/unicode_data/limbu.yml +0 -1057
- data/resources/unicode_data/linear_b_ideograms.yml +0 -1969
- data/resources/unicode_data/linear_b_syllabary.yml +0 -1409
- data/resources/unicode_data/lisu.yml +0 -769
- data/resources/unicode_data/low_surrogates.yml +0 -33
- data/resources/unicode_data/lycian.yml +0 -465
- data/resources/unicode_data/lydian.yml +0 -433
- data/resources/unicode_data/mahjong_tiles.yml +0 -705
- data/resources/unicode_data/malayalam.yml +0 -1569
- data/resources/unicode_data/mandaic.yml +0 -465
- data/resources/unicode_data/mathematical_operators.yml +0 -4097
- data/resources/unicode_data/meetei_mayek.yml +0 -897
- data/resources/unicode_data/meetei_mayek_extensions.yml +0 -369
- data/resources/unicode_data/meroitic_cursive.yml +0 -417
- data/resources/unicode_data/meroitic_hieroglyphs.yml +0 -513
- data/resources/unicode_data/miao.yml +0 -2129
- data/resources/unicode_data/miscellaneous_mathematical_symbols_a.yml +0 -769
- data/resources/unicode_data/miscellaneous_mathematical_symbols_b.yml +0 -2049
- data/resources/unicode_data/miscellaneous_symbols.yml +0 -4097
- data/resources/unicode_data/miscellaneous_symbols_and_arrows.yml +0 -1393
- data/resources/unicode_data/miscellaneous_symbols_and_pictographs.yml +0 -8529
- data/resources/unicode_data/miscellaneous_technical.yml +0 -3905
- data/resources/unicode_data/modifier_tone_letters.yml +0 -513
- data/resources/unicode_data/mongolian.yml +0 -2497
- data/resources/unicode_data/musical_symbols.yml +0 -3521
- data/resources/unicode_data/myanmar.yml +0 -2561
- data/resources/unicode_data/myanmar_extended_a.yml +0 -449
- data/resources/unicode_data/new_tai_lue.yml +0 -1329
- data/resources/unicode_data/nko.yml +0 -945
- data/resources/unicode_data/number_forms.yml +0 -929
- data/resources/unicode_data/ogham.yml +0 -465
- data/resources/unicode_data/ol_chiki.yml +0 -769
- data/resources/unicode_data/old_italic.yml +0 -561
- data/resources/unicode_data/old_persian.yml +0 -801
- data/resources/unicode_data/old_south_arabian.yml +0 -513
- data/resources/unicode_data/old_turkic.yml +0 -1169
- data/resources/unicode_data/optical_character_recognition.yml +0 -177
- data/resources/unicode_data/oriya.yml +0 -1441
- data/resources/unicode_data/osmanya.yml +0 -641
- data/resources/unicode_data/phags_pa.yml +0 -897
- data/resources/unicode_data/phaistos_disc.yml +0 -737
- data/resources/unicode_data/phoenician.yml +0 -465
- data/resources/unicode_data/phonetic_extensions.yml +0 -2049
- data/resources/unicode_data/phonetic_extensions_supplement.yml +0 -1025
- data/resources/unicode_data/playing_cards.yml +0 -945
- data/resources/unicode_data/private_use_area.yml +0 -33
- data/resources/unicode_data/rejang.yml +0 -593
- data/resources/unicode_data/rumi_numeral_symbols.yml +0 -497
- data/resources/unicode_data/runic.yml +0 -1297
- data/resources/unicode_data/samaritan.yml +0 -977
- data/resources/unicode_data/saurashtra.yml +0 -1297
- data/resources/unicode_data/sharada.yml +0 -1329
- data/resources/unicode_data/shavian.yml +0 -769
- data/resources/unicode_data/sinhala.yml +0 -1281
- data/resources/unicode_data/small_form_variants.yml +0 -417
- data/resources/unicode_data/sora_sompeng.yml +0 -561
- data/resources/unicode_data/spacing_modifier_letters.yml +0 -1281
- data/resources/unicode_data/specials.yml +0 -81
- data/resources/unicode_data/sundanese.yml +0 -1025
- data/resources/unicode_data/sundanese_supplement.yml +0 -129
- data/resources/unicode_data/superscripts_and_subscripts.yml +0 -673
- data/resources/unicode_data/supplemental_arrows_a.yml +0 -257
- data/resources/unicode_data/supplemental_arrows_b.yml +0 -2049
- data/resources/unicode_data/supplemental_mathematical_operators.yml +0 -4097
- data/resources/unicode_data/supplemental_punctuation.yml +0 -961
- data/resources/unicode_data/supplementary_private_use_area_a.yml +0 -33
- data/resources/unicode_data/supplementary_private_use_area_b.yml +0 -33
- data/resources/unicode_data/syloti_nagri.yml +0 -705
- data/resources/unicode_data/syriac.yml +0 -1233
- data/resources/unicode_data/tagalog.yml +0 -321
- data/resources/unicode_data/tagbanwa.yml +0 -289
- data/resources/unicode_data/tags.yml +0 -1553
- data/resources/unicode_data/tai_le.yml +0 -561
- data/resources/unicode_data/tai_tham.yml +0 -2033
- data/resources/unicode_data/tai_viet.yml +0 -1153
- data/resources/unicode_data/tai_xuan_jing_symbols.yml +0 -1393
- data/resources/unicode_data/takri.yml +0 -1057
- data/resources/unicode_data/tamil.yml +0 -1153
- data/resources/unicode_data/telugu.yml +0 -1489
- data/resources/unicode_data/thaana.yml +0 -801
- data/resources/unicode_data/thai.yml +0 -1393
- data/resources/unicode_data/tibetan.yml +0 -3377
- data/resources/unicode_data/tifinagh.yml +0 -945
- data/resources/unicode_data/transport_and_map_symbols.yml +0 -1121
- data/resources/unicode_data/ugaritic.yml +0 -497
- data/resources/unicode_data/unified_canadian_aboriginal_syllabics.yml +0 -10241
- data/resources/unicode_data/unified_canadian_aboriginal_syllabics_extended.yml +0 -1121
- data/resources/unicode_data/vai.yml +0 -4801
- data/resources/unicode_data/variation_selectors.yml +0 -257
- data/resources/unicode_data/variation_selectors_supplement.yml +0 -3841
- data/resources/unicode_data/vedic_extensions.yml +0 -625
- data/resources/unicode_data/yi_radicals.yml +0 -881
- data/resources/unicode_data/yi_syllables.yml +0 -18641
- data/resources/unicode_data/yijing_hexagram_symbols.yml +0 -1025
@@ -27,6 +27,25 @@ module TwitterCldr
|
|
27
27
|
CodePoint = Struct.new(*CODE_POINT_FIELDS) do
|
28
28
|
DECOMPOSITION_DATA_INDEX = 5
|
29
29
|
|
30
|
+
DECOMPOSITION_REGEX = /^(?:<(.+)>\s+)?(.+)?$/
|
31
|
+
|
32
|
+
attr_accessor :compatibility_decomposition_tag
|
33
|
+
|
34
|
+
def initialize(*)
|
35
|
+
super
|
36
|
+
|
37
|
+
if decomposition =~ DECOMPOSITION_REGEX
|
38
|
+
self.compatibility_decomposition_tag = $1
|
39
|
+
self.decomposition = $2 && $2.split.map(&:hex)
|
40
|
+
else
|
41
|
+
raise ArgumentError, "decomposition #{decomposition.inspect} has invalid format"
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
def compatibility_decomposition?
|
46
|
+
!!compatibility_decomposition_tag
|
47
|
+
end
|
48
|
+
|
30
49
|
def hangul_type
|
31
50
|
CodePoint.hangul_type(code_point)
|
32
51
|
end
|
@@ -37,49 +56,42 @@ module TwitterCldr
|
|
37
56
|
|
38
57
|
class << self
|
39
58
|
|
40
|
-
def
|
41
|
-
code_point = code_point.rjust(4, '0').upcase
|
42
|
-
|
59
|
+
def find(code_point)
|
43
60
|
target = get_block(code_point)
|
44
61
|
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
end
|
62
|
+
return unless target && target.first
|
63
|
+
|
64
|
+
block_data = TwitterCldr.get_resource(:unicode_data, :blocks, target.first)
|
65
|
+
code_point_data = block_data.fetch(code_point) { |cp| get_range_start(cp, block_data) }
|
66
|
+
|
67
|
+
CodePoint.new(*code_point_data) if code_point_data
|
52
68
|
end
|
53
69
|
|
54
|
-
def
|
55
|
-
|
56
|
-
|
70
|
+
def for_canonical_decomposition(code_points)
|
71
|
+
find(canonical_compositions[code_points]) if canonical_compositions.has_key?(code_points)
|
72
|
+
end
|
57
73
|
|
58
|
-
|
59
|
-
|
60
|
-
else
|
61
|
-
nil
|
62
|
-
end
|
74
|
+
def canonical_compositions
|
75
|
+
@canonical_compositions ||= TwitterCldr.get_resource(:unicode_data, :canonical_compositions)
|
63
76
|
end
|
64
77
|
|
65
78
|
def hangul_type(code_point)
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
end
|
79
|
+
return unless code_point
|
80
|
+
|
81
|
+
[:lparts, :vparts, :tparts, :compositions].each do |type|
|
82
|
+
hangul_blocks[type].each do |range|
|
83
|
+
return type if range.include?(code_point)
|
72
84
|
end
|
73
85
|
end
|
86
|
+
|
74
87
|
nil
|
75
88
|
end
|
76
89
|
|
77
90
|
def excluded_from_composition?(code_point)
|
78
|
-
|
79
|
-
composition_exclusions.any? { |excl| excl.include?(code_point_int) }
|
91
|
+
composition_exclusions.any? { |exclusion| exclusion.include?(code_point) }
|
80
92
|
end
|
81
93
|
|
82
|
-
|
94
|
+
private
|
83
95
|
|
84
96
|
def hangul_blocks
|
85
97
|
@hangul_blocks ||= TwitterCldr.get_resource(:unicode_data, :hangul_blocks)
|
@@ -90,23 +102,22 @@ module TwitterCldr
|
|
90
102
|
end
|
91
103
|
|
92
104
|
def get_block(code_point)
|
93
|
-
blocks
|
94
|
-
|
105
|
+
blocks.detect { |_, range| range.include?(code_point) }
|
106
|
+
end
|
95
107
|
|
96
|
-
|
97
|
-
|
98
|
-
range.include?(code_point_int)
|
99
|
-
end
|
108
|
+
def blocks
|
109
|
+
TwitterCldr.get_resource(:unicode_data, :blocks)
|
100
110
|
end
|
101
111
|
|
102
112
|
# Check if block constitutes a range. The code point beginning a range will have a name enclosed in <>, ending with 'First'
|
103
113
|
# eg: <CJK Ideograph Extension A, First>
|
104
114
|
# http://unicode.org/reports/tr44/#Code_Point_Ranges
|
105
115
|
def get_range_start(code_point, block_data)
|
106
|
-
|
107
|
-
|
116
|
+
start_data = block_data[block_data.keys.min]
|
117
|
+
|
108
118
|
if start_data[1] =~ /<.*, First>/
|
109
|
-
start_data
|
119
|
+
start_data = start_data.clone
|
120
|
+
start_data[0] = code_point
|
110
121
|
start_data[1] = start_data[1].sub(', First', '')
|
111
122
|
start_data
|
112
123
|
end
|
@@ -7,36 +7,33 @@ module TwitterCldr
|
|
7
7
|
module Shared
|
8
8
|
module Currencies
|
9
9
|
|
10
|
-
@@resource = TwitterCldr.get_resource(:shared, :currencies)[:shared][:currencies]
|
11
|
-
|
12
10
|
class << self
|
13
11
|
|
14
12
|
def countries
|
15
|
-
|
13
|
+
resource.keys.map(&:to_s)
|
16
14
|
end
|
17
15
|
|
18
16
|
def currency_codes
|
19
|
-
|
17
|
+
resource.values.map { |data| data[:code] }
|
20
18
|
end
|
21
19
|
|
22
20
|
def for_country(country_name)
|
23
|
-
|
21
|
+
resource[country_name.to_sym]
|
24
22
|
end
|
25
23
|
|
26
24
|
def for_code(currency_code)
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
final
|
25
|
+
country_name, data = resource.detect { |_, data| data[:code] == currency_code }
|
26
|
+
{ :country => country_name.to_s, :currency => data[:currency], :symbol => data[:symbol] } if data
|
27
|
+
end
|
28
|
+
|
29
|
+
private
|
30
|
+
|
31
|
+
def resource
|
32
|
+
@resource ||= TwitterCldr.get_resource(:shared, :currencies)
|
36
33
|
end
|
37
34
|
|
38
35
|
end
|
39
36
|
|
40
37
|
end
|
41
38
|
end
|
42
|
-
end
|
39
|
+
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
# Copyright 2012 Twitter, Inc
|
4
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
5
|
+
|
6
|
+
module TwitterCldr
|
7
|
+
module Shared
|
8
|
+
module PhoneCodes
|
9
|
+
|
10
|
+
class << self
|
11
|
+
|
12
|
+
def territories
|
13
|
+
resource.keys
|
14
|
+
end
|
15
|
+
|
16
|
+
def code_for_territory(territory)
|
17
|
+
resource[territory.to_s.downcase.to_sym]
|
18
|
+
end
|
19
|
+
|
20
|
+
private
|
21
|
+
|
22
|
+
def resource
|
23
|
+
@resource ||= TwitterCldr.get_resource(:shared, :phone_codes)
|
24
|
+
end
|
25
|
+
|
26
|
+
end
|
27
|
+
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
# Copyright 2012 Twitter, Inc
|
4
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
5
|
+
|
6
|
+
module TwitterCldr
|
7
|
+
module Shared
|
8
|
+
module PostalCodes
|
9
|
+
|
10
|
+
class << self
|
11
|
+
|
12
|
+
def territories
|
13
|
+
resource.keys
|
14
|
+
end
|
15
|
+
|
16
|
+
def regex_for_territory(territory)
|
17
|
+
resource[territory.to_s.downcase.to_sym]
|
18
|
+
end
|
19
|
+
|
20
|
+
def valid?(territory, postal_code)
|
21
|
+
regexp = regex_for_territory(territory)
|
22
|
+
!!(regexp && regexp =~ postal_code)
|
23
|
+
end
|
24
|
+
|
25
|
+
private
|
26
|
+
|
27
|
+
def resource
|
28
|
+
@resource ||= TwitterCldr.get_resource(:shared, :postal_codes)
|
29
|
+
end
|
30
|
+
|
31
|
+
end
|
32
|
+
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
@@ -6,17 +6,21 @@
|
|
6
6
|
module TwitterCldr
|
7
7
|
module Tokenizers
|
8
8
|
class TimespanTokenizer < Base
|
9
|
+
|
9
10
|
VALID_UNITS = [:second, :minute, :hour, :day, :week, :month, :year]
|
10
11
|
|
11
12
|
def initialize(options = {})
|
12
13
|
super(options)
|
13
14
|
|
14
15
|
@token_splitter_regex = /([^0*#,\.]*)([0#,\.]+)([^0*#,\.]*)$/ # creates spaces
|
15
|
-
|
16
|
+
|
17
|
+
@token_type_regexes = [
|
16
18
|
{ :type => :pattern, :regex => /[0?#,\.]*/ }, # splits token at right places
|
17
19
|
{ :type => :plaintext, :regex => // }
|
18
20
|
]
|
21
|
+
|
19
22
|
@base_path = [:units]
|
23
|
+
|
20
24
|
@paths = {
|
21
25
|
:ago => {
|
22
26
|
:default => :'hour-past',
|
@@ -64,13 +68,13 @@ module TwitterCldr
|
|
64
68
|
pluralization = 2 if token_exists(path + [2])
|
65
69
|
end
|
66
70
|
path << pluralization
|
71
|
+
|
67
72
|
tokens_with_placeholders_for(path) if token_exists(path)
|
68
73
|
end
|
69
74
|
|
70
75
|
def token_exists(path)
|
71
|
-
@@token_cache ||= {}
|
72
76
|
cache_key = compute_cache_key(@locale, path.join('.'))
|
73
|
-
|
77
|
+
token_cache.include?(cache_key) || !!traverse(path)
|
74
78
|
end
|
75
79
|
|
76
80
|
def all_types_for(unit, direction)
|
@@ -79,6 +83,10 @@ module TwitterCldr
|
|
79
83
|
|
80
84
|
protected
|
81
85
|
|
86
|
+
def token_cache
|
87
|
+
@token_cache ||= {}
|
88
|
+
end
|
89
|
+
|
82
90
|
def full_path(direction, unit, type)
|
83
91
|
@base_path + [@paths[direction][unit], type]
|
84
92
|
end
|
@@ -6,6 +6,8 @@
|
|
6
6
|
module TwitterCldr
|
7
7
|
module Tokenizers
|
8
8
|
class NumberTokenizer < Base
|
9
|
+
VALID_TYPES = [:decimal, :percent, :currency]
|
10
|
+
|
9
11
|
def initialize(options = {})
|
10
12
|
super(options)
|
11
13
|
|
@@ -32,7 +34,12 @@ module TwitterCldr
|
|
32
34
|
|
33
35
|
insert_point = traverse(key_path[0..-2])
|
34
36
|
insert_point[:positive] = positive
|
35
|
-
|
37
|
+
|
38
|
+
if negative
|
39
|
+
insert_point[:negative] = "#{symbols[:minus] || '-'}#{negative}"
|
40
|
+
else
|
41
|
+
insert_point[:negative] = "#{symbols[:minus] || '-'}#{positive}"
|
42
|
+
end
|
36
43
|
end
|
37
44
|
|
38
45
|
sign = options[:sign] || :positive
|
@@ -10,11 +10,11 @@ module TwitterCldr
|
|
10
10
|
class << self
|
11
11
|
|
12
12
|
def to_char(code_point)
|
13
|
-
[code_point
|
13
|
+
[code_point].pack('U*')
|
14
14
|
end
|
15
15
|
|
16
16
|
def from_char(char)
|
17
|
-
char.unpack('U*').first
|
17
|
+
char.unpack('U*').first
|
18
18
|
end
|
19
19
|
|
20
20
|
def from_chars(chars)
|
data/lib/twitter_cldr/version.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
1
|
---
|
2
|
-
collator_options: {}
|
3
|
-
tailored_table: 0149; [41, 5, 10]
|
4
|
-
suppressed_contractions: ''
|
2
|
+
:collator_options: {}
|
3
|
+
:tailored_table: 0149; [41, 5, 10]
|
4
|
+
:suppressed_contractions: ''
|
@@ -1,6 +1,6 @@
|
|
1
1
|
---
|
2
|
-
collator_options: {}
|
3
|
-
tailored_table: 0629; [6144, 87, 5]
|
2
|
+
:collator_options: {}
|
3
|
+
:tailored_table: 0629; [6144, 87, 5]
|
4
4
|
|
5
5
|
FE94; [6144, 87, 6]
|
6
6
|
|
@@ -19,4 +19,4 @@ tailored_table: 0629; [6144, 87, 5]
|
|
19
19
|
FC90; [6287, 87, A]
|
20
20
|
|
21
21
|
FC5D; [6287, 87, B]
|
22
|
-
suppressed_contractions: ''
|
22
|
+
:suppressed_contractions: ''
|
@@ -1,10 +1,10 @@
|
|
1
1
|
---
|
2
|
-
collator_options: {}
|
3
|
-
tailored_table: 0140; [3D, 87, 5]
|
2
|
+
:collator_options: {}
|
3
|
+
:tailored_table: 0140; [3D, 87, 5]
|
4
4
|
|
5
5
|
006C 00B7; [3D, 87, 5]
|
6
6
|
|
7
7
|
013F; [3D, 87, 86]
|
8
8
|
|
9
9
|
004C 00B7; [3D, 87, 86]
|
10
|
-
suppressed_contractions: ''
|
10
|
+
:suppressed_contractions: ''
|
@@ -1,6 +1,6 @@
|
|
1
1
|
---
|
2
|
-
collator_options: {}
|
3
|
-
tailored_table: 0063 030C; [2C04, 5, 5]
|
2
|
+
:collator_options: {}
|
3
|
+
:tailored_table: 0063 030C; [2C04, 5, 5]
|
4
4
|
|
5
5
|
0043 030C; [2C04, 5, 86]
|
6
6
|
|
@@ -23,4 +23,4 @@ tailored_table: 0063 030C; [2C04, 5, 5]
|
|
23
23
|
007A 030C; [5A04, 5, 5]
|
24
24
|
|
25
25
|
005A 030C; [5A04, 5, 86]
|
26
|
-
suppressed_contractions: ''
|
26
|
+
:suppressed_contractions: ''
|
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
collator_options:
|
3
|
-
case_first: :upper
|
4
|
-
tailored_table: 0111; [2D, 87, 5]
|
2
|
+
:collator_options:
|
3
|
+
:case_first: :upper
|
4
|
+
:tailored_table: 0111; [2D, 87, 5]
|
5
5
|
|
6
6
|
0110; [2D, 87, 86]
|
7
7
|
|
@@ -58,4 +58,4 @@ tailored_table: 0111; [2D, 87, 5]
|
|
58
58
|
0041 0061; [5AA1, 5, 48][6, 0, C0]
|
59
59
|
|
60
60
|
0041 0041; [5AA1, 5, 89][6, 0, C0]
|
61
|
-
suppressed_contractions: ''
|
61
|
+
:suppressed_contractions: ''
|
@@ -1,4 +1,4 @@
|
|
1
1
|
---
|
2
|
-
collator_options: {}
|
3
|
-
tailored_table: ''
|
4
|
-
suppressed_contractions: ''
|
2
|
+
:collator_options: {}
|
3
|
+
:tailored_table: ''
|
4
|
+
:suppressed_contractions: ''
|
@@ -1,4 +1,4 @@
|
|
1
1
|
---
|
2
|
-
collator_options: {}
|
3
|
-
tailored_table: ''
|
4
|
-
suppressed_contractions: ''
|
2
|
+
:collator_options: {}
|
3
|
+
:tailored_table: ''
|
4
|
+
:suppressed_contractions: ''
|
@@ -1,4 +1,4 @@
|
|
1
1
|
---
|
2
|
-
collator_options: {}
|
3
|
-
tailored_table: ''
|
4
|
-
suppressed_contractions: ''
|
2
|
+
:collator_options: {}
|
3
|
+
:tailored_table: ''
|
4
|
+
:suppressed_contractions: ''
|