twitter_cldr 4.0.0 → 4.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +3 -2
- data/README.md +18 -2
- data/Rakefile +39 -122
- data/lib/twitter_cldr.rb +3 -0
- data/lib/twitter_cldr/formatters/numbers/rbnf.rb +5 -1
- data/lib/twitter_cldr/resources.rb +86 -5
- data/lib/twitter_cldr/resources/bidi_test_importer.rb +50 -44
- data/lib/twitter_cldr/resources/casefolder_class_generator.rb +22 -13
- data/lib/twitter_cldr/resources/collation_tries_importer.rb +44 -0
- data/lib/twitter_cldr/resources/hyphenation_importer.rb +16 -42
- data/lib/twitter_cldr/resources/import_resolver.rb +71 -0
- data/lib/twitter_cldr/resources/importer.rb +107 -0
- data/lib/twitter_cldr/resources/language_codes_importer.rb +35 -38
- data/lib/twitter_cldr/resources/loader.rb +3 -3
- data/lib/twitter_cldr/resources/locales_resources_importer.rb +48 -35
- data/lib/twitter_cldr/resources/phone_codes_importer.rb +24 -23
- data/lib/twitter_cldr/resources/postal_codes_importer.rb +10 -11
- data/lib/twitter_cldr/resources/properties.rb +0 -4
- data/lib/twitter_cldr/resources/properties/age_property_importer.rb +13 -9
- data/lib/twitter_cldr/resources/properties/arabic_shaping_property_importer.rb +9 -11
- data/lib/twitter_cldr/resources/properties/bidi_brackets_property_importer.rb +11 -9
- data/lib/twitter_cldr/resources/properties/blocks_property_importer.rb +13 -9
- data/lib/twitter_cldr/resources/properties/derived_core_properties_importer.rb +9 -11
- data/lib/twitter_cldr/resources/properties/east_asian_width_property_importer.rb +13 -9
- data/lib/twitter_cldr/resources/properties/grapheme_break_property_importer.rb +13 -9
- data/lib/twitter_cldr/resources/properties/hangul_syllable_type_property_importer.rb +13 -9
- data/lib/twitter_cldr/resources/properties/indic_positional_category_property_importer.rb +13 -9
- data/lib/twitter_cldr/resources/properties/indic_syllabic_category_property_importer.rb +13 -9
- data/lib/twitter_cldr/resources/properties/jamo_property_importer.rb +13 -9
- data/lib/twitter_cldr/resources/properties/line_break_property_importer.rb +13 -9
- data/lib/twitter_cldr/resources/properties/prop_list_importer.rb +9 -11
- data/lib/twitter_cldr/resources/properties/property_importer.rb +13 -22
- data/lib/twitter_cldr/resources/properties/script_extensions_property_importer.rb +12 -10
- data/lib/twitter_cldr/resources/properties/script_property_importer.rb +13 -9
- data/lib/twitter_cldr/resources/properties/sentence_break_property_importer.rb +13 -9
- data/lib/twitter_cldr/resources/properties/unicode_data_properties_importer.rb +11 -9
- data/lib/twitter_cldr/resources/properties/word_break_property_importer.rb +13 -9
- data/lib/twitter_cldr/resources/rbnf_test_importer.rb +41 -38
- data/lib/twitter_cldr/resources/readme_renderer.rb +1 -2
- data/lib/twitter_cldr/resources/requirements.rb +18 -0
- data/lib/twitter_cldr/resources/requirements/cldr_requirement.rb +66 -0
- data/lib/twitter_cldr/resources/requirements/dependency_requirement.rb +23 -0
- data/lib/twitter_cldr/resources/requirements/git_requirement.rb +66 -0
- data/lib/twitter_cldr/resources/requirements/icu_requirement.rb +111 -0
- data/lib/twitter_cldr/resources/requirements/unicode_requirement.rb +51 -0
- data/lib/twitter_cldr/resources/segment_tests_importer.rb +15 -30
- data/lib/twitter_cldr/resources/tailoring_importer.rb +33 -26
- data/lib/twitter_cldr/resources/transform_test_importer.rb +15 -17
- data/lib/twitter_cldr/resources/uli/segment_exceptions_importer.rb +29 -17
- data/lib/twitter_cldr/resources/unicode_data_importer.rb +38 -31
- data/lib/twitter_cldr/resources/unicode_file_parser.rb +37 -0
- data/lib/twitter_cldr/resources/unicode_property_aliases_importer.rb +23 -27
- data/lib/twitter_cldr/shared/casefolder.rb +139 -115
- data/lib/twitter_cldr/version.rb +1 -1
- data/lib/twitter_cldr/versions.rb +0 -4
- data/resources/collation/tailoring/bo.yml +4 -0
- data/resources/collation/tries/bo.dump +0 -0
- data/resources/locales/bo/calendars.yml +247 -0
- data/resources/locales/bo/currencies.yml +208 -0
- data/resources/locales/bo/fields.yml +31 -0
- data/resources/locales/bo/languages.yml +24 -0
- data/resources/locales/bo/layout.yml +5 -0
- data/resources/locales/bo/lists.yml +12 -0
- data/resources/locales/bo/numbers.yml +111 -0
- data/resources/locales/bo/plural_rules.yml +6 -0
- data/resources/locales/bo/plurals.yml +12 -0
- data/resources/locales/bo/territories.yml +14 -0
- data/resources/locales/bo/units.yml +283 -0
- data/resources/shared/transforms/Arab-Latn.yml +109 -0
- data/resources/shared/transforms/Beng-Deva.yml +13 -0
- data/resources/shared/transforms/Beng-Gujr.yml +13 -0
- data/resources/shared/transforms/Beng-Guru.yml +13 -0
- data/resources/shared/transforms/Beng-Knda.yml +13 -0
- data/resources/shared/transforms/Beng-Latn.yml +13 -0
- data/resources/shared/transforms/Beng-Mlym.yml +13 -0
- data/resources/shared/transforms/Beng-Orya.yml +13 -0
- data/resources/shared/transforms/Beng-Taml.yml +13 -0
- data/resources/shared/transforms/Beng-Telu.yml +13 -0
- data/resources/shared/transforms/Cyrl-Latn.yml +128 -0
- data/resources/shared/transforms/Deva-Beng.yml +13 -0
- data/resources/shared/transforms/Deva-Gujr.yml +13 -0
- data/resources/shared/transforms/Deva-Guru.yml +13 -0
- data/resources/shared/transforms/Deva-Knda.yml +13 -0
- data/resources/shared/transforms/Deva-Latn.yml +13 -0
- data/resources/shared/transforms/Deva-Mlym.yml +13 -0
- data/resources/shared/transforms/Deva-Orya.yml +13 -0
- data/resources/shared/transforms/Deva-Taml.yml +13 -0
- data/resources/shared/transforms/Deva-Telu.yml +13 -0
- data/resources/shared/transforms/Geor-Latn.yml +43 -0
- data/resources/shared/transforms/Grek-Latn-UNGEGN.yml +160 -0
- data/resources/shared/transforms/Grek-Latn.yml +206 -0
- data/resources/shared/transforms/Gujr-Beng.yml +13 -0
- data/resources/shared/transforms/Gujr-Deva.yml +13 -0
- data/resources/shared/transforms/Gujr-Guru.yml +13 -0
- data/resources/shared/transforms/Gujr-Knda.yml +13 -0
- data/resources/shared/transforms/Gujr-Latn.yml +13 -0
- data/resources/shared/transforms/Gujr-Mlym.yml +13 -0
- data/resources/shared/transforms/Gujr-Orya.yml +13 -0
- data/resources/shared/transforms/Gujr-Taml.yml +13 -0
- data/resources/shared/transforms/Gujr-Telu.yml +13 -0
- data/resources/shared/transforms/Guru-Beng.yml +13 -0
- data/resources/shared/transforms/Guru-Deva.yml +13 -0
- data/resources/shared/transforms/Guru-Gujr.yml +13 -0
- data/resources/shared/transforms/Guru-Knda.yml +13 -0
- data/resources/shared/transforms/Guru-Latn.yml +13 -0
- data/resources/shared/transforms/Guru-Mlym.yml +13 -0
- data/resources/shared/transforms/Guru-Orya.yml +13 -0
- data/resources/shared/transforms/Guru-Taml.yml +13 -0
- data/resources/shared/transforms/Guru-Telu.yml +13 -0
- data/resources/shared/transforms/Han-Spacedhan.yml +1 -1
- data/resources/shared/transforms/Hang-Latn.yml +12 -0
- data/resources/shared/transforms/Hani-Latn.yml +1605 -0
- data/resources/shared/transforms/Hans-Hant.yml +3982 -0
- data/resources/shared/transforms/Hebr-Latn.yml +72 -0
- data/resources/shared/transforms/Hira-Kana.yml +114 -0
- data/resources/shared/transforms/Hira-Latn.yml +15 -0
- data/resources/shared/transforms/InterIndic-Latin.yml +2 -2
- data/resources/shared/transforms/Jamo-Latn.yml +12 -0
- data/resources/shared/transforms/Knda-Beng.yml +13 -0
- data/resources/shared/transforms/Knda-Deva.yml +13 -0
- data/resources/shared/transforms/Knda-Gujr.yml +13 -0
- data/resources/shared/transforms/Knda-Guru.yml +13 -0
- data/resources/shared/transforms/Knda-Latn.yml +13 -0
- data/resources/shared/transforms/Knda-Mlym.yml +13 -0
- data/resources/shared/transforms/Knda-Orya.yml +13 -0
- data/resources/shared/transforms/Knda-Taml.yml +13 -0
- data/resources/shared/transforms/Knda-Telu.yml +13 -0
- data/resources/shared/transforms/Latin-ASCII.yml +16 -1
- data/resources/shared/transforms/Latin-InterIndic.yml +2 -2
- data/resources/shared/transforms/Latn-Armn.yml +90 -0
- data/resources/shared/transforms/Latn-Beng.yml +14 -0
- data/resources/shared/transforms/Latn-Bopo.yml +1336 -0
- data/resources/shared/transforms/Latn-Cans.yml +190 -0
- data/resources/shared/transforms/Latn-Deva.yml +14 -0
- data/resources/shared/transforms/Latn-Ethi.yml +278 -0
- data/resources/shared/transforms/Latn-Gujr.yml +14 -0
- data/resources/shared/transforms/Latn-Guru.yml +14 -0
- data/resources/shared/transforms/Latn-Hang.yml +13 -0
- data/resources/shared/transforms/Latn-Jamo.yml +13 -0
- data/resources/shared/transforms/Latn-Kana.yml +274 -0
- data/resources/shared/transforms/Latn-Knda.yml +14 -0
- data/resources/shared/transforms/Latn-Mlym.yml +14 -0
- data/resources/shared/transforms/Latn-Orya.yml +14 -0
- data/resources/shared/transforms/Latn-Taml.yml +14 -0
- data/resources/shared/transforms/Latn-Telu.yml +14 -0
- data/resources/shared/transforms/Latn-Thaa.yml +439 -0
- data/resources/shared/transforms/Latn-Thai.yml +13 -0
- data/resources/shared/transforms/Mlym-Beng.yml +13 -0
- data/resources/shared/transforms/Mlym-Deva.yml +13 -0
- data/resources/shared/transforms/Mlym-Gujr.yml +13 -0
- data/resources/shared/transforms/Mlym-Guru.yml +13 -0
- data/resources/shared/transforms/Mlym-Knda.yml +13 -0
- data/resources/shared/transforms/Mlym-Latn.yml +13 -0
- data/resources/shared/transforms/Mlym-Orya.yml +13 -0
- data/resources/shared/transforms/Mlym-Taml.yml +13 -0
- data/resources/shared/transforms/Mlym-Telu.yml +13 -0
- data/resources/shared/transforms/Orya-Beng.yml +13 -0
- data/resources/shared/transforms/Orya-Deva.yml +13 -0
- data/resources/shared/transforms/Orya-Gujr.yml +13 -0
- data/resources/shared/transforms/Orya-Guru.yml +13 -0
- data/resources/shared/transforms/Orya-Knda.yml +13 -0
- data/resources/shared/transforms/Orya-Latn.yml +13 -0
- data/resources/shared/transforms/Orya-Mlym.yml +13 -0
- data/resources/shared/transforms/Orya-Taml.yml +13 -0
- data/resources/shared/transforms/Orya-Telu.yml +13 -0
- data/resources/shared/transforms/Syrc-Latn.yml +55 -0
- data/resources/shared/transforms/Taml-Beng.yml +13 -0
- data/resources/shared/transforms/Taml-Deva.yml +13 -0
- data/resources/shared/transforms/Taml-Gujr.yml +13 -0
- data/resources/shared/transforms/Taml-Guru.yml +13 -0
- data/resources/shared/transforms/Taml-Knda.yml +13 -0
- data/resources/shared/transforms/Taml-Latn.yml +13 -0
- data/resources/shared/transforms/Taml-Mlym.yml +13 -0
- data/resources/shared/transforms/Taml-Orya.yml +13 -0
- data/resources/shared/transforms/Taml-Telu.yml +13 -0
- data/resources/shared/transforms/Telu-Beng.yml +13 -0
- data/resources/shared/transforms/Telu-Deva.yml +13 -0
- data/resources/shared/transforms/Telu-Gujr.yml +13 -0
- data/resources/shared/transforms/Telu-Guru.yml +13 -0
- data/resources/shared/transforms/Telu-Knda.yml +13 -0
- data/resources/shared/transforms/Telu-Latn.yml +13 -0
- data/resources/shared/transforms/Telu-Mlym.yml +13 -0
- data/resources/shared/transforms/Telu-Orya.yml +13 -0
- data/resources/shared/transforms/Telu-Taml.yml +13 -0
- data/resources/shared/transforms/Thai-Latn.yml +15 -0
- data/resources/shared/transforms/am-am_FONIPA.yml +609 -0
- data/resources/shared/transforms/am-am_Latn-BGN.yml +336 -0
- data/resources/shared/transforms/am-ar.yml +11 -0
- data/resources/shared/transforms/am-fa.yml +10 -0
- data/resources/shared/transforms/ar-ar_Latn-BGN.yml +122 -0
- data/resources/shared/transforms/az_Cyrl-az-BGN.yml +93 -0
- data/resources/shared/transforms/be-be_Latn-BGN.yml +108 -0
- data/resources/shared/transforms/bg-bg_Latn-BGN.yml +99 -0
- data/resources/shared/transforms/ch-am.yml +10 -0
- data/resources/shared/transforms/ch-ar.yml +10 -0
- data/resources/shared/transforms/ch-ch_FONIPA.yml +0 -8
- data/resources/shared/transforms/ch-fa.yml +10 -0
- data/resources/shared/transforms/cs-am.yml +10 -0
- data/resources/shared/transforms/cs-ar.yml +10 -0
- data/resources/shared/transforms/cs-fa.yml +10 -0
- data/resources/shared/transforms/dsb-dsb_FONIPA.yml +0 -5
- data/resources/shared/transforms/dv-dv_Latn-BGN.yml +112 -0
- data/resources/shared/transforms/el-el_Latn-BGN.yml +208 -0
- data/resources/shared/transforms/eo-am.yml +10 -0
- data/resources/shared/transforms/eo-ar.yml +10 -0
- data/resources/shared/transforms/eo-eo_FONIPA.yml +52 -0
- data/resources/shared/transforms/eo-fa.yml +10 -0
- data/resources/shared/transforms/es-ar.yml +13 -0
- data/resources/shared/transforms/es-fa.yml +13 -0
- data/resources/shared/transforms/es_419-am.yml +11 -0
- data/resources/shared/transforms/es_419-ar.yml +14 -0
- data/resources/shared/transforms/es_419-fa.yml +14 -0
- data/resources/shared/transforms/fa-fa_Latn-BGN.yml +123 -0
- data/resources/shared/transforms/he-he_Latn-BGN.yml +62 -0
- data/resources/shared/transforms/hy-am.yml +10 -0
- data/resources/shared/transforms/hy-ar.yml +10 -0
- data/resources/shared/transforms/hy-fa.yml +10 -0
- data/resources/shared/transforms/hy-hy_FONIPA.yml +56 -0
- data/resources/shared/transforms/hy-hy_Latn-BGN.yml +133 -0
- data/resources/shared/transforms/hy_AREVMDA-am.yml +10 -0
- data/resources/shared/transforms/hy_AREVMDA-ar.yml +10 -0
- data/resources/shared/transforms/hy_AREVMDA-fa.yml +10 -0
- data/resources/shared/transforms/hy_AREVMDA-hy_AREVMDA_FONIPA.yml +82 -0
- data/resources/shared/transforms/ia-am.yml +10 -0
- data/resources/shared/transforms/ia-ar.yml +10 -0
- data/resources/shared/transforms/ia-fa.yml +10 -0
- data/resources/shared/transforms/ia-ia_FONIPA.yml +69 -0
- data/resources/shared/transforms/ja_Hrkt-ja_Latn-BGN.yml +310 -0
- data/resources/shared/transforms/ka-ka_Latn-BGN.yml +44 -0
- data/resources/shared/transforms/kk-am.yml +10 -0
- data/resources/shared/transforms/kk-ar.yml +10 -0
- data/resources/shared/transforms/kk-fa.yml +10 -0
- data/resources/shared/transforms/kk-kk_FONIPA.yml +53 -0
- data/resources/shared/transforms/kk-kk_Latn-BGN.yml +136 -0
- data/resources/shared/transforms/ko-ko_Latn-BGN.yml +282 -0
- data/resources/shared/transforms/ky-am.yml +10 -0
- data/resources/shared/transforms/ky-ar.yml +10 -0
- data/resources/shared/transforms/ky-fa.yml +10 -0
- data/resources/shared/transforms/ky-ky_FONIPA.yml +73 -0
- data/resources/shared/transforms/ky-ky_Latn-BGN.yml +107 -0
- data/resources/shared/transforms/la-la_FONIPA.yml +0 -8
- data/resources/shared/transforms/mk-mk_Latn-BGN.yml +89 -0
- data/resources/shared/transforms/mn-mn_Latn-BGN.yml +101 -0
- data/resources/shared/transforms/mn-mn_Latn-MNS.yml +89 -0
- data/resources/shared/transforms/my-am.yml +10 -0
- data/resources/shared/transforms/my-ar.yml +10 -0
- data/resources/shared/transforms/my-fa.yml +10 -0
- data/resources/shared/transforms/my-my_FONIPA.yml +260 -0
- data/resources/shared/transforms/pl-am.yml +10 -0
- data/resources/shared/transforms/pl-ar.yml +10 -0
- data/resources/shared/transforms/pl-fa.yml +10 -0
- data/resources/shared/transforms/ps-ps_Latn-BGN.yml +151 -0
- data/resources/shared/transforms/rm_SURSILV-am.yml +10 -0
- data/resources/shared/transforms/rm_SURSILV-ar.yml +10 -0
- data/resources/shared/transforms/rm_SURSILV-fa.yml +10 -0
- data/resources/shared/transforms/rm_SURSILV-rm_FONIPA_SURSILV.yml +84 -0
- data/resources/shared/transforms/ro-am.yml +10 -0
- data/resources/shared/transforms/ro-ar.yml +10 -0
- data/resources/shared/transforms/ro-fa.yml +10 -0
- data/resources/shared/transforms/ro-ro_FONIPA.yml +38 -6
- data/resources/shared/transforms/ro_FONIPA-ja.yml +1 -0
- data/resources/shared/transforms/ru-ru_Latn-BGN.yml +121 -0
- data/resources/shared/transforms/ru_Latn-ru-BGN.yml +101 -0
- data/resources/shared/transforms/sat-am.yml +10 -0
- data/resources/shared/transforms/sat-ar.yml +10 -0
- data/resources/shared/transforms/sat-fa.yml +10 -0
- data/resources/shared/transforms/sat_Olck-sat_FONIPA.yml +132 -0
- data/resources/shared/transforms/si-am.yml +10 -0
- data/resources/shared/transforms/si-ar.yml +10 -0
- data/resources/shared/transforms/si-fa.yml +10 -0
- data/resources/shared/transforms/si-si_FONIPA.yml +128 -0
- data/resources/shared/transforms/si-si_Latn.yml +96 -0
- data/resources/shared/transforms/sk-am.yml +10 -0
- data/resources/shared/transforms/sk-ar.yml +10 -0
- data/resources/shared/transforms/sk-fa.yml +10 -0
- data/resources/shared/transforms/sk-sk_FONIPA.yml +18 -2
- data/resources/shared/transforms/sk_FONIPA-ja.yml +2 -0
- data/resources/shared/transforms/sr-sr_Latn-BGN.yml +81 -0
- data/resources/shared/transforms/tk_Cyrl-tk-BGN.yml +122 -0
- data/resources/shared/transforms/tlh-am.yml +10 -0
- data/resources/shared/transforms/tlh-ar.yml +10 -0
- data/resources/shared/transforms/tlh-fa.yml +10 -0
- data/resources/shared/transforms/tlh-tlh_FONIPA.yml +0 -8
- data/resources/shared/transforms/uk-uk_Latn-BGN.yml +115 -0
- data/resources/shared/transforms/und_FONIPA-ar.yml +96 -0
- data/resources/shared/transforms/und_FONIPA-fa.yml +88 -0
- data/resources/shared/transforms/und_FONIPA-und_FONXSAMP.yml +198 -0
- data/resources/shared/transforms/uz_Cyrl-uz-BGN.yml +117 -0
- data/resources/shared/transforms/xh-am.yml +10 -0
- data/resources/shared/transforms/xh-ar.yml +10 -0
- data/resources/shared/transforms/xh-fa.yml +10 -0
- data/resources/shared/transforms/xh-xh_FONIPA.yml +71 -0
- data/resources/shared/transforms/zu-am.yml +10 -0
- data/resources/shared/transforms/zu-ar.yml +10 -0
- data/resources/shared/transforms/zu-fa.yml +10 -0
- data/resources/shared/transforms/zu-zu_FONIPA.yml +58 -0
- data/spec/formatters/numbers/rbnf/rbnf_spec.rb +3 -1
- data/spec/resources/loader_spec.rb +12 -5
- data/spec/spec_helper.rb +1 -1
- metadata +242 -10
- data/History.txt +0 -282
- data/lib/twitter_cldr/resources/collation_tries_dumper.rb +0 -43
- data/lib/twitter_cldr/resources/custom_locales_resources_importer.rb +0 -80
- data/lib/twitter_cldr/resources/download.rb +0 -64
- data/lib/twitter_cldr/resources/icu_based_importer.rb +0 -18
- data/lib/twitter_cldr/resources/properties/properties_importer.rb +0 -59
- data/lib/twitter_cldr/resources/unicode_importer.rb +0 -37
@@ -7,24 +7,35 @@ require 'erb'
|
|
7
7
|
|
8
8
|
module TwitterCldr
|
9
9
|
module Resources
|
10
|
-
class CasefolderClassGenerator
|
10
|
+
class CasefolderClassGenerator < Importer
|
11
11
|
|
12
|
-
|
12
|
+
requirement :dependency, [UnicodeDataImporter]
|
13
|
+
output_path File.join(TwitterCldr::LIB_DIR, 'twitter_cldr', 'shared')
|
14
|
+
parameter :template_file, File.join(TwitterCldr::LIB_DIR, 'twitter_cldr', 'resources', 'casefolder.rb.erb')
|
15
|
+
ruby_engine :mri
|
13
16
|
|
14
|
-
|
15
|
-
@template_file = template_file
|
16
|
-
@output_dir = output_dir
|
17
|
-
end
|
17
|
+
private
|
18
18
|
|
19
|
-
def
|
20
|
-
|
21
|
-
File.open(File.join(output_dir, output_file), "w+") do |f|
|
19
|
+
def execute
|
20
|
+
File.open(output_path, 'w+') do |f|
|
22
21
|
f.write(
|
23
22
|
ERB.new(File.read(template_file)).result(binding)
|
24
23
|
)
|
25
24
|
end
|
26
25
|
end
|
27
26
|
|
27
|
+
def template_file
|
28
|
+
params.fetch(:template_file)
|
29
|
+
end
|
30
|
+
|
31
|
+
def output_path
|
32
|
+
File.join(params.fetch(:output_path), output_file)
|
33
|
+
end
|
34
|
+
|
35
|
+
def output_file
|
36
|
+
File.basename(template_file).chomp('.erb')
|
37
|
+
end
|
38
|
+
|
28
39
|
def casefolding_char_class_for(status)
|
29
40
|
to_regex_char_sequence(casefolding_data_for(status))
|
30
41
|
end
|
@@ -48,8 +59,6 @@ module TwitterCldr
|
|
48
59
|
str << "\n#{" " * (indent - 1)}}"
|
49
60
|
end
|
50
61
|
|
51
|
-
private
|
52
|
-
|
53
62
|
def to_regex_char_sequence(casefold_data)
|
54
63
|
casefold_data.map { |(source, _)| to_utf8(source) }.join("|")
|
55
64
|
end
|
@@ -67,9 +76,9 @@ module TwitterCldr
|
|
67
76
|
end
|
68
77
|
|
69
78
|
def resource
|
70
|
-
|
79
|
+
@resource ||= TwitterCldr.get_resource('unicode_data', 'casefolding')
|
71
80
|
end
|
72
81
|
|
73
82
|
end
|
74
83
|
end
|
75
|
-
end
|
84
|
+
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
# Copyright 2012 Twitter, Inc
|
4
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
5
|
+
|
6
|
+
module TwitterCldr
|
7
|
+
module Resources
|
8
|
+
class CollationTriesImporter < Importer
|
9
|
+
requirement :dependency, [TailoringImporter]
|
10
|
+
locales TwitterCldr.supported_locales
|
11
|
+
ruby_engine :jruby
|
12
|
+
|
13
|
+
private
|
14
|
+
|
15
|
+
def execute
|
16
|
+
update_default_trie_dump
|
17
|
+
|
18
|
+
params.fetch(:locales).each do |locale|
|
19
|
+
update_tailoring_trie_dump(locale)
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
private
|
24
|
+
|
25
|
+
def update_default_trie_dump
|
26
|
+
save_trie_dump(TwitterCldr::Collation::TrieLoader::DEFAULT_TRIE_LOCALE, default_trie)
|
27
|
+
end
|
28
|
+
|
29
|
+
def update_tailoring_trie_dump(locale)
|
30
|
+
save_trie_dump(locale, TwitterCldr::Collation::TrieBuilder.load_tailored_trie(locale, @default_trie))
|
31
|
+
end
|
32
|
+
|
33
|
+
def save_trie_dump(locale, trie)
|
34
|
+
path = TwitterCldr::Collation::TrieLoader.dump_path(locale)
|
35
|
+
FileUtils.mkdir_p(File.dirname(path))
|
36
|
+
File.write(path, Marshal.dump(trie))
|
37
|
+
end
|
38
|
+
|
39
|
+
def default_trie
|
40
|
+
@default_trie ||= TwitterCldr::Collation::TrieBuilder.load_default_trie
|
41
|
+
end
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
@@ -6,28 +6,19 @@
|
|
6
6
|
module TwitterCldr
|
7
7
|
module Resources
|
8
8
|
|
9
|
-
class HyphenationImporter
|
9
|
+
class HyphenationImporter < Importer
|
10
|
+
GIT_SHA = '0d3b5e5314e68c3cf5d573b2e7bdc11143dcb821'
|
10
11
|
REPO_URL = 'git@github.com:LibreOffice/dictionaries.git'
|
11
12
|
ENCODING_MAP = {
|
12
13
|
'microsoft-cp1251' => Encoding::Windows_1251
|
13
14
|
}
|
14
15
|
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
# output_path - output directory for generated YAML files
|
19
|
-
# ref - git ref to use
|
20
|
-
#
|
21
|
-
def initialize(input_path, output_path, ref)
|
22
|
-
@input_path = input_path
|
23
|
-
@output_path = output_path
|
24
|
-
@ref = ref
|
25
|
-
end
|
16
|
+
requirement :git, REPO_URL, GIT_SHA
|
17
|
+
output_path 'shared/hyphenation'
|
18
|
+
ruby_engine :mri
|
26
19
|
|
27
|
-
def
|
28
|
-
FileUtils.mkdir_p(
|
29
|
-
FileUtils.mkdir_p(@output_path)
|
30
|
-
clone_or_fetch_if_necessary
|
20
|
+
def execute
|
21
|
+
FileUtils.mkdir_p(output_path)
|
31
22
|
|
32
23
|
each_dictionary do |path, locale|
|
33
24
|
import_dictionary(path, locale)
|
@@ -69,7 +60,7 @@ module TwitterCldr
|
|
69
60
|
options.delete(:encoding)
|
70
61
|
|
71
62
|
File.write(
|
72
|
-
File.join(
|
63
|
+
File.join(output_path, "#{locale}.yml"),
|
73
64
|
YAML.dump({ options: options, rules: rules })
|
74
65
|
)
|
75
66
|
end
|
@@ -78,37 +69,20 @@ module TwitterCldr
|
|
78
69
|
ENCODING_MAP.fetch(encoding.downcase, encoding)
|
79
70
|
end
|
80
71
|
|
81
|
-
def
|
82
|
-
|
83
|
-
locale = TwitterCldr::Shared::Locale.parse(File.basename(path)[5..-5])
|
84
|
-
yield path, locale.dasherized
|
85
|
-
end
|
86
|
-
end
|
87
|
-
|
88
|
-
def dictionary_path
|
89
|
-
File.join(@input_path, 'dictionaries')
|
72
|
+
def source_path
|
73
|
+
requirements[:git].source_path
|
90
74
|
end
|
91
75
|
|
92
|
-
def
|
93
|
-
|
94
|
-
unless ref_exists?
|
95
|
-
in_repo { `git fetch` }
|
96
|
-
end
|
97
|
-
else
|
98
|
-
`git clone #{REPO_URL} #{@input_path}`
|
99
|
-
end
|
76
|
+
def output_path
|
77
|
+
params.fetch(:output_path)
|
100
78
|
end
|
101
79
|
|
102
|
-
def
|
103
|
-
|
104
|
-
|
105
|
-
|
80
|
+
def each_dictionary
|
81
|
+
Dir.glob(File.join(source_path, '**/hyph_*.dic')) do |path|
|
82
|
+
locale = TwitterCldr::Shared::Locale.parse(File.basename(path)[5..-5])
|
83
|
+
yield path, locale.dasherized
|
106
84
|
end
|
107
85
|
end
|
108
|
-
|
109
|
-
def in_repo(&block)
|
110
|
-
Dir.chdir(@input_path, &block)
|
111
|
-
end
|
112
86
|
end
|
113
87
|
|
114
88
|
end
|
@@ -0,0 +1,71 @@
|
|
1
|
+
require 'tsort'
|
2
|
+
|
3
|
+
module TwitterCldr
|
4
|
+
module Resources
|
5
|
+
class ImportResolver
|
6
|
+
include TSort
|
7
|
+
|
8
|
+
attr_reader :importers
|
9
|
+
|
10
|
+
def initialize(importers = Resources.importer_classes_for_ruby_engine)
|
11
|
+
@importers = importers
|
12
|
+
end
|
13
|
+
|
14
|
+
def import
|
15
|
+
check_unmet_deps
|
16
|
+
import_in_order
|
17
|
+
end
|
18
|
+
|
19
|
+
private
|
20
|
+
|
21
|
+
def import_in_order
|
22
|
+
tsort.each { |instance| instance.import }
|
23
|
+
end
|
24
|
+
|
25
|
+
def tsort_each_node(&block)
|
26
|
+
instances.each(&block)
|
27
|
+
end
|
28
|
+
|
29
|
+
def tsort_each_child(instance, &block)
|
30
|
+
deps_for(instance).map do |dep_class|
|
31
|
+
yield instances.find { |ins| ins.class == dep_class }
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
def check_unmet_deps
|
36
|
+
instances.each do |instance|
|
37
|
+
check_unmet_instance_deps(instance)
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
def check_unmet_instance_deps(instance)
|
42
|
+
unmet_deps = unmet_deps_for(instance)
|
43
|
+
|
44
|
+
unless unmet_deps.empty?
|
45
|
+
list = unmet_deps.map { |d| d.name }.join(', ')
|
46
|
+
raise "#{instance.class} dependencies are not met: #{list}"
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
def instances
|
51
|
+
@instances ||= importers.map do |importer|
|
52
|
+
importer.is_a?(Class) ? importer.new : importer
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
def unmet_deps_for(instance)
|
57
|
+
deps_for(instance).reject do |dep_class|
|
58
|
+
instances.any? { |ins| ins.class == dep_class }
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
62
|
+
def deps_for(instance)
|
63
|
+
if dep_requirement = instance.requirements[:dependency]
|
64
|
+
dep_requirement.importer_classes
|
65
|
+
else
|
66
|
+
[]
|
67
|
+
end
|
68
|
+
end
|
69
|
+
end
|
70
|
+
end
|
71
|
+
end
|
@@ -0,0 +1,107 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
# Copyright 2012 Twitter, Inc
|
4
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
5
|
+
|
6
|
+
module TwitterCldr
|
7
|
+
module Resources
|
8
|
+
|
9
|
+
class Importer
|
10
|
+
DEFAULT_ENGINE = :mri
|
11
|
+
|
12
|
+
class << self
|
13
|
+
def requirement(name, *args)
|
14
|
+
const_name = "#{name.to_s.capitalize}Requirement".to_sym
|
15
|
+
requirements[name] = Requirements.const_get(const_name).new(*args)
|
16
|
+
end
|
17
|
+
|
18
|
+
def ruby_engine(engine)
|
19
|
+
@ruby_engine = engine
|
20
|
+
end
|
21
|
+
|
22
|
+
def output_path(path)
|
23
|
+
@output_path = if path.start_with?('/')
|
24
|
+
path
|
25
|
+
else
|
26
|
+
File.join(TwitterCldr::RESOURCES_DIR, path)
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
def locales(locs)
|
31
|
+
@locales = locs
|
32
|
+
end
|
33
|
+
|
34
|
+
def parameter(key, value)
|
35
|
+
parameters[key] = value
|
36
|
+
end
|
37
|
+
|
38
|
+
def default_params
|
39
|
+
parameters.merge(
|
40
|
+
output_path: @output_path,
|
41
|
+
locales: @locales,
|
42
|
+
ruby_engine: @ruby_engine || DEFAULT_ENGINE
|
43
|
+
)
|
44
|
+
end
|
45
|
+
|
46
|
+
def requirements
|
47
|
+
@requirements ||= {}
|
48
|
+
end
|
49
|
+
|
50
|
+
def parameters
|
51
|
+
@parameters ||= {}
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
attr_reader :params, :requirements
|
56
|
+
|
57
|
+
def initialize(options = {})
|
58
|
+
@params = self.class.default_params.merge(options)
|
59
|
+
@requirements = self.class.requirements
|
60
|
+
end
|
61
|
+
|
62
|
+
def can_import?
|
63
|
+
importability_errors.empty?
|
64
|
+
end
|
65
|
+
|
66
|
+
def import
|
67
|
+
if can_import?
|
68
|
+
prepare
|
69
|
+
execute
|
70
|
+
else
|
71
|
+
raise "Can't import #{self.class.name}: #{importability_errors.first}"
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
75
|
+
def prepare
|
76
|
+
before_prepare
|
77
|
+
requirements.each { |_, req| req.prepare }
|
78
|
+
after_prepare
|
79
|
+
end
|
80
|
+
|
81
|
+
private
|
82
|
+
|
83
|
+
def importability_errors
|
84
|
+
@importability_errors ||= [].tap do |errors|
|
85
|
+
errors << 'incompatible RUBY_ENGINE' unless compatible_engine?
|
86
|
+
end
|
87
|
+
end
|
88
|
+
|
89
|
+
def compatible_engine?
|
90
|
+
case params.fetch(:ruby_engine)
|
91
|
+
when :mri
|
92
|
+
RUBY_ENGINE == 'ruby'
|
93
|
+
when :jruby
|
94
|
+
RUBY_ENGINE == 'jruby'
|
95
|
+
else
|
96
|
+
false
|
97
|
+
end
|
98
|
+
end
|
99
|
+
|
100
|
+
def before_prepare
|
101
|
+
end
|
102
|
+
|
103
|
+
def after_prepare
|
104
|
+
end
|
105
|
+
end
|
106
|
+
end
|
107
|
+
end
|
@@ -3,12 +3,12 @@
|
|
3
3
|
# Copyright 2012 Twitter, Inc
|
4
4
|
# http://www.apache.org/licenses/LICENSE-2.0
|
5
5
|
|
6
|
-
require '
|
6
|
+
require 'open-uri'
|
7
7
|
|
8
8
|
module TwitterCldr
|
9
9
|
module Resources
|
10
10
|
|
11
|
-
class LanguageCodesImporter
|
11
|
+
class LanguageCodesImporter < Importer
|
12
12
|
|
13
13
|
BCP_47_FILE, ISO_639_FILE = %w[bcp-47.txt iso-639.txt]
|
14
14
|
|
@@ -26,42 +26,39 @@ module TwitterCldr
|
|
26
26
|
bcp_47_alt: :bcp_47
|
27
27
|
}
|
28
28
|
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
29
|
+
output_path 'shared'
|
30
|
+
ruby_engine :mri
|
31
|
+
|
32
|
+
private
|
33
33
|
|
34
|
-
def
|
34
|
+
def execute
|
35
35
|
prepare_data
|
36
|
-
import_data
|
36
|
+
import_data
|
37
37
|
end
|
38
38
|
|
39
|
-
private
|
40
|
-
|
41
39
|
def prepare_data
|
42
40
|
INPUT_DATA.each do |file, url|
|
43
|
-
|
41
|
+
source_path = source_path_for(file)
|
42
|
+
open(source_path, 'wb') { |file| file << open(url).read }
|
44
43
|
end
|
45
44
|
end
|
46
45
|
|
47
|
-
def
|
46
|
+
def source_path_for(file)
|
47
|
+
File.join(TwitterCldr::VENDOR_DIR, file)
|
48
|
+
end
|
49
|
+
|
50
|
+
def import_data
|
48
51
|
result = import_iso_639
|
49
52
|
result = import_bcp_47(result)
|
50
53
|
|
51
54
|
language_codes = Hash[result.inject({}) { |memo, (key, value)| memo[key] = Hash[value.sort]; memo }.sort]
|
52
|
-
|
53
55
|
language_codes_table = build_table(language_codes)
|
54
56
|
|
55
|
-
write('language_codes_table.dump',
|
56
|
-
|
57
|
-
if import_yaml
|
58
|
-
write('language_codes.yml', 'w:utf-8', YAML.dump(language_codes))
|
59
|
-
write('language_codes_table.yml', 'w:utf-8', YAML.dump(language_codes_table))
|
60
|
-
end
|
57
|
+
write('language_codes_table.dump', Marshal.dump(language_codes_table))
|
61
58
|
end
|
62
59
|
|
63
|
-
def write(file,
|
64
|
-
File.
|
60
|
+
def write(file, data)
|
61
|
+
File.write(File.join(params.fetch(:output_path), file), data)
|
65
62
|
end
|
66
63
|
|
67
64
|
# Generates codes in the following format:
|
@@ -76,8 +73,8 @@ module TwitterCldr
|
|
76
73
|
# }
|
77
74
|
#
|
78
75
|
def import_iso_639(result = {})
|
79
|
-
File.open(
|
80
|
-
lines = file.
|
76
|
+
File.open(source_path_for(ISO_639_FILE)) do |file|
|
77
|
+
lines = file.each_line
|
81
78
|
lines.next # skip header
|
82
79
|
|
83
80
|
lines.each do |line|
|
@@ -120,8 +117,8 @@ module TwitterCldr
|
|
120
117
|
# }
|
121
118
|
# }
|
122
119
|
def import_bcp_47(result = {})
|
123
|
-
File.open(
|
124
|
-
lines = file.
|
120
|
+
File.open(source_path_for(BCP_47_FILE)) do |file|
|
121
|
+
lines = file.each_line
|
125
122
|
lines.next # skip header
|
126
123
|
|
127
124
|
data = {}
|
@@ -212,21 +209,21 @@ module TwitterCldr
|
|
212
209
|
end
|
213
210
|
|
214
211
|
ISO_639_COLUMNS = [
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
212
|
+
:code, # Code
|
213
|
+
:status, # Status
|
214
|
+
:partner_agency, # Partner Agency
|
215
|
+
:iso_639_3, # 639_3
|
216
|
+
:iso_639_2, # 639_2 (alpha-3 bibliographic/terminology code)
|
217
|
+
:b_code, # alpha-3 bibliographic code if iso_639_2 contains terminology code
|
218
|
+
:bt_equiv, # bt_equiv (alpha-3 bibliographic/terminology equivalent)
|
219
|
+
:iso_639_1, # 639_1
|
220
|
+
:name, # Reference_Name
|
221
|
+
:scope, # Element_Scope
|
222
|
+
:type, # Language_Type
|
223
|
+
:docs # Documentation
|
227
224
|
]
|
228
225
|
|
229
226
|
end
|
230
227
|
|
231
228
|
end
|
232
|
-
end
|
229
|
+
end
|