twitter_cldr 1.0.1 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/NOTICE +95 -1
- data/README.md +4 -4
- data/Rakefile +18 -28
- data/lib/ext/calendars/date.rb +3 -0
- data/lib/ext/calendars/datetime.rb +3 -0
- data/lib/ext/calendars/time.rb +3 -0
- data/lib/ext/localized_object.rb +3 -0
- data/lib/ext/numbers/bignum.rb +3 -0
- data/lib/ext/numbers/fixnum.rb +3 -0
- data/lib/ext/numbers/float.rb +3 -0
- data/lib/ext/numbers/localized_number.rb +3 -0
- data/lib/ext/strings/string.rb +31 -0
- data/lib/ext/strings/symbol.rb +3 -0
- data/lib/formatters/base.rb +3 -0
- data/lib/formatters/calendars/date_formatter.rb +3 -0
- data/lib/formatters/calendars/datetime_formatter.rb +3 -0
- data/lib/formatters/calendars/time_formatter.rb +3 -0
- data/lib/formatters/numbers/currency_formatter.rb +3 -0
- data/lib/formatters/numbers/decimal_formatter.rb +3 -0
- data/lib/formatters/numbers/helpers/base.rb +3 -0
- data/lib/formatters/numbers/helpers/fraction.rb +3 -0
- data/lib/formatters/numbers/helpers/integer.rb +3 -0
- data/lib/formatters/numbers/number_formatter.rb +3 -0
- data/lib/formatters/numbers/percent_formatter.rb +3 -0
- data/lib/formatters/plurals/plural_formatter.rb +141 -0
- data/lib/formatters/plurals/rules.rb +4 -1
- data/lib/normalizers/base.rb +17 -0
- data/lib/normalizers/canonical/nfd.rb +81 -0
- data/lib/shared/currencies.rb +4 -1
- data/lib/shared/languages.rb +4 -1
- data/lib/shared/resources.rb +8 -28
- data/lib/shared/timezones.rb +3 -0
- data/lib/shared/unicode_data.rb +44 -0
- data/lib/tokenizers/base.rb +3 -0
- data/lib/tokenizers/calendars/date_tokenizer.rb +3 -0
- data/lib/tokenizers/calendars/datetime_tokenizer.rb +4 -1
- data/lib/tokenizers/calendars/time_tokenizer.rb +3 -0
- data/lib/tokenizers/key_path.rb +3 -0
- data/lib/tokenizers/numbers/number_tokenizer.rb +4 -1
- data/lib/tokenizers/token.rb +3 -0
- data/lib/twitter_cldr.rb +52 -29
- data/lib/utils/interpolation.rb +105 -0
- data/lib/utils.rb +28 -0
- data/lib/version.rb +6 -1
- data/resources/unicode_data/aegean_numbers.yml +913 -0
- data/resources/unicode_data/alchemical_symbols.yml +1857 -0
- data/resources/unicode_data/alphabetic_presentation_forms.yml +929 -0
- data/resources/unicode_data/ancient_greek_musical_notation.yml +1121 -0
- data/resources/unicode_data/ancient_greek_numbers.yml +1201 -0
- data/resources/unicode_data/ancient_symbols.yml +193 -0
- data/resources/unicode_data/arabic.yml +4049 -0
- data/resources/unicode_data/arabic_extended_a.yml +625 -0
- data/resources/unicode_data/arabic_mathematical_alphabetic_symbols.yml +2289 -0
- data/resources/unicode_data/arabic_presentation_forms_a.yml +9777 -0
- data/resources/unicode_data/arabic_presentation_forms_b.yml +2257 -0
- data/resources/unicode_data/arabic_supplement.yml +769 -0
- data/resources/unicode_data/armenian.yml +1393 -0
- data/resources/unicode_data/arrows.yml +1793 -0
- data/resources/unicode_data/avestan.yml +977 -0
- data/resources/unicode_data/balinese.yml +1937 -0
- data/resources/unicode_data/bamum.yml +1409 -0
- data/resources/unicode_data/bamum_supplement.yml +9105 -0
- data/resources/unicode_data/basic_latin.yml +2049 -0
- data/resources/unicode_data/batak.yml +897 -0
- data/resources/unicode_data/bengali.yml +1473 -0
- data/resources/unicode_data/block_elements.yml +513 -0
- data/resources/unicode_data/blocks.yml +881 -0
- data/resources/unicode_data/bopomofo.yml +657 -0
- data/resources/unicode_data/bopomofo_extended.yml +433 -0
- data/resources/unicode_data/box_drawing.yml +2049 -0
- data/resources/unicode_data/brahmi.yml +1729 -0
- data/resources/unicode_data/braille_patterns.yml +4097 -0
- data/resources/unicode_data/buginese.yml +481 -0
- data/resources/unicode_data/buhid.yml +321 -0
- data/resources/unicode_data/byzantine_musical_symbols.yml +3937 -0
- data/resources/unicode_data/carian.yml +785 -0
- data/resources/unicode_data/chakma.yml +1073 -0
- data/resources/unicode_data/cham.yml +1329 -0
- data/resources/unicode_data/cherokee.yml +1361 -0
- data/resources/unicode_data/cjk_compatibility.yml +4097 -0
- data/resources/unicode_data/cjk_compatibility_forms.yml +513 -0
- data/resources/unicode_data/cjk_compatibility_ideographs.yml +7553 -0
- data/resources/unicode_data/cjk_compatibility_ideographs_supplement.yml +8673 -0
- data/resources/unicode_data/cjk_radicals_supplement.yml +1841 -0
- data/resources/unicode_data/cjk_strokes.yml +577 -0
- data/resources/unicode_data/cjk_symbols_and_punctuation.yml +1025 -0
- data/resources/unicode_data/cjk_unified_ideographs.yml +33 -0
- data/resources/unicode_data/cjk_unified_ideographs_extension_a.yml +33 -0
- data/resources/unicode_data/cjk_unified_ideographs_extension_b.yml +33 -0
- data/resources/unicode_data/cjk_unified_ideographs_extension_c.yml +33 -0
- data/resources/unicode_data/cjk_unified_ideographs_extension_d.yml +33 -0
- data/resources/unicode_data/combining_diacritical_marks.yml +1793 -0
- data/resources/unicode_data/combining_diacritical_marks_for_symbols.yml +529 -0
- data/resources/unicode_data/combining_diacritical_marks_supplement.yml +689 -0
- data/resources/unicode_data/combining_half_marks.yml +113 -0
- data/resources/unicode_data/common_indic_number_forms.yml +161 -0
- data/resources/unicode_data/control_pictures.yml +625 -0
- data/resources/unicode_data/coptic.yml +1969 -0
- data/resources/unicode_data/counting_rod_numerals.yml +289 -0
- data/resources/unicode_data/cuneiform.yml +14065 -0
- data/resources/unicode_data/cuneiform_numbers_and_punctuation.yml +1649 -0
- data/resources/unicode_data/currency_symbols.yml +417 -0
- data/resources/unicode_data/cypriot_syllabary.yml +881 -0
- data/resources/unicode_data/cyrillic.yml +4097 -0
- data/resources/unicode_data/cyrillic_extended_a.yml +513 -0
- data/resources/unicode_data/cyrillic_extended_b.yml +1425 -0
- data/resources/unicode_data/cyrillic_supplement.yml +641 -0
- data/resources/unicode_data/deseret.yml +1281 -0
- data/resources/unicode_data/devanagari.yml +2033 -0
- data/resources/unicode_data/devanagari_extended.yml +449 -0
- data/resources/unicode_data/dingbats.yml +3057 -0
- data/resources/unicode_data/domino_tiles.yml +1601 -0
- data/resources/unicode_data/egyptian_hieroglyphs.yml +17137 -0
- data/resources/unicode_data/emoticons.yml +1217 -0
- data/resources/unicode_data/enclosed_alphanumeric_supplement.yml +2737 -0
- data/resources/unicode_data/enclosed_alphanumerics.yml +2561 -0
- data/resources/unicode_data/enclosed_cjk_letters_and_months.yml +4065 -0
- data/resources/unicode_data/enclosed_ideographic_supplement.yml +913 -0
- data/resources/unicode_data/ethiopic.yml +5729 -0
- data/resources/unicode_data/ethiopic_extended.yml +1265 -0
- data/resources/unicode_data/ethiopic_extended_a.yml +513 -0
- data/resources/unicode_data/ethiopic_supplement.yml +417 -0
- data/resources/unicode_data/general_punctuation.yml +1713 -0
- data/resources/unicode_data/geometric_shapes.yml +1537 -0
- data/resources/unicode_data/georgian.yml +1409 -0
- data/resources/unicode_data/georgian_supplement.yml +641 -0
- data/resources/unicode_data/glagolitic.yml +1505 -0
- data/resources/unicode_data/gothic.yml +433 -0
- data/resources/unicode_data/greek_and_coptic.yml +2145 -0
- data/resources/unicode_data/greek_extended.yml +3729 -0
- data/resources/unicode_data/gujarati.yml +1345 -0
- data/resources/unicode_data/gurmukhi.yml +1265 -0
- data/resources/unicode_data/halfwidth_and_fullwidth_forms.yml +3601 -0
- data/resources/unicode_data/hangul_compatibility_jamo.yml +1505 -0
- data/resources/unicode_data/hangul_jamo.yml +4097 -0
- data/resources/unicode_data/hangul_jamo_extended_a.yml +465 -0
- data/resources/unicode_data/hangul_jamo_extended_b.yml +1153 -0
- data/resources/unicode_data/hangul_syllables.yml +33 -0
- data/resources/unicode_data/hanunoo.yml +369 -0
- data/resources/unicode_data/hebrew.yml +1393 -0
- data/resources/unicode_data/high_private_use_surrogates.yml +33 -0
- data/resources/unicode_data/high_surrogates.yml +33 -0
- data/resources/unicode_data/hiragana.yml +1489 -0
- data/resources/unicode_data/ideographic_description_characters.yml +193 -0
- data/resources/unicode_data/imperial_aramaic.yml +497 -0
- data/resources/unicode_data/inscriptional_pahlavi.yml +433 -0
- data/resources/unicode_data/inscriptional_parthian.yml +481 -0
- data/resources/unicode_data/ipa_extensions.yml +1537 -0
- data/resources/unicode_data/javanese.yml +1457 -0
- data/resources/unicode_data/kaithi.yml +1057 -0
- data/resources/unicode_data/kana_supplement.yml +33 -0
- data/resources/unicode_data/kanbun.yml +257 -0
- data/resources/unicode_data/kangxi_radicals.yml +3425 -0
- data/resources/unicode_data/kannada.yml +1377 -0
- data/resources/unicode_data/katakana.yml +1537 -0
- data/resources/unicode_data/katakana_phonetic_extensions.yml +257 -0
- data/resources/unicode_data/kayah_li.yml +769 -0
- data/resources/unicode_data/kharoshthi.yml +1041 -0
- data/resources/unicode_data/khmer.yml +1825 -0
- data/resources/unicode_data/khmer_symbols.yml +513 -0
- data/resources/unicode_data/lao.yml +1073 -0
- data/resources/unicode_data/latin_1_supplement.yml +2049 -0
- data/resources/unicode_data/latin_extended_a.yml +2049 -0
- data/resources/unicode_data/latin_extended_additional.yml +4097 -0
- data/resources/unicode_data/latin_extended_b.yml +3329 -0
- data/resources/unicode_data/latin_extended_c.yml +513 -0
- data/resources/unicode_data/latin_extended_d.yml +2145 -0
- data/resources/unicode_data/lepcha.yml +1185 -0
- data/resources/unicode_data/letterlike_symbols.yml +1281 -0
- data/resources/unicode_data/limbu.yml +1057 -0
- data/resources/unicode_data/linear_b_ideograms.yml +1969 -0
- data/resources/unicode_data/linear_b_syllabary.yml +1409 -0
- data/resources/unicode_data/lisu.yml +769 -0
- data/resources/unicode_data/low_surrogates.yml +33 -0
- data/resources/unicode_data/lycian.yml +465 -0
- data/resources/unicode_data/lydian.yml +433 -0
- data/resources/unicode_data/mahjong_tiles.yml +705 -0
- data/resources/unicode_data/malayalam.yml +1569 -0
- data/resources/unicode_data/mandaic.yml +465 -0
- data/resources/unicode_data/mathematical_alphanumeric_symbols.yml +15937 -0
- data/resources/unicode_data/mathematical_operators.yml +4097 -0
- data/resources/unicode_data/meetei_mayek.yml +897 -0
- data/resources/unicode_data/meetei_mayek_extensions.yml +369 -0
- data/resources/unicode_data/meroitic_cursive.yml +417 -0
- data/resources/unicode_data/meroitic_hieroglyphs.yml +513 -0
- data/resources/unicode_data/miao.yml +2129 -0
- data/resources/unicode_data/miscellaneous_mathematical_symbols_a.yml +769 -0
- data/resources/unicode_data/miscellaneous_mathematical_symbols_b.yml +2049 -0
- data/resources/unicode_data/miscellaneous_symbols.yml +4097 -0
- data/resources/unicode_data/miscellaneous_symbols_and_arrows.yml +1393 -0
- data/resources/unicode_data/miscellaneous_symbols_and_pictographs.yml +8529 -0
- data/resources/unicode_data/miscellaneous_technical.yml +3905 -0
- data/resources/unicode_data/modifier_tone_letters.yml +513 -0
- data/resources/unicode_data/mongolian.yml +2497 -0
- data/resources/unicode_data/musical_symbols.yml +3521 -0
- data/resources/unicode_data/myanmar.yml +2561 -0
- data/resources/unicode_data/myanmar_extended_a.yml +449 -0
- data/resources/unicode_data/new_tai_lue.yml +1329 -0
- data/resources/unicode_data/nko.yml +945 -0
- data/resources/unicode_data/number_forms.yml +929 -0
- data/resources/unicode_data/ogham.yml +465 -0
- data/resources/unicode_data/ol_chiki.yml +769 -0
- data/resources/unicode_data/old_italic.yml +561 -0
- data/resources/unicode_data/old_persian.yml +801 -0
- data/resources/unicode_data/old_south_arabian.yml +513 -0
- data/resources/unicode_data/old_turkic.yml +1169 -0
- data/resources/unicode_data/optical_character_recognition.yml +177 -0
- data/resources/unicode_data/oriya.yml +1441 -0
- data/resources/unicode_data/osmanya.yml +641 -0
- data/resources/unicode_data/phags_pa.yml +897 -0
- data/resources/unicode_data/phaistos_disc.yml +737 -0
- data/resources/unicode_data/phoenician.yml +465 -0
- data/resources/unicode_data/phonetic_extensions.yml +2049 -0
- data/resources/unicode_data/phonetic_extensions_supplement.yml +1025 -0
- data/resources/unicode_data/playing_cards.yml +945 -0
- data/resources/unicode_data/private_use_area.yml +33 -0
- data/resources/unicode_data/rejang.yml +593 -0
- data/resources/unicode_data/rumi_numeral_symbols.yml +497 -0
- data/resources/unicode_data/runic.yml +1297 -0
- data/resources/unicode_data/samaritan.yml +977 -0
- data/resources/unicode_data/saurashtra.yml +1297 -0
- data/resources/unicode_data/sharada.yml +1329 -0
- data/resources/unicode_data/shavian.yml +769 -0
- data/resources/unicode_data/sinhala.yml +1281 -0
- data/resources/unicode_data/small_form_variants.yml +417 -0
- data/resources/unicode_data/sora_sompeng.yml +561 -0
- data/resources/unicode_data/spacing_modifier_letters.yml +1281 -0
- data/resources/unicode_data/specials.yml +81 -0
- data/resources/unicode_data/sundanese.yml +1025 -0
- data/resources/unicode_data/sundanese_supplement.yml +129 -0
- data/resources/unicode_data/superscripts_and_subscripts.yml +673 -0
- data/resources/unicode_data/supplemental_arrows_a.yml +257 -0
- data/resources/unicode_data/supplemental_arrows_b.yml +2049 -0
- data/resources/unicode_data/supplemental_mathematical_operators.yml +4097 -0
- data/resources/unicode_data/supplemental_punctuation.yml +961 -0
- data/resources/unicode_data/supplementary_private_use_area_a.yml +33 -0
- data/resources/unicode_data/supplementary_private_use_area_b.yml +33 -0
- data/resources/unicode_data/syloti_nagri.yml +705 -0
- data/resources/unicode_data/syriac.yml +1233 -0
- data/resources/unicode_data/tagalog.yml +321 -0
- data/resources/unicode_data/tagbanwa.yml +289 -0
- data/resources/unicode_data/tags.yml +1553 -0
- data/resources/unicode_data/tai_le.yml +561 -0
- data/resources/unicode_data/tai_tham.yml +2033 -0
- data/resources/unicode_data/tai_viet.yml +1153 -0
- data/resources/unicode_data/tai_xuan_jing_symbols.yml +1393 -0
- data/resources/unicode_data/takri.yml +1057 -0
- data/resources/unicode_data/tamil.yml +1153 -0
- data/resources/unicode_data/telugu.yml +1489 -0
- data/resources/unicode_data/thaana.yml +801 -0
- data/resources/unicode_data/thai.yml +1393 -0
- data/resources/unicode_data/tibetan.yml +3377 -0
- data/resources/unicode_data/tifinagh.yml +945 -0
- data/resources/unicode_data/transport_and_map_symbols.yml +1121 -0
- data/resources/unicode_data/ugaritic.yml +497 -0
- data/resources/unicode_data/unified_canadian_aboriginal_syllabics.yml +10241 -0
- data/resources/unicode_data/unified_canadian_aboriginal_syllabics_extended.yml +1121 -0
- data/resources/unicode_data/vai.yml +4801 -0
- data/resources/unicode_data/variation_selectors.yml +257 -0
- data/resources/unicode_data/variation_selectors_supplement.yml +3841 -0
- data/resources/unicode_data/vedic_extensions.yml +625 -0
- data/resources/unicode_data/vertical_forms.yml +161 -0
- data/resources/unicode_data/yi_radicals.yml +881 -0
- data/resources/unicode_data/yi_syllables.yml +18641 -0
- data/resources/unicode_data/yijing_hexagram_symbols.yml +1025 -0
- data/spec/ext/calendars/date_spec.rb +5 -1
- data/spec/ext/calendars/datetime_spec.rb +5 -1
- data/spec/ext/calendars/time_spec.rb +5 -1
- data/spec/ext/numbers/bignum_spec.rb +5 -1
- data/spec/ext/numbers/fixnum_spec.rb +5 -1
- data/spec/ext/numbers/float_spec.rb +5 -1
- data/spec/ext/numbers/localized_number_spec.rb +5 -1
- data/spec/ext/strings/string_spec.rb +102 -0
- data/spec/ext/strings/symbol_spec.rb +5 -1
- data/spec/formatters/base_spec.rb +5 -1
- data/spec/formatters/calendars/datetime_formatter_spec.rb +5 -1
- data/spec/formatters/numbers/currency_formatter_spec.rb +5 -1
- data/spec/formatters/numbers/decimal_formatter_spec.rb +5 -1
- data/spec/formatters/numbers/helpers/fraction_spec.rb +5 -1
- data/spec/formatters/numbers/helpers/integer_spec.rb +5 -1
- data/spec/formatters/numbers/number_formatter_spec.rb +6 -2
- data/spec/formatters/numbers/percent_formatter_spec.rb +5 -1
- data/spec/formatters/plurals/plural_formatter_spec.rb +205 -0
- data/spec/formatters/plurals/rules_spec.rb +28 -28
- data/spec/normalizers/NormalizationTest.txt +602 -0
- data/spec/normalizers/base_spec.rb +16 -0
- data/spec/normalizers/canonical/nfd_spec.rb +50 -0
- data/spec/shared/currencies_spec.rb +5 -1
- data/spec/shared/languages_spec.rb +5 -1
- data/spec/shared/resources_spec.rb +5 -18
- data/spec/shared/unicode_data_spec.rb +51 -0
- data/spec/spec_helper.rb +6 -3
- data/spec/tokenizers/base_spec.rb +3 -0
- data/spec/tokenizers/calendars/date_tokenizer_spec.rb +5 -1
- data/spec/tokenizers/calendars/datetime_tokenizer_spec.rb +5 -1
- data/spec/tokenizers/calendars/time_tokenizer_spec.rb +5 -1
- data/spec/tokenizers/key_path_spec.rb +3 -0
- data/spec/tokenizers/numbers/number_tokenizer_spec.rb +5 -1
- data/spec/tokenizers/token_spec.rb +5 -1
- data/spec/twitter_cldr_spec.rb +23 -1
- data/spec/utils/interpolation_spec.rb +124 -0
- data/spec/utils_spec.rb +32 -0
- metadata +285 -21
@@ -0,0 +1,81 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
module TwitterCldr
  module Normalizers
    # Canonical decomposition of strings (Unicode Normalization Form D).
    #
    # Code points are handled as hexadecimal strings (e.g. "1100"): they are
    # parsed with String#hex and produced with Integer#to_s(16).upcase.
    class NFD < Base
      # Constants for the arithmetic decomposition of Hangul syllables.
      # Documented in Section 3.12 at http://www.unicode.org/versions/Unicode6.1.0/ch03.pdf
      # (SCount = LCount * NCount = 11172 precomposed syllables).
      @@hangul_constants = {
        :SBase => "AC00".hex, :LBase => "1100".hex, :VBase => "1161".hex, :TBase => "11A7".hex,
        :LCount => 19, :VCount => 21, :TCount => 28, :NCount => 588, :SCount => 11172
      }

      class << self
        # Returns the NFD-normalized form of +string+.
        #
        # NOTE(review): char_to_code_point and code_point_to_char are not defined
        # in this class; presumably they are provided by Base - confirm.
        def normalize(string)
          # Convert string to code points
          code_points = string.split('').map { |char| char_to_code_point(char) }

          # Normalize code points and convert them back to a string
          normalize_code_points(code_points).map { |code_point| code_point_to_char(code_point) }.join
        end

        # Decomposes every code point in +code_points+ and puts the resulting
        # sequence into canonical order. Returns a new flat array of code points.
        def normalize_code_points(code_points)
          code_points = code_points.map { |code_point| decompose(code_point) }.flatten
          reorder(code_points) # sorts the freshly built array in place
          code_points
        end

        # Recursively replaces the given code point with the values in its
        # Decomposition_Mapping property.
        #
        # Returns the code point itself when it has no Unicode data, no mapping,
        # or only a compatibility mapping (one tagged with <...>); otherwise
        # returns an array of code points.
        def decompose(code_point)
          # Hangul syllables are decomposed arithmetically; they are recognised
          # by code point range rather than by inspecting the character name.
          return decompose_hangul(code_point) if hangul_syllable?(code_point)

          unicode_data = TwitterCldr::Shared::UnicodeData.for_code_point(code_point)
          return code_point unless unicode_data

          decomposition_mapping = unicode_data.decomposition.split

          # Return the code point if no mapping exists or if it is a compatibility mapping
          if decomposition_mapping.empty? || decomposition_mapping.first =~ /<.*>/
            code_point
          else
            decomposition_mapping.map { |mapped_code_point| decompose(mapped_code_point) }.flatten
          end
        end

        # Swaps any two adjacent code points A & B if ccc(A) > ccc(B) > 0,
        # repeating until the sequence is in canonical order.
        #
        # Sorts +code_points+ in place. Combining classes are looked up once per
        # code point and kept in a parallel array that is swapped in lockstep,
        # instead of being looked up again for every comparison.
        def reorder(code_points)
          combining_classes = code_points.map { |code_point| combining_class_for(code_point) }
          last_index = code_points.size - 1

          code_points.size.times do
            0.upto(last_index - 1) do |i|
              if (combining_classes[i] > combining_classes[i + 1]) && (combining_classes[i + 1] > 0)
                code_points[i], code_points[i + 1] = code_points[i + 1], code_points[i]
                combining_classes[i], combining_classes[i + 1] = combining_classes[i + 1], combining_classes[i]
              end
            end
          end
        end

        # Returns the canonical combining class of +code_point+ as an Integer,
        # or 0 when no Unicode data is available for it.
        def combining_class_for(code_point)
          unicode_data = TwitterCldr::Shared::UnicodeData.for_code_point(code_point)
          unicode_data ? unicode_data.combining_class.to_i : 0
        end

        private

        # True if +code_point+ lies in the precomposed Hangul syllable range
        # SBase...(SBase + SCount).
        def hangul_syllable?(code_point)
          s_index = code_point.hex - @@hangul_constants[:SBase]
          s_index >= 0 && s_index < @@hangul_constants[:SCount]
        end

        # Splits a precomposed Hangul syllable into its leading consonant, vowel
        # and (when present) trailing consonant code points.
        def decompose_hangul(code_point)
          s_index = code_point.hex - @@hangul_constants[:SBase]

          l_index = s_index / @@hangul_constants[:NCount]
          v_index = (s_index % @@hangul_constants[:NCount]) / @@hangul_constants[:TCount]
          t_index = s_index % @@hangul_constants[:TCount]

          parts = [
            (@@hangul_constants[:LBase] + l_index).to_s(16).upcase,
            (@@hangul_constants[:VBase] + v_index).to_s(16).upcase
          ]
          # A trailing-consonant index of 0 means the syllable has no trailing consonant.
          parts << (@@hangul_constants[:TBase] + t_index).to_s(16).upcase if t_index > 0
          parts
        end
      end
    end
  end
end
|
data/lib/shared/currencies.rb
CHANGED
@@ -1,9 +1,12 @@
|
|
1
1
|
# encoding: UTF-8
|
2
2
|
|
3
|
+
# Copyright 2012 Twitter, Inc
|
4
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
5
|
+
|
3
6
|
module TwitterCldr
|
4
7
|
module Shared
|
5
8
|
class Currencies
|
6
|
-
@@resource = TwitterCldr.
|
9
|
+
@@resource = TwitterCldr.get_resource("shared", "currencies")[:shared][:currencies]
|
7
10
|
|
8
11
|
class << self
|
9
12
|
def countries
|
data/lib/shared/languages.rb
CHANGED
@@ -1,5 +1,8 @@
|
|
1
1
|
# encoding: UTF-8
|
2
2
|
|
3
|
+
# Copyright 2012 Twitter, Inc
|
4
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
5
|
+
|
3
6
|
module TwitterCldr
|
4
7
|
module Shared
|
5
8
|
class Languages
|
@@ -42,7 +45,7 @@ module TwitterCldr
|
|
42
45
|
|
43
46
|
def get_resource(locale)
|
44
47
|
locale = TwitterCldr.convert_locale(locale)
|
45
|
-
TwitterCldr.
|
48
|
+
TwitterCldr.get_resource(locale, "languages")[locale]
|
46
49
|
end
|
47
50
|
end
|
48
51
|
end
|
data/lib/shared/resources.rb
CHANGED
@@ -1,47 +1,27 @@
|
|
1
1
|
# encoding: UTF-8
|
2
2
|
|
3
|
+
# Copyright 2012 Twitter, Inc
|
4
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
5
|
+
|
3
6
|
module TwitterCldr
|
4
7
|
module Shared
|
5
8
|
class Resources
|
6
9
|
def initialize
|
7
|
-
@resources_by_locale =
|
10
|
+
@resources_by_locale = Hash.new do |hash, locale|
|
11
|
+
hash[locale] = Hash.new { |h, resource| h[resource] = data_for(locale, resource) }
|
12
|
+
end
|
8
13
|
end
|
9
14
|
|
10
15
|
def resource_for(locale, resource)
|
11
|
-
locale
|
12
|
-
unless @resources_by_locale.include?(locale)
|
13
|
-
@resources_by_locale[locale] = {}
|
14
|
-
end
|
15
|
-
|
16
|
-
unless @resources_by_locale[locale].include?(resource)
|
17
|
-
@resources_by_locale[locale][resource] = data_for(locale, resource)
|
18
|
-
end
|
19
|
-
|
20
|
-
@resources_by_locale[locale][resource]
|
16
|
+
@resources_by_locale[locale.to_sym][resource]
|
21
17
|
end
|
22
18
|
|
23
19
|
protected
|
24
20
|
|
25
21
|
def data_for(locale, resource)
|
26
|
-
deep_symbolize_keys(YAML.load(File.read(TwitterCldr.get_resource_file(locale, resource))))
|
22
|
+
TwitterCldr::Utils.deep_symbolize_keys(YAML.load(File.read(TwitterCldr.get_resource_file(locale, resource))))
|
27
23
|
end
|
28
24
|
|
29
|
-
# adapted from: http://snippets.dzone.com/posts/show/11121 (first comment)
|
30
|
-
def deep_symbolize_keys(arg)
|
31
|
-
case arg
|
32
|
-
when Array then
|
33
|
-
arg.map { |elem| deep_symbolize_keys(elem) }
|
34
|
-
when Hash then
|
35
|
-
Hash[
|
36
|
-
arg.map do |key, value|
|
37
|
-
k = key.is_a?(String) ? key.to_sym : key
|
38
|
-
v = deep_symbolize_keys(value)
|
39
|
-
[k, v]
|
40
|
-
end]
|
41
|
-
else
|
42
|
-
arg
|
43
|
-
end
|
44
|
-
end
|
45
25
|
end
|
46
26
|
end
|
47
27
|
end
|
data/lib/shared/timezones.rb
CHANGED
@@ -0,0 +1,44 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
# Copyright 2012 Twitter, Inc
|
4
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
5
|
+
|
6
|
+
module TwitterCldr
  module Shared
    # Looks up per-code-point attributes in the "unicode_data" resources.
    class UnicodeData
      # One record of Unicode character data; fields are filled positionally
      # from the array stored in the block resource.
      Attributes = Struct.new(:code_point, :name, :category, :combining_class, :bidi_class, :decomposition,
                              :digit_value, :non_decimal_digit_value, :numeric_value, :bidi_mirrored, :unicode1_name,
                              :iso_comment, :simple_uppercase_map, :simple_lowercase_map, :simple_titlecase_map)

      class << self
        # Returns the Attributes for +code_point+ (a hexadecimal string such as
        # "0041"), or nil when no block contains it or the block has no data
        # for it.
        def for_code_point(code_point)
          blocks = TwitterCldr.get_resource("unicode_data", "blocks")
          numeric_code_point = code_point.to_i(16) # parsed once, not once per block

          # Find the target block
          target = blocks.find { |block_name, range| range.include?(numeric_code_point) }
          return nil unless target

          block_data = TwitterCldr.get_resource("unicode_data", target.first)
          # Code points without an entry of their own may belong to a block that
          # is described as a single range; fall back to the range's first entry.
          code_point_data = block_data.fetch(code_point.to_sym) do |code_point_sym|
            get_range_start(code_point_sym, block_data)
          end

          Attributes.new(*code_point_data) if code_point_data
        end

        private

        # Check if block constitutes a range. The code point beginning a range will have a name enclosed in <>,
        # ending with 'First', eg: <CJK Ideograph Extension A, First>
        # http://unicode.org/reports/tr44/#Code_Point_Ranges
        #
        # Returns a copy of the range's first entry, with its code point replaced
        # by +code_point+ and ', First' removed from its name, or nil when the
        # block is empty or its lowest entry does not start a range.
        def get_range_start(code_point, block_data)
          start_code_point = block_data.keys.min_by { |key| key.to_s.to_i(16) }
          start_data = block_data[start_code_point]
          return nil unless start_data && start_data[1] =~ /<.*, First>/

          range_data = start_data.dup # never modify the cached resource entry
          range_data[0] = code_point.to_s
          range_data[1] = range_data[1].sub(', First', '')
          range_data
        end
      end
    end
  end
end
|
data/lib/tokenizers/base.rb
CHANGED
@@ -1,5 +1,8 @@
|
|
1
1
|
# encoding: UTF-8
|
2
2
|
|
3
|
+
# Copyright 2012 Twitter, Inc
|
4
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
5
|
+
|
3
6
|
module TwitterCldr
|
4
7
|
module Tokenizers
|
5
8
|
class DateTimeTokenizer < Base
|
@@ -32,7 +35,7 @@ module TwitterCldr
|
|
32
35
|
protected
|
33
36
|
|
34
37
|
def init_resources
|
35
|
-
@resource = TwitterCldr.
|
38
|
+
@resource = TwitterCldr.get_resource(@locale, "calendars")[TwitterCldr.convert_locale(@locale)]
|
36
39
|
end
|
37
40
|
|
38
41
|
def init_placeholders
|
data/lib/tokenizers/key_path.rb
CHANGED
@@ -1,5 +1,8 @@
|
|
1
1
|
# encoding: UTF-8
|
2
2
|
|
3
|
+
# Copyright 2012 Twitter, Inc
|
4
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
5
|
+
|
3
6
|
module TwitterCldr
|
4
7
|
module Tokenizers
|
5
8
|
class NumberTokenizer < Base
|
@@ -40,7 +43,7 @@ module TwitterCldr
|
|
40
43
|
end
|
41
44
|
|
42
45
|
def init_resources
|
43
|
-
@resource = TwitterCldr.
|
46
|
+
@resource = TwitterCldr.get_resource(@locale, "numbers")[TwitterCldr.convert_locale(@locale)]
|
44
47
|
end
|
45
48
|
|
46
49
|
def pattern_for(resource)
|
data/lib/tokenizers/token.rb
CHANGED
data/lib/twitter_cldr.rb
CHANGED
@@ -1,5 +1,8 @@
|
|
1
1
|
# encoding: UTF-8
|
2
2
|
|
3
|
+
# Copyright 2012 Twitter, Inc
|
4
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
5
|
+
|
3
6
|
$:.push(File.dirname(__FILE__))
|
4
7
|
|
5
8
|
$KCODE = 'UTF-8' unless RUBY_VERSION >= '1.9.0'
|
@@ -7,6 +10,7 @@ $KCODE = 'UTF-8' unless RUBY_VERSION >= '1.9.0'
|
|
7
10
|
require 'yaml'
|
8
11
|
require 'date'
|
9
12
|
require 'time'
|
13
|
+
require 'forwardable'
|
10
14
|
|
11
15
|
require 'version'
|
12
16
|
|
@@ -20,12 +24,18 @@ require 'ext/numbers/bignum'
|
|
20
24
|
require 'ext/numbers/fixnum'
|
21
25
|
require 'ext/numbers/float'
|
22
26
|
require 'ext/strings/symbol'
|
27
|
+
require 'ext/strings/string'
|
28
|
+
|
29
|
+
require 'utils'
|
23
30
|
|
24
31
|
# manages access to CLDR resources (yaml files in resources dir)
|
25
32
|
require 'shared/resources'
|
26
33
|
|
27
34
|
|
28
35
|
module TwitterCldr
|
36
|
+
|
37
|
+
extend SingleForwardable
|
38
|
+
|
29
39
|
DEFAULT_LOCALE = :en
|
30
40
|
RESOURCE_DIR = File.join(File.dirname(File.dirname(File.expand_path(__FILE__))), "resources")
|
31
41
|
|
@@ -36,51 +46,59 @@ module TwitterCldr
|
|
36
46
|
|
37
47
|
@@resources = TwitterCldr::Shared::Resources.new
|
38
48
|
|
39
|
-
|
40
|
-
File.join(RESOURCE_DIR, self.convert_locale(locale).to_s, "#{resource}.yml")
|
41
|
-
end
|
49
|
+
def_delegator :resources, :resource_for, :get_resource
|
42
50
|
|
43
|
-
|
44
|
-
@@resources
|
45
|
-
end
|
51
|
+
class << self
|
46
52
|
|
47
|
-
|
48
|
-
|
49
|
-
locale = FastGettext.locale
|
50
|
-
locale = DEFAULT_LOCALE if locale.to_s.empty?
|
51
|
-
else
|
52
|
-
locale = DEFAULT_LOCALE
|
53
|
+
def get_resource_file(locale, resource)
|
54
|
+
File.join(RESOURCE_DIR, convert_locale(locale).to_s, "#{resource}.yml")
|
53
55
|
end
|
54
56
|
|
55
|
-
|
56
|
-
|
57
|
+
def resources
|
58
|
+
@@resources
|
59
|
+
end
|
57
60
|
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
61
|
+
def get_locale
|
62
|
+
if defined?(FastGettext)
|
63
|
+
locale = FastGettext.locale
|
64
|
+
locale = DEFAULT_LOCALE if locale.to_s.empty?
|
65
|
+
else
|
66
|
+
locale = DEFAULT_LOCALE
|
67
|
+
end
|
62
68
|
|
63
|
-
|
64
|
-
unless defined?(@@supported_locales)
|
65
|
-
rejectable = [:shared]
|
66
|
-
@@supported_locales = Dir.glob(File.join(File.dirname(File.dirname(__FILE__)), "resources/*")).map do |file|
|
67
|
-
File.basename(file).to_sym
|
68
|
-
end.reject { |file| rejectable.include?(file) }
|
69
|
+
(supported_locale?(locale) ? locale : DEFAULT_LOCALE).to_sym
|
69
70
|
end
|
70
71
|
|
71
|
-
|
72
|
-
|
72
|
+
def convert_locale(locale)
|
73
|
+
locale = locale.to_sym
|
74
|
+
TWITTER_LOCALE_MAP.include?(locale) ? TWITTER_LOCALE_MAP[locale] : locale
|
75
|
+
end
|
76
|
+
|
77
|
+
def supported_locales
|
78
|
+
unless defined?(@@supported_locales)
|
79
|
+
rejectable = [:shared]
|
80
|
+
@@supported_locales = Dir.glob(File.join(File.dirname(File.dirname(__FILE__)), "resources/*")).map do |file|
|
81
|
+
File.basename(file).to_sym
|
82
|
+
end.reject { |file| rejectable.include?(file) }
|
83
|
+
end
|
84
|
+
|
85
|
+
@@supported_locales
|
86
|
+
end
|
87
|
+
|
88
|
+
def supported_locale?(locale)
|
89
|
+
locale = locale.to_sym
|
90
|
+
supported_locales.include?(locale) || supported_locales.include?(convert_locale(locale))
|
91
|
+
end
|
73
92
|
|
74
|
-
def self.supported_locale?(locale)
|
75
|
-
locale = locale.to_sym
|
76
|
-
self.supported_locales.include?(locale) || self.supported_locales.include?(self.convert_locale(locale))
|
77
93
|
end
|
94
|
+
|
78
95
|
end
|
79
96
|
|
80
97
|
|
81
98
|
# other shared libraries (most access shared resource data in resources/shared)
|
82
99
|
require 'shared/currencies'
|
83
100
|
require 'shared/languages'
|
101
|
+
require 'shared/unicode_data'
|
84
102
|
|
85
103
|
# all tokenizers
|
86
104
|
require 'tokenizers/base'
|
@@ -100,9 +118,14 @@ require 'formatters/numbers/number_formatter'
|
|
100
118
|
require 'formatters/numbers/decimal_formatter'
|
101
119
|
require 'formatters/numbers/currency_formatter'
|
102
120
|
require 'formatters/numbers/percent_formatter'
|
121
|
+
require 'formatters/plurals/plural_formatter'
|
103
122
|
require 'formatters/plurals/rules'
|
104
123
|
|
105
124
|
# formatter helpers
|
106
125
|
require 'formatters/numbers/helpers/base'
|
107
126
|
require 'formatters/numbers/helpers/fraction'
|
108
127
|
require 'formatters/numbers/helpers/integer'
|
128
|
+
|
129
|
+
# all normalizers
|
130
|
+
require 'normalizers/base'
|
131
|
+
require 'normalizers/canonical/nfd'
|
@@ -0,0 +1,105 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
# Copyright 2012 Twitter, Inc
|
4
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
5
|
+
|
6
|
+
# The implementation of the TwitterCldr.interpolate method that backports String interpolation capabilities
|
7
|
+
# (originally implemented in String#% method) from Ruby 1.9 to Ruby 1.8 is heavily influenced by the
|
8
|
+
# implementation of the same feature in i18n (https://github.com/svenfuchs/i18n/blob/89ea337f48562370988421e50caa7c2fe89452c7/lib/i18n/core_ext/string/interpolate.rb)
|
9
|
+
# and gettext (https://github.com/mutoh/gettext/blob/11b8c1525ba9f00afb1942f7ebf34bec12f7558b/lib/gettext/core_ext/string.rb) gems.
|
10
|
+
#
|
11
|
+
# See NOTICE file for corresponding license agreements.
|
12
|
+
|
13
|
+
|
14
|
+
# KeyError is raised during interpolation when there is a placeholder that doesn't have a corresponding key in the
|
15
|
+
# interpolation hash. KeyError is defined in 1.9. We define it for prior versions of Ruby to have the same behavior.
|
16
|
+
#
|
17
|
+
# Backport of KeyError, defined only when the running Ruby does not already provide it.
class KeyError < IndexError
  def initialize(message = nil)
    super(message || 'key not found')
  end
end unless defined?(KeyError)


module TwitterCldr
  module Utils

    # Matches named placeholders: %{name} (capture 1) and %<name>format
    # (captures 2 and 3, where capture 3 is the sprintf format that follows the name).
    HASH_INTERPOLATION_REGEXP = Regexp.union(
      /%\{(\w+)\}/,
      /%<(\w+)>(.*?\d*\.?\d*[bBdiouxXeEfgGcps])/
    )

    # Same as HASH_INTERPOLATION_REGEXP, but also matches the escaped percent sign (%%).
    HASH_INTERPOLATION_WITH_ESCAPE_REGEXP = Regexp.union(
      /%%/,
      HASH_INTERPOLATION_REGEXP
    )

    class << self

      # Uses +string+ as a format specification and returns the result of applying it to +args+.
      #
      # There are three ways to use it:
      #
      # * Using a single argument or Array of arguments.
      #
      #   This is the default behaviour of the String#% method. See Kernel#sprintf for more details about the format
      #   specification.
      #
      #   Example:
      #
      #     TwitterCldr::Utils.interpolate('%d %s', [1, 'message'])
      #     # => "1 message"
      #
      # * Using a Hash as an argument and unformatted, named placeholders (Ruby 1.9 syntax).
      #
      #   When you pass a Hash as an argument and specify placeholders with %{foo} it will interpret the hash values as
      #   named arguments.
      #
      #   Example:
      #
      #     TwitterCldr::Utils.interpolate('%{firstname}, %{lastname}', :firstname => 'Masao', :lastname => 'Mutoh')
      #     # => "Masao, Mutoh"
      #
      # * Using a Hash as an argument and formatted, named placeholders (Ruby 1.9 syntax).
      #
      #   When you pass a Hash as an argument and specify placeholders with %<foo>d it will interpret the hash values
      #   as named arguments and format the value according to the formatting instruction appended to the closing >.
      #
      #   Example:
      #
      #     TwitterCldr::Utils.interpolate('%<integer>d, %<float>.1f', :integer => 10, :float => 43.4)
      #     # => "10, 43.4"
      #
      # An exception can be thrown in two cases when Ruby 1.9 interpolation syntax is used:
      #
      # * ArgumentError is thrown if Ruby 1.9 interpolation syntax is used in +string+, but +args+ is not a Hash;
      # * KeyError is thrown if the value for one of the placeholders in +string+ is missing in +args+ hash.
      #
      def interpolate(string, args)
        string =~ HASH_INTERPOLATION_REGEXP ? interpolate_hash(string, args) : interpolate_value_or_array(string, args)
      end

      private

      # Replaces every named placeholder in +string+ with the matching value
      # from the +args+ hash, and every '%%' with a single '%'.
      def interpolate_hash(string, args)
        raise ArgumentError, 'expected a Hash' unless args.is_a?(Hash)

        string.gsub(HASH_INTERPOLATION_WITH_ESCAPE_REGEXP) do |match|
          if match == '%%'
            '%'
          else
            # $1 is set for %{name}, $2 for %<name>format
            key = ($1 || $2).to_sym
            raise KeyError, "key not found: #{key.inspect}" unless args.has_key?(key)
            # $3 holds the sprintf format of a %<name>format placeholder
            $3 ? sprintf("%#{$3}", args[key]) : args[key]
          end
        end
      end

      # Plain String#% formatting. Any '%{' or '%<' in +string+ is escaped
      # first, so that it is emitted literally rather than treated as a named reference.
      def interpolate_value_or_array(string, args)
        string.gsub(/%([{<])/, '%%\1') % args
      end

    end

  end
end
|
data/lib/utils.rb
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
# Copyright 2012 Twitter, Inc
|
4
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
5
|
+
|
6
|
+
require 'utils/interpolation'
|
7
|
+
|
8
|
+
module TwitterCldr
  module Utils

    class << self

      # Returns a copy of +arg+ in which every String key of every Hash, at
      # any nesting depth inside Hashes and Arrays, is converted to a Symbol.
      # Keys that are not Strings, and values that are neither Hashes nor
      # Arrays, are returned as they are.
      #
      # adapted from: http://snippets.dzone.com/posts/show/11121 (first comment)
      def deep_symbolize_keys(arg)
        if arg.is_a?(Array)
          arg.map { |item| deep_symbolize_keys(item) }
        elsif arg.is_a?(Hash)
          pairs = arg.map do |key, value|
            symbolized_key = key.is_a?(String) ? key.to_sym : key
            [symbolized_key, deep_symbolize_keys(value)]
          end
          Hash[pairs]
        else
          arg
        end
      end

    end

  end
end
|