RubyGems - twitter_cldr - Versions diffs - 1.0.1 → 1.1.0 - Mend

twitter_cldr 1.0.1 → 1.1.0

Files changed (303) hide show

data/NOTICE +95 -1
data/README.md +4 -4
data/Rakefile +18 -28
data/lib/ext/calendars/date.rb +3 -0
data/lib/ext/calendars/datetime.rb +3 -0
data/lib/ext/calendars/time.rb +3 -0
data/lib/ext/localized_object.rb +3 -0
data/lib/ext/numbers/bignum.rb +3 -0
data/lib/ext/numbers/fixnum.rb +3 -0
data/lib/ext/numbers/float.rb +3 -0
data/lib/ext/numbers/localized_number.rb +3 -0
data/lib/ext/strings/string.rb +31 -0
data/lib/ext/strings/symbol.rb +3 -0
data/lib/formatters/base.rb +3 -0
data/lib/formatters/calendars/date_formatter.rb +3 -0
data/lib/formatters/calendars/datetime_formatter.rb +3 -0
data/lib/formatters/calendars/time_formatter.rb +3 -0
data/lib/formatters/numbers/currency_formatter.rb +3 -0
data/lib/formatters/numbers/decimal_formatter.rb +3 -0
data/lib/formatters/numbers/helpers/base.rb +3 -0
data/lib/formatters/numbers/helpers/fraction.rb +3 -0
data/lib/formatters/numbers/helpers/integer.rb +3 -0
data/lib/formatters/numbers/number_formatter.rb +3 -0
data/lib/formatters/numbers/percent_formatter.rb +3 -0
data/lib/formatters/plurals/plural_formatter.rb +141 -0
data/lib/formatters/plurals/rules.rb +4 -1
data/lib/normalizers/base.rb +17 -0
data/lib/normalizers/canonical/nfd.rb +81 -0
data/lib/shared/currencies.rb +4 -1
data/lib/shared/languages.rb +4 -1
data/lib/shared/resources.rb +8 -28
data/lib/shared/timezones.rb +3 -0
data/lib/shared/unicode_data.rb +44 -0
data/lib/tokenizers/base.rb +3 -0
data/lib/tokenizers/calendars/date_tokenizer.rb +3 -0
data/lib/tokenizers/calendars/datetime_tokenizer.rb +4 -1
data/lib/tokenizers/calendars/time_tokenizer.rb +3 -0
data/lib/tokenizers/key_path.rb +3 -0
data/lib/tokenizers/numbers/number_tokenizer.rb +4 -1
data/lib/tokenizers/token.rb +3 -0
data/lib/twitter_cldr.rb +52 -29
data/lib/utils/interpolation.rb +105 -0
data/lib/utils.rb +28 -0
data/lib/version.rb +6 -1
data/resources/unicode_data/aegean_numbers.yml +913 -0
data/resources/unicode_data/alchemical_symbols.yml +1857 -0
data/resources/unicode_data/alphabetic_presentation_forms.yml +929 -0
data/resources/unicode_data/ancient_greek_musical_notation.yml +1121 -0
data/resources/unicode_data/ancient_greek_numbers.yml +1201 -0
data/resources/unicode_data/ancient_symbols.yml +193 -0
data/resources/unicode_data/arabic.yml +4049 -0
data/resources/unicode_data/arabic_extended_a.yml +625 -0
data/resources/unicode_data/arabic_mathematical_alphabetic_symbols.yml +2289 -0
data/resources/unicode_data/arabic_presentation_forms_a.yml +9777 -0
data/resources/unicode_data/arabic_presentation_forms_b.yml +2257 -0
data/resources/unicode_data/arabic_supplement.yml +769 -0
data/resources/unicode_data/armenian.yml +1393 -0
data/resources/unicode_data/arrows.yml +1793 -0
data/resources/unicode_data/avestan.yml +977 -0
data/resources/unicode_data/balinese.yml +1937 -0
data/resources/unicode_data/bamum.yml +1409 -0
data/resources/unicode_data/bamum_supplement.yml +9105 -0
data/resources/unicode_data/basic_latin.yml +2049 -0
data/resources/unicode_data/batak.yml +897 -0
data/resources/unicode_data/bengali.yml +1473 -0
data/resources/unicode_data/block_elements.yml +513 -0
data/resources/unicode_data/blocks.yml +881 -0
data/resources/unicode_data/bopomofo.yml +657 -0
data/resources/unicode_data/bopomofo_extended.yml +433 -0
data/resources/unicode_data/box_drawing.yml +2049 -0
data/resources/unicode_data/brahmi.yml +1729 -0
data/resources/unicode_data/braille_patterns.yml +4097 -0
data/resources/unicode_data/buginese.yml +481 -0
data/resources/unicode_data/buhid.yml +321 -0
data/resources/unicode_data/byzantine_musical_symbols.yml +3937 -0
data/resources/unicode_data/carian.yml +785 -0
data/resources/unicode_data/chakma.yml +1073 -0
data/resources/unicode_data/cham.yml +1329 -0
data/resources/unicode_data/cherokee.yml +1361 -0
data/resources/unicode_data/cjk_compatibility.yml +4097 -0
data/resources/unicode_data/cjk_compatibility_forms.yml +513 -0
data/resources/unicode_data/cjk_compatibility_ideographs.yml +7553 -0
data/resources/unicode_data/cjk_compatibility_ideographs_supplement.yml +8673 -0
data/resources/unicode_data/cjk_radicals_supplement.yml +1841 -0
data/resources/unicode_data/cjk_strokes.yml +577 -0
data/resources/unicode_data/cjk_symbols_and_punctuation.yml +1025 -0
data/resources/unicode_data/cjk_unified_ideographs.yml +33 -0
data/resources/unicode_data/cjk_unified_ideographs_extension_a.yml +33 -0
data/resources/unicode_data/cjk_unified_ideographs_extension_b.yml +33 -0
data/resources/unicode_data/cjk_unified_ideographs_extension_c.yml +33 -0
data/resources/unicode_data/cjk_unified_ideographs_extension_d.yml +33 -0
data/resources/unicode_data/combining_diacritical_marks.yml +1793 -0
data/resources/unicode_data/combining_diacritical_marks_for_symbols.yml +529 -0
data/resources/unicode_data/combining_diacritical_marks_supplement.yml +689 -0
data/resources/unicode_data/combining_half_marks.yml +113 -0
data/resources/unicode_data/common_indic_number_forms.yml +161 -0
data/resources/unicode_data/control_pictures.yml +625 -0
data/resources/unicode_data/coptic.yml +1969 -0
data/resources/unicode_data/counting_rod_numerals.yml +289 -0
data/resources/unicode_data/cuneiform.yml +14065 -0
data/resources/unicode_data/cuneiform_numbers_and_punctuation.yml +1649 -0
data/resources/unicode_data/currency_symbols.yml +417 -0
data/resources/unicode_data/cypriot_syllabary.yml +881 -0
data/resources/unicode_data/cyrillic.yml +4097 -0
data/resources/unicode_data/cyrillic_extended_a.yml +513 -0
data/resources/unicode_data/cyrillic_extended_b.yml +1425 -0
data/resources/unicode_data/cyrillic_supplement.yml +641 -0
data/resources/unicode_data/deseret.yml +1281 -0
data/resources/unicode_data/devanagari.yml +2033 -0
data/resources/unicode_data/devanagari_extended.yml +449 -0
data/resources/unicode_data/dingbats.yml +3057 -0
data/resources/unicode_data/domino_tiles.yml +1601 -0
data/resources/unicode_data/egyptian_hieroglyphs.yml +17137 -0
data/resources/unicode_data/emoticons.yml +1217 -0
data/resources/unicode_data/enclosed_alphanumeric_supplement.yml +2737 -0
data/resources/unicode_data/enclosed_alphanumerics.yml +2561 -0
data/resources/unicode_data/enclosed_cjk_letters_and_months.yml +4065 -0
data/resources/unicode_data/enclosed_ideographic_supplement.yml +913 -0
data/resources/unicode_data/ethiopic.yml +5729 -0
data/resources/unicode_data/ethiopic_extended.yml +1265 -0
data/resources/unicode_data/ethiopic_extended_a.yml +513 -0
data/resources/unicode_data/ethiopic_supplement.yml +417 -0
data/resources/unicode_data/general_punctuation.yml +1713 -0
data/resources/unicode_data/geometric_shapes.yml +1537 -0
data/resources/unicode_data/georgian.yml +1409 -0
data/resources/unicode_data/georgian_supplement.yml +641 -0
data/resources/unicode_data/glagolitic.yml +1505 -0
data/resources/unicode_data/gothic.yml +433 -0
data/resources/unicode_data/greek_and_coptic.yml +2145 -0
data/resources/unicode_data/greek_extended.yml +3729 -0
data/resources/unicode_data/gujarati.yml +1345 -0
data/resources/unicode_data/gurmukhi.yml +1265 -0
data/resources/unicode_data/halfwidth_and_fullwidth_forms.yml +3601 -0
data/resources/unicode_data/hangul_compatibility_jamo.yml +1505 -0
data/resources/unicode_data/hangul_jamo.yml +4097 -0
data/resources/unicode_data/hangul_jamo_extended_a.yml +465 -0
data/resources/unicode_data/hangul_jamo_extended_b.yml +1153 -0
data/resources/unicode_data/hangul_syllables.yml +33 -0
data/resources/unicode_data/hanunoo.yml +369 -0
data/resources/unicode_data/hebrew.yml +1393 -0
data/resources/unicode_data/high_private_use_surrogates.yml +33 -0
data/resources/unicode_data/high_surrogates.yml +33 -0
data/resources/unicode_data/hiragana.yml +1489 -0
data/resources/unicode_data/ideographic_description_characters.yml +193 -0
data/resources/unicode_data/imperial_aramaic.yml +497 -0
data/resources/unicode_data/inscriptional_pahlavi.yml +433 -0
data/resources/unicode_data/inscriptional_parthian.yml +481 -0
data/resources/unicode_data/ipa_extensions.yml +1537 -0
data/resources/unicode_data/javanese.yml +1457 -0
data/resources/unicode_data/kaithi.yml +1057 -0
data/resources/unicode_data/kana_supplement.yml +33 -0
data/resources/unicode_data/kanbun.yml +257 -0
data/resources/unicode_data/kangxi_radicals.yml +3425 -0
data/resources/unicode_data/kannada.yml +1377 -0
data/resources/unicode_data/katakana.yml +1537 -0
data/resources/unicode_data/katakana_phonetic_extensions.yml +257 -0
data/resources/unicode_data/kayah_li.yml +769 -0
data/resources/unicode_data/kharoshthi.yml +1041 -0
data/resources/unicode_data/khmer.yml +1825 -0
data/resources/unicode_data/khmer_symbols.yml +513 -0
data/resources/unicode_data/lao.yml +1073 -0
data/resources/unicode_data/latin_1_supplement.yml +2049 -0
data/resources/unicode_data/latin_extended_a.yml +2049 -0
data/resources/unicode_data/latin_extended_additional.yml +4097 -0
data/resources/unicode_data/latin_extended_b.yml +3329 -0
data/resources/unicode_data/latin_extended_c.yml +513 -0
data/resources/unicode_data/latin_extended_d.yml +2145 -0
data/resources/unicode_data/lepcha.yml +1185 -0
data/resources/unicode_data/letterlike_symbols.yml +1281 -0
data/resources/unicode_data/limbu.yml +1057 -0
data/resources/unicode_data/linear_b_ideograms.yml +1969 -0
data/resources/unicode_data/linear_b_syllabary.yml +1409 -0
data/resources/unicode_data/lisu.yml +769 -0
data/resources/unicode_data/low_surrogates.yml +33 -0
data/resources/unicode_data/lycian.yml +465 -0
data/resources/unicode_data/lydian.yml +433 -0
data/resources/unicode_data/mahjong_tiles.yml +705 -0
data/resources/unicode_data/malayalam.yml +1569 -0
data/resources/unicode_data/mandaic.yml +465 -0
data/resources/unicode_data/mathematical_alphanumeric_symbols.yml +15937 -0
data/resources/unicode_data/mathematical_operators.yml +4097 -0
data/resources/unicode_data/meetei_mayek.yml +897 -0
data/resources/unicode_data/meetei_mayek_extensions.yml +369 -0
data/resources/unicode_data/meroitic_cursive.yml +417 -0
data/resources/unicode_data/meroitic_hieroglyphs.yml +513 -0
data/resources/unicode_data/miao.yml +2129 -0
data/resources/unicode_data/miscellaneous_mathematical_symbols_a.yml +769 -0
data/resources/unicode_data/miscellaneous_mathematical_symbols_b.yml +2049 -0
data/resources/unicode_data/miscellaneous_symbols.yml +4097 -0
data/resources/unicode_data/miscellaneous_symbols_and_arrows.yml +1393 -0
data/resources/unicode_data/miscellaneous_symbols_and_pictographs.yml +8529 -0
data/resources/unicode_data/miscellaneous_technical.yml +3905 -0
data/resources/unicode_data/modifier_tone_letters.yml +513 -0
data/resources/unicode_data/mongolian.yml +2497 -0
data/resources/unicode_data/musical_symbols.yml +3521 -0
data/resources/unicode_data/myanmar.yml +2561 -0
data/resources/unicode_data/myanmar_extended_a.yml +449 -0
data/resources/unicode_data/new_tai_lue.yml +1329 -0
data/resources/unicode_data/nko.yml +945 -0
data/resources/unicode_data/number_forms.yml +929 -0
data/resources/unicode_data/ogham.yml +465 -0
data/resources/unicode_data/ol_chiki.yml +769 -0
data/resources/unicode_data/old_italic.yml +561 -0
data/resources/unicode_data/old_persian.yml +801 -0
data/resources/unicode_data/old_south_arabian.yml +513 -0
data/resources/unicode_data/old_turkic.yml +1169 -0
data/resources/unicode_data/optical_character_recognition.yml +177 -0
data/resources/unicode_data/oriya.yml +1441 -0
data/resources/unicode_data/osmanya.yml +641 -0
data/resources/unicode_data/phags_pa.yml +897 -0
data/resources/unicode_data/phaistos_disc.yml +737 -0
data/resources/unicode_data/phoenician.yml +465 -0
data/resources/unicode_data/phonetic_extensions.yml +2049 -0
data/resources/unicode_data/phonetic_extensions_supplement.yml +1025 -0
data/resources/unicode_data/playing_cards.yml +945 -0
data/resources/unicode_data/private_use_area.yml +33 -0
data/resources/unicode_data/rejang.yml +593 -0
data/resources/unicode_data/rumi_numeral_symbols.yml +497 -0
data/resources/unicode_data/runic.yml +1297 -0
data/resources/unicode_data/samaritan.yml +977 -0
data/resources/unicode_data/saurashtra.yml +1297 -0
data/resources/unicode_data/sharada.yml +1329 -0
data/resources/unicode_data/shavian.yml +769 -0
data/resources/unicode_data/sinhala.yml +1281 -0
data/resources/unicode_data/small_form_variants.yml +417 -0
data/resources/unicode_data/sora_sompeng.yml +561 -0
data/resources/unicode_data/spacing_modifier_letters.yml +1281 -0
data/resources/unicode_data/specials.yml +81 -0
data/resources/unicode_data/sundanese.yml +1025 -0
data/resources/unicode_data/sundanese_supplement.yml +129 -0
data/resources/unicode_data/superscripts_and_subscripts.yml +673 -0
data/resources/unicode_data/supplemental_arrows_a.yml +257 -0
data/resources/unicode_data/supplemental_arrows_b.yml +2049 -0
data/resources/unicode_data/supplemental_mathematical_operators.yml +4097 -0
data/resources/unicode_data/supplemental_punctuation.yml +961 -0
data/resources/unicode_data/supplementary_private_use_area_a.yml +33 -0
data/resources/unicode_data/supplementary_private_use_area_b.yml +33 -0
data/resources/unicode_data/syloti_nagri.yml +705 -0
data/resources/unicode_data/syriac.yml +1233 -0
data/resources/unicode_data/tagalog.yml +321 -0
data/resources/unicode_data/tagbanwa.yml +289 -0
data/resources/unicode_data/tags.yml +1553 -0
data/resources/unicode_data/tai_le.yml +561 -0
data/resources/unicode_data/tai_tham.yml +2033 -0
data/resources/unicode_data/tai_viet.yml +1153 -0
data/resources/unicode_data/tai_xuan_jing_symbols.yml +1393 -0
data/resources/unicode_data/takri.yml +1057 -0
data/resources/unicode_data/tamil.yml +1153 -0
data/resources/unicode_data/telugu.yml +1489 -0
data/resources/unicode_data/thaana.yml +801 -0
data/resources/unicode_data/thai.yml +1393 -0
data/resources/unicode_data/tibetan.yml +3377 -0
data/resources/unicode_data/tifinagh.yml +945 -0
data/resources/unicode_data/transport_and_map_symbols.yml +1121 -0
data/resources/unicode_data/ugaritic.yml +497 -0
data/resources/unicode_data/unified_canadian_aboriginal_syllabics.yml +10241 -0
data/resources/unicode_data/unified_canadian_aboriginal_syllabics_extended.yml +1121 -0
data/resources/unicode_data/vai.yml +4801 -0
data/resources/unicode_data/variation_selectors.yml +257 -0
data/resources/unicode_data/variation_selectors_supplement.yml +3841 -0
data/resources/unicode_data/vedic_extensions.yml +625 -0
data/resources/unicode_data/vertical_forms.yml +161 -0
data/resources/unicode_data/yi_radicals.yml +881 -0
data/resources/unicode_data/yi_syllables.yml +18641 -0
data/resources/unicode_data/yijing_hexagram_symbols.yml +1025 -0
data/spec/ext/calendars/date_spec.rb +5 -1
data/spec/ext/calendars/datetime_spec.rb +5 -1
data/spec/ext/calendars/time_spec.rb +5 -1
data/spec/ext/numbers/bignum_spec.rb +5 -1
data/spec/ext/numbers/fixnum_spec.rb +5 -1
data/spec/ext/numbers/float_spec.rb +5 -1
data/spec/ext/numbers/localized_number_spec.rb +5 -1
data/spec/ext/strings/string_spec.rb +102 -0
data/spec/ext/strings/symbol_spec.rb +5 -1
data/spec/formatters/base_spec.rb +5 -1
data/spec/formatters/calendars/datetime_formatter_spec.rb +5 -1
data/spec/formatters/numbers/currency_formatter_spec.rb +5 -1
data/spec/formatters/numbers/decimal_formatter_spec.rb +5 -1
data/spec/formatters/numbers/helpers/fraction_spec.rb +5 -1
data/spec/formatters/numbers/helpers/integer_spec.rb +5 -1
data/spec/formatters/numbers/number_formatter_spec.rb +6 -2
data/spec/formatters/numbers/percent_formatter_spec.rb +5 -1
data/spec/formatters/plurals/plural_formatter_spec.rb +205 -0
data/spec/formatters/plurals/rules_spec.rb +28 -28
data/spec/normalizers/NormalizationTest.txt +602 -0
data/spec/normalizers/base_spec.rb +16 -0
data/spec/normalizers/canonical/nfd_spec.rb +50 -0
data/spec/shared/currencies_spec.rb +5 -1
data/spec/shared/languages_spec.rb +5 -1
data/spec/shared/resources_spec.rb +5 -18
data/spec/shared/unicode_data_spec.rb +51 -0
data/spec/spec_helper.rb +6 -3
data/spec/tokenizers/base_spec.rb +3 -0
data/spec/tokenizers/calendars/date_tokenizer_spec.rb +5 -1
data/spec/tokenizers/calendars/datetime_tokenizer_spec.rb +5 -1
data/spec/tokenizers/calendars/time_tokenizer_spec.rb +5 -1
data/spec/tokenizers/key_path_spec.rb +3 -0
data/spec/tokenizers/numbers/number_tokenizer_spec.rb +5 -1
data/spec/tokenizers/token_spec.rb +5 -1
data/spec/twitter_cldr_spec.rb +23 -1
data/spec/utils/interpolation_spec.rb +124 -0
data/spec/utils_spec.rb +32 -0
metadata +285 -21

data/lib/normalizers/canonical/nfd.rb ADDED Viewed

@@ -0,0 +1,81 @@
+# encoding: UTF-8
+module TwitterCldr
+  module Normalizers
+    # Normalizer that decomposes a string in two steps: every code point is recursively replaced by its
+    # canonical decomposition mapping, then adjacent code points are reordered by combining class.
+    # Code points are handled as hexadecimal strings (they are parsed with String#hex and rebuilt with to_s(16)).
+    class NFD < Base
+      # Constants of the arithmetic Hangul syllable decomposition
+      # (Section 3.12 at http://www.unicode.org/versions/Unicode6.1.0/ch03.pdf).
+      # SCount is the number of precomposed syllables: LCount * NCount = 19 * 588 = 11172.
+      @@hangul_constants = {:SBase => "AC00".hex, :LBase => "1100".hex, :VBase => "1161".hex, :TBase => "11A7".hex,
+                            :LCount => 19, :VCount => 21, :TCount => 28, :NCount => 588, :SCount => 11172}
+      class << self
+        # Returns +string+ with every character decomposed and the resulting code points reordered.
+        # char_to_code_point and code_point_to_char are not defined in this class (presumably inherited from Base).
+        def normalize(string)
+          #Convert string to code points
+          code_points = string.split('').map { |char| char_to_code_point(char) }
+          #Normalize code points
+          normalized_code_points = normalize_code_points(code_points)
+          #Convert normalized code points back to string
+          normalized_code_points.map { |code_point| code_point_to_char(code_point) }.join
+        end
+        # Decomposes every code point in +code_points+, flattens the result into one Array and reorders it.
+        # Returns the new Array; the Array passed in is not modified.
+        def normalize_code_points(code_points)
+          code_points = code_points.map { |code_point| decompose code_point }.flatten
+          reorder code_points
+          code_points
+        end
+        #Recursively replace the given code point with the values in its Decomposition_Mapping property
+        # Returns the code point itself when nothing is known about it or when it has no canonical mapping,
+        # otherwise an Array of code points.
+        def decompose(code_point)
+          unicode_data = TwitterCldr::Shared::UnicodeData.for_code_point(code_point)
+          return code_point unless unicode_data
+          decomposition_mapping = unicode_data.decomposition.split
+          # Special decomposition for Hangul syllables.
+          # Documented in Section 3.12 at http://www.unicode.org/versions/Unicode6.1.0/ch03.pdf
+          if unicode_data.name.include? 'Hangul'
+            sIndex = code_point.hex - @@hangul_constants[:SBase]
+            lIndex = sIndex / @@hangul_constants[:NCount]
+            vIndex = (sIndex % @@hangul_constants[:NCount]) / @@hangul_constants[:TCount]
+            tIndex = sIndex % @@hangul_constants[:TCount]
+            lPart = (@@hangul_constants[:LBase] + lIndex).to_s(16).upcase
+            vPart = (@@hangul_constants[:VBase] + vIndex).to_s(16).upcase
+            # tPart stays nil when tIndex is 0 and is dropped by compact below
+            tPart = (@@hangul_constants[:TBase] + tIndex).to_s(16).upcase if tIndex > 0
+            [lPart, vPart, tPart].compact
+          #Return the code point if compatibility mapping or if no mapping exists
+          elsif decomposition_mapping.first =~ /<.*>/ || decomposition_mapping.empty?
+            code_point
+          else
+            decomposition_mapping.map do |decomposition_code_point|
+              decompose(decomposition_code_point)
+            end.flatten
+          end
+        end
+        #Swap any two adjacent code points A & B if ccc(A) > ccc(B) > 0
+        # Reorders +code_points+ in place; one pass is made per element, so a mark can travel the whole Array.
+        def reorder(code_points)
+          code_points.size.times do
+            (0...(code_points.size - 1)).each do |i|
+              ccc_a = combining_class_for(code_points[i])
+              ccc_b = combining_class_for(code_points[i + 1])
+              if (ccc_a > ccc_b) && (ccc_b > 0)
+                code_points[i], code_points[i + 1] = code_points[i + 1], code_points[i]
+              end
+            end
+          end
+        end
+        # Returns the combining class of +code_point+ as an Integer, or 0 when no data is found for it.
+        def combining_class_for(code_point)
+          unicode_data = TwitterCldr::Shared::UnicodeData.for_code_point(code_point)
+          unicode_data ? unicode_data.combining_class.to_i : 0
+        end
+      end
+    end
+  end
+end

data/lib/shared/currencies.rb CHANGED Viewed

@@ -1,9 +1,12 @@
 # encoding: UTF-8
+# Copyright 2012 Twitter, Inc
+# http://www.apache.org/licenses/LICENSE-2.0
 module TwitterCldr
   module Shared
     class Currencies
-      @@resource = TwitterCldr.resources.resource_for("shared", "currencies")[:shared][:currencies]
+      @@resource = TwitterCldr.get_resource("shared", "currencies")[:shared][:currencies]
       class << self
         def countries

data/lib/shared/languages.rb CHANGED Viewed

@@ -1,5 +1,8 @@
 # encoding: UTF-8
+# Copyright 2012 Twitter, Inc
+# http://www.apache.org/licenses/LICENSE-2.0
 module TwitterCldr
   module Shared
     class Languages
@@ -42,7 +45,7 @@ module TwitterCldr
         def get_resource(locale)
           locale = TwitterCldr.convert_locale(locale)
-          TwitterCldr.resources.resource_for(locale, "languages")[locale]
+          TwitterCldr.get_resource(locale, "languages")[locale]
         end
       end
     end

data/lib/shared/resources.rb CHANGED Viewed

@@ -1,47 +1,27 @@
 # encoding: UTF-8
+# Copyright 2012 Twitter, Inc
+# http://www.apache.org/licenses/LICENSE-2.0
 module TwitterCldr
   module Shared
     class Resources
       def initialize
-        @resources_by_locale = {}
+        @resources_by_locale = Hash.new do |hash, locale|
+          hash[locale] = Hash.new { |h, resource| h[resource] = data_for(locale, resource) }
+        end
       end
       def resource_for(locale, resource)
-        locale = locale.to_sym
-        unless @resources_by_locale.include?(locale)
-          @resources_by_locale[locale] = {}
-        end
-        unless @resources_by_locale[locale].include?(resource)
-          @resources_by_locale[locale][resource] = data_for(locale, resource)
-        end
-        @resources_by_locale[locale][resource]
+        @resources_by_locale[locale.to_sym][resource]
       end
       protected
       def data_for(locale, resource)
-        deep_symbolize_keys(YAML.load(File.read(TwitterCldr.get_resource_file(locale, resource))))
+        TwitterCldr::Utils.deep_symbolize_keys(YAML.load(File.read(TwitterCldr.get_resource_file(locale, resource))))
       end
-      # adapted from: http://snippets.dzone.com/posts/show/11121 (first comment)
-      def deep_symbolize_keys(arg)
-        case arg
-          when Array then
-            arg.map { |elem| deep_symbolize_keys(elem) }
-          when Hash then
-            Hash[
-              arg.map do |key, value|
-                k = key.is_a?(String) ? key.to_sym : key
-                v = deep_symbolize_keys(value)
-                [k, v]
-              end]
-          else
-            arg
-        end
-      end
     end
   end
 end

data/lib/shared/timezones.rb CHANGED Viewed

@@ -1,3 +1,6 @@
 # encoding: UTF-8
+# Copyright 2012 Twitter, Inc
+# http://www.apache.org/licenses/LICENSE-2.0
 # not yet implemented

data/lib/shared/unicode_data.rb ADDED Viewed

@@ -0,0 +1,44 @@
+# encoding: UTF-8
+# Copyright 2012 Twitter, Inc
+# http://www.apache.org/licenses/LICENSE-2.0
+module TwitterCldr
+  module Shared
+    # Looks up per-code-point properties in the "unicode_data" resources.
+    class UnicodeData
+      # Record of the properties of one code point; members are filled positionally from a resource entry.
+      Attributes = Struct.new(:code_point, :name, :category, :combining_class, :bidi_class, :decomposition,
+                             :digit_value, :non_decimal_digit_value, :numeric_value, :bidi_mirrored, :unicode1_name,
+                             :iso_comment, :simple_uppercase_map, :simple_lowercase_map, :simple_titlecase_map)
+      class << self
+        # Returns an Attributes record for +code_point+ (a hexadecimal string), or nil when no block contains
+        # the code point or when the containing block has neither an entry nor a range covering it.
+        def for_code_point(code_point)
+          #Find the block whose range contains the code point
+          containing_block = TwitterCldr.get_resource("unicode_data", "blocks").find do |_block_name, code_point_range|
+            code_point_range.include?(code_point.to_i(16))
+          end
+          return nil unless containing_block
+          entries = TwitterCldr.get_resource("unicode_data", containing_block.first)
+          # Fall back to the range entry when the code point has no entry of its own
+          fields = entries.fetch(code_point.to_sym) { |missing_key| get_range_start(missing_key, entries) }
+          fields ? Attributes.new(*fields) : nil
+        end
+        private
+        # Check if block constitutes a range. The code point beginning a range will have a name enclosed in <>, ending with 'First'
+        # eg: <CJK Ideograph Extension A, First>
+        # http://unicode.org/reports/tr44/#Code_Point_Ranges
+        # Returns a copy of the range's first entry with the code point replaced by +code_point+ and ', First'
+        # removed from the name, or nil when the block's lowest entry does not start a range.
+        def get_range_start(code_point, block_data)
+          lowest_key = block_data.keys.sort_by { |key| key.to_s.to_i(16) }.first
+          range_entry = block_data[lowest_key].clone
+          return nil unless range_entry[1] =~ /<.*, First>/
+          range_entry[0] = code_point.to_s
+          range_entry[1] = range_entry[1].sub(', First', '')
+          range_entry
+        end
+      end
+    end
+  end
+end

data/lib/tokenizers/base.rb CHANGED Viewed

@@ -1,5 +1,8 @@
 # encoding: UTF-8
+# Copyright 2012 Twitter, Inc
+# http://www.apache.org/licenses/LICENSE-2.0
 module TwitterCldr
   module Tokenizers
     class Base

data/lib/tokenizers/calendars/date_tokenizer.rb CHANGED Viewed

@@ -1,5 +1,8 @@
 # encoding: UTF-8
+# Copyright 2012 Twitter, Inc
+# http://www.apache.org/licenses/LICENSE-2.0
 module TwitterCldr
   module Tokenizers
     class DateTokenizer < TwitterCldr::Tokenizers::DateTimeTokenizer

data/lib/tokenizers/calendars/datetime_tokenizer.rb CHANGED Viewed

@@ -1,5 +1,8 @@
 # encoding: UTF-8
+# Copyright 2012 Twitter, Inc
+# http://www.apache.org/licenses/LICENSE-2.0
 module TwitterCldr
   module Tokenizers
     class DateTimeTokenizer < Base
@@ -32,7 +35,7 @@ module TwitterCldr
       protected
       def init_resources
-        @resource = TwitterCldr.resources.resource_for(@locale, "calendars")[TwitterCldr.convert_locale(@locale)]
+        @resource = TwitterCldr.get_resource(@locale, "calendars")[TwitterCldr.convert_locale(@locale)]
       end
       def init_placeholders

data/lib/tokenizers/calendars/time_tokenizer.rb CHANGED Viewed

@@ -1,5 +1,8 @@
 # encoding: UTF-8
+# Copyright 2012 Twitter, Inc
+# http://www.apache.org/licenses/LICENSE-2.0
 module TwitterCldr
   module Tokenizers
     class TimeTokenizer < TwitterCldr::Tokenizers::DateTimeTokenizer

data/lib/tokenizers/key_path.rb CHANGED Viewed

@@ -1,5 +1,8 @@
 # encoding: UTF-8
+# Copyright 2012 Twitter, Inc
+# http://www.apache.org/licenses/LICENSE-2.0
 module TwitterCldr
   module Tokenizers
     class KeyPath

data/lib/tokenizers/numbers/number_tokenizer.rb CHANGED Viewed

@@ -1,5 +1,8 @@
 # encoding: UTF-8
+# Copyright 2012 Twitter, Inc
+# http://www.apache.org/licenses/LICENSE-2.0
 module TwitterCldr
   module Tokenizers
     class NumberTokenizer < Base
@@ -40,7 +43,7 @@ module TwitterCldr
       end
       def init_resources
-        @resource = TwitterCldr.resources.resource_for(@locale, "numbers")[TwitterCldr.convert_locale(@locale)]
+        @resource = TwitterCldr.get_resource(@locale, "numbers")[TwitterCldr.convert_locale(@locale)]
       end
       def pattern_for(resource)

data/lib/tokenizers/token.rb CHANGED Viewed

@@ -1,5 +1,8 @@
 # encoding: UTF-8
+# Copyright 2012 Twitter, Inc
+# http://www.apache.org/licenses/LICENSE-2.0
 module TwitterCldr
   module Tokenizers
     class Token

data/lib/twitter_cldr.rb CHANGED Viewed

@@ -1,5 +1,8 @@
 # encoding: UTF-8
+# Copyright 2012 Twitter, Inc
+# http://www.apache.org/licenses/LICENSE-2.0
 $:.push(File.dirname(__FILE__))
 $KCODE = 'UTF-8' unless RUBY_VERSION >= '1.9.0'
@@ -7,6 +10,7 @@ $KCODE = 'UTF-8' unless RUBY_VERSION >= '1.9.0'
 require 'yaml'
 require 'date'
 require 'time'
+require 'forwardable'
 require 'version'
@@ -20,12 +24,18 @@ require 'ext/numbers/bignum'
 require 'ext/numbers/fixnum'
 require 'ext/numbers/float'
 require 'ext/strings/symbol'
+require 'ext/strings/string'
+require 'utils'
 # manages access to CLDR resources (yaml files in resources dir)
 require 'shared/resources'
 module TwitterCldr
+  extend SingleForwardable
   DEFAULT_LOCALE = :en
   RESOURCE_DIR = File.join(File.dirname(File.dirname(File.expand_path(__FILE__))), "resources")
@@ -36,51 +46,59 @@ module TwitterCldr
   @@resources = TwitterCldr::Shared::Resources.new
-  def self.get_resource_file(locale, resource)
-    File.join(RESOURCE_DIR, self.convert_locale(locale).to_s, "#{resource}.yml")
-  end
+  def_delegator :resources, :resource_for, :get_resource
-  def self.resources
-    @@resources
-  end
+  class << self
-  def self.get_locale
-    if defined?(FastGettext)
-      locale = FastGettext.locale
-      locale = DEFAULT_LOCALE if locale.to_s.empty?
-    else
-      locale = DEFAULT_LOCALE
+    def get_resource_file(locale, resource)
+      File.join(RESOURCE_DIR, convert_locale(locale).to_s, "#{resource}.yml")
     end
-    (self.supported_locale?(locale) ? locale : DEFAULT_LOCALE).to_sym
-  end
+    def resources
+      @@resources
+    end
-  def self.convert_locale(locale)
-    locale = locale.to_sym
-    TWITTER_LOCALE_MAP.include?(locale) ? TWITTER_LOCALE_MAP[locale] : locale
-  end
+    def get_locale
+      if defined?(FastGettext)
+        locale = FastGettext.locale
+        locale = DEFAULT_LOCALE if locale.to_s.empty?
+      else
+        locale = DEFAULT_LOCALE
+      end
-  def self.supported_locales
-    unless defined?(@@supported_locales)
-      rejectable = [:shared]
-      @@supported_locales = Dir.glob(File.join(File.dirname(File.dirname(__FILE__)), "resources/*")).map do |file|
-        File.basename(file).to_sym
-      end.reject { |file| rejectable.include?(file) }
+      (supported_locale?(locale) ? locale : DEFAULT_LOCALE).to_sym
     end
-    @@supported_locales
-  end
+    def convert_locale(locale)
+      locale = locale.to_sym
+      TWITTER_LOCALE_MAP.include?(locale) ? TWITTER_LOCALE_MAP[locale] : locale
+    end
+    def supported_locales
+      unless defined?(@@supported_locales)
+        rejectable = [:shared]
+        @@supported_locales = Dir.glob(File.join(File.dirname(File.dirname(__FILE__)), "resources/*")).map do |file|
+          File.basename(file).to_sym
+        end.reject { |file| rejectable.include?(file) }
+      end
+      @@supported_locales
+    end
+    def supported_locale?(locale)
+      locale = locale.to_sym
+      supported_locales.include?(locale) || supported_locales.include?(convert_locale(locale))
+    end
-  def self.supported_locale?(locale)
-    locale = locale.to_sym
-    self.supported_locales.include?(locale) || self.supported_locales.include?(self.convert_locale(locale))
   end
 end
 # other shared libraries (most access shared resource data in resources/shared)
 require 'shared/currencies'
 require 'shared/languages'
+require 'shared/unicode_data'
 # all tokenizers
 require 'tokenizers/base'
@@ -100,9 +118,14 @@ require 'formatters/numbers/number_formatter'
 require 'formatters/numbers/decimal_formatter'
 require 'formatters/numbers/currency_formatter'
 require 'formatters/numbers/percent_formatter'
+require 'formatters/plurals/plural_formatter'
 require 'formatters/plurals/rules'
 # formatter helpers
 require 'formatters/numbers/helpers/base'
 require 'formatters/numbers/helpers/fraction'
 require 'formatters/numbers/helpers/integer'
+# all normalizers
+require 'normalizers/base'
+require 'normalizers/canonical/nfd'

data/lib/utils/interpolation.rb ADDED Viewed

@@ -0,0 +1,105 @@
+# encoding: UTF-8
+# Copyright 2012 Twitter, Inc
+# http://www.apache.org/licenses/LICENSE-2.0
+# The implementation of the TwitterCldr.interpolate method that backports String interpolation capabilities
+# (originally implemented in String#% method) from Ruby 1.9 to Ruby 1.8 is heavily influenced by the
+# implementation of the same feature in i18n (https://github.com/svenfuchs/i18n/blob/89ea337f48562370988421e50caa7c2fe89452c7/lib/i18n/core_ext/string/interpolate.rb)
+# and gettext (https://github.com/mutoh/gettext/blob/11b8c1525ba9f00afb1942f7ebf34bec12f7558b/lib/gettext/core_ext/string.rb) gems.
+#
+# See NOTICE file for corresponding license agreements.
+# KeyError is raised during interpolation when there is a placeholder that doesn't have corresponding key in the
+# interpolation hash. KeyError is defined in 1.9. We define it for prior versions of Ruby to have the same behavior.
+#
+class KeyError < IndexError
+  def initialize(message = nil)
+    super(message || 'key not found')
+  end
+end unless defined?(KeyError)
+module TwitterCldr
+  module Utils
+    # Matches named placeholders: %{name} (capture 1) or %<name>format (captures 2 and 3).
+    HASH_INTERPOLATION_REGEXP = Regexp.union(
+        /%\{(\w+)\}/,
+        /%<(\w+)>(.*?\d*\.?\d*[bBdiouxXeEfgGcps])/
+    )
+    # Same as above, but also matches the %% escape so that it is consumed before any placeholder that follows it.
+    HASH_INTERPOLATION_WITH_ESCAPE_REGEXP = Regexp.union(
+        /%%/,
+        HASH_INTERPOLATION_REGEXP
+    )
+    class << self
+      # Uses +string+ as a format specification and returns the result of applying it to +args+.
+      #
+      # There are three ways to use it:
+      #
+      # * Using a single argument or Array of arguments.
+      #
+      #   This is the default behaviour of the String#% method. See Kernel#sprintf for more details about the format
+      #   specification.
+      #
+      #   Example:
+      #
+      #     TwitterCldr::Utils.interpolate('%d %s', [1, 'message'])
+      #     # => "1 message"
+      #
+      # * Using a Hash as an argument and unformatted, named placeholders (Ruby 1.9 syntax).
+      #
+      #   When you pass a Hash as an argument and specify placeholders with %{foo} it will interpret the hash values as
+      #   named arguments.
+      #
+      #   Example:
+      #
+      #     TwitterCldr::Utils.interpolate('%{firstname}, %{lastname}', :firstname => 'Masao', :lastname => 'Mutoh')
+      #     # => "Masao, Mutoh"
+      #
+      # * Using a Hash as an argument and formatted, named placeholders (Ruby 1.9 syntax).
+      #
+      #   When you pass a Hash as an argument and specify placeholders with %<foo>d it will interpret the hash values
+      #   as named arguments and format the value according to the formatting instruction appended to the closing >.
+      #
+      #   Example:
+      #
+      #     TwitterCldr::Utils.interpolate('%<integer>d, %<float>.1f', :integer => 10, :float => 43.4)
+      #     # => "10, 43.4"
+      #
+      # An exception can be thrown in two cases when Ruby 1.9 interpolation syntax is used:
+      #
+      # * ArgumentError is thrown if Ruby 1.9 interpolation syntax is used in +string+, but +args+ is not a Hash;
+      # * KeyError is thrown if the value for one of the placeholders in +string+ is missing in +args+ hash.
+      #
+      def interpolate(string, args)
+        string =~ HASH_INTERPOLATION_REGEXP ? interpolate_hash(string, args) : interpolate_value_or_array(string, args)
+      end
+      private
+      # Replaces every named placeholder in +string+ with the value stored in +args+ under the same (Symbol) key
+      # and turns %% into a literal %.
+      def interpolate_hash(string, args)
+        raise ArgumentError, 'expected a Hash' unless args.is_a?(Hash)
+        string.gsub(HASH_INTERPOLATION_WITH_ESCAPE_REGEXP) do |match|
+          if match == '%%'
+            '%'
+          else
+            # $1 is the name from %{name}, $2 the name from %<name>, $3 the format that follows %<name>
+            key = ($1 || $2).to_sym
+            raise KeyError unless args.has_key?(key)
+            $3 ? sprintf("%#{$3}", args[key]) : args[key]
+          end
+        end
+      end
+      # Applies String#% after escaping any remaining '%{' or '%<' so that they are not treated as named placeholders.
+      def interpolate_value_or_array(string, args)
+        string.gsub(/%([{<])/, '%%\1') % args
+      end
+    end
+  end
+end

data/lib/utils.rb ADDED Viewed

@@ -0,0 +1,28 @@
+# encoding: UTF-8
+# Copyright 2012 Twitter, Inc
+# http://www.apache.org/licenses/LICENSE-2.0
+require 'utils/interpolation'
+module TwitterCldr
+  module Utils
+    class << self
+      # adapted from: http://snippets.dzone.com/posts/show/11121 (first comment)
+      # Returns a copy of +arg+ in which every String key of every Hash, at any nesting depth inside Arrays
+      # and Hashes, is converted to a Symbol. Keys of other types and all non-container values are kept as they are.
+      def deep_symbolize_keys(arg)
+        if arg.is_a?(Array)
+          arg.map { |item| deep_symbolize_keys(item) }
+        elsif arg.is_a?(Hash)
+          arg.inject({}) do |symbolized, (key, value)|
+            symbolized[key.is_a?(String) ? key.to_sym : key] = deep_symbolize_keys(value)
+            symbolized
+          end
+        else
+          arg
+        end
+      end
+    end
+  end
+end

data/lib/version.rb CHANGED Viewed

@@ -1,3 +1,8 @@
+# encoding: UTF-8
+# Copyright 2012 Twitter, Inc
+# http://www.apache.org/licenses/LICENSE-2.0
 module TwitterCldr
-  VERSION = "1.0.1"
+  VERSION = "1.1.0"
 end