RubyGems - twitter_cldr - Versions diffs - 1.7.0 → 1.8.0 - Mend

twitter_cldr 1.7.0 → 1.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (76)

data/History.txt +6 -0
data/README.md +61 -5
data/Rakefile +64 -60
data/js/lib/twitter_cldr_js.rb +0 -2
data/lib/twitter_cldr/core_ext.rb +12 -12
data/lib/twitter_cldr/formatters/calendars/timespan_formatter.rb +13 -11
data/lib/twitter_cldr/localized/localized_array.rb +33 -0
data/lib/twitter_cldr/localized/localized_date.rb +23 -0
data/lib/twitter_cldr/localized/localized_datetime.rb +63 -0
data/lib/twitter_cldr/localized/localized_number.rb +50 -0
data/lib/twitter_cldr/localized/localized_object.rb +38 -0
data/lib/twitter_cldr/localized/localized_string.rb +41 -0
data/lib/twitter_cldr/localized/localized_symbol.rb +20 -0
data/lib/twitter_cldr/localized/localized_time.rb +23 -0
data/lib/twitter_cldr/localized/localized_timespan.rb +26 -0
data/lib/twitter_cldr/localized.rb +18 -0
data/lib/twitter_cldr/normalization.rb +23 -0
data/lib/twitter_cldr/resources/{tries_dumper.rb → collation_tries_dumper.rb} +1 -1
data/lib/twitter_cldr/resources/composition_exclusions_importer.rb +1 -1
data/lib/twitter_cldr/resources/language_codes_importer.rb +232 -0
data/lib/twitter_cldr/resources/locales_resources_importer.rb +1 -1
data/lib/twitter_cldr/resources/phone_codes_importer.rb +1 -1
data/lib/twitter_cldr/resources/postal_codes_importer.rb +1 -1
data/lib/twitter_cldr/resources/tailoring_importer.rb +12 -3
data/lib/twitter_cldr/resources/unicode_data_importer.rb +3 -1
data/lib/twitter_cldr/resources.rb +2 -1
data/lib/twitter_cldr/shared/calendar.rb +2 -6
data/lib/twitter_cldr/shared/language_codes.rb +75 -0
data/lib/twitter_cldr/shared/languages.rb +4 -11
data/lib/twitter_cldr/shared.rb +8 -7
data/lib/twitter_cldr/tokenizers/base.rb +2 -8
data/lib/twitter_cldr/utils.rb +8 -0
data/lib/twitter_cldr/version.rb +1 -1
data/lib/twitter_cldr.rb +5 -4
data/resources/custom/locales/cs/units.yml +3 -3
data/resources/custom/locales/pl/units.yml +4 -4
data/resources/custom/locales/pt/units.yml +2 -2
data/resources/shared/language_codes_table.dump +0 -0
data/spec/core_ext_spec.rb +19 -0
data/spec/{core_ext/array_spec.rb → localized/localized_array_spec.rb} +1 -1
data/spec/{core_ext/calendars/date_spec.rb → localized/localized_date_spec.rb} +24 -44
data/spec/localized/localized_datetime_spec.rb +81 -0
data/spec/{core_ext/numbers → localized}/localized_number_spec.rb +34 -1
data/spec/localized/localized_object_spec.rb +89 -0
data/spec/{core_ext/string_spec.rb → localized/localized_string_spec.rb} +16 -33
data/spec/{core_ext/symbol_spec.rb → localized/localized_symbol_spec.rb} +3 -1
data/spec/localized/localized_time_spec.rb +70 -0
data/spec/normalization_spec.rb +42 -0
data/spec/readme_spec.rb +51 -5
data/spec/shared/language_codes_spec.rb +161 -0
data/spec/shared/phone_codes_spec.rb +2 -2
data/spec/shared/postal_codes_spec.rb +2 -2
data/spec/spec_helper.rb +2 -0
data/spec/tokenizers/base_spec.rb +15 -6
data/spec/utils_spec.rb +18 -2
data/twitter_cldr.gemspec +2 -1
metadata +28 -44
data/lib/twitter_cldr/core_ext/array.rb +0 -35
data/lib/twitter_cldr/core_ext/calendars/date.rb +0 -25
data/lib/twitter_cldr/core_ext/calendars/datetime.rb +0 -65
data/lib/twitter_cldr/core_ext/calendars/time.rb +0 -25
data/lib/twitter_cldr/core_ext/calendars/timespan.rb +0 -24
data/lib/twitter_cldr/core_ext/localized_object.rb +0 -25
data/lib/twitter_cldr/core_ext/numbers/bignum.rb +0 -8
data/lib/twitter_cldr/core_ext/numbers/fixnum.rb +0 -8
data/lib/twitter_cldr/core_ext/numbers/float.rb +0 -8
data/lib/twitter_cldr/core_ext/numbers/localized_number.rb +0 -54
data/lib/twitter_cldr/core_ext/string.rb +0 -51
data/lib/twitter_cldr/core_ext/symbol.rb +0 -22
data/spec/core_ext/calendars/datetime_spec.rb +0 -90
data/spec/core_ext/calendars/time_spec.rb +0 -90
data/spec/core_ext/calendars_spec.rb +0 -34
data/spec/core_ext/numbers/bignum_spec.rb +0 -25
data/spec/core_ext/numbers/fixnum_spec.rb +0 -25
data/spec/core_ext/numbers/float_spec.rb +0 -25
data/spec/core_ext/numbers_spec.rb +0 -39

data/lib/twitter_cldr/localized/localized_number.rb ADDED Viewed

@@ -0,0 +1,50 @@
+# encoding: UTF-8
+# Copyright 2012 Twitter, Inc
+# http://www.apache.org/licenses/LICENSE-2.0
+module TwitterCldr
+  module Localized
+    class LocalizedNumber < LocalizedObject
+      TYPES = [:decimal, :currency, :percent]
+      DEFAULT_TYPE = :decimal
+      attr_reader :type
+      def initialize(obj, locale, options = {})
+        @options = options.dup
+        @type = @options.delete(:type) || DEFAULT_TYPE
+        raise ArgumentError.new("type #{@type} is not supported") unless @type && TYPES.include?(@type.to_sym)
+        super(obj, locale, @options)
+      end
+      TYPES.each do |type|
+        define_method "to_#{type}" do
+          to_type(type)
+        end
+      end
+      def to_s(options = {})
+        @formatter.format(@base_obj, options)
+      end
+      def plural_rule
+        TwitterCldr::Formatters::Plurals::Rules.rule_for(@base_obj, @locale)
+      end
+      protected
+      def formatter_const
+        TwitterCldr::Formatters.const_get("#{@type.to_s.capitalize}Formatter")
+      end
+      def to_type(target_type)
+        self.class.new(@base_obj, @locale, @options.merge(:type => target_type))
+      end
+    end
+  end
+end

data/lib/twitter_cldr/localized/localized_object.rb ADDED Viewed

@@ -0,0 +1,38 @@
+# encoding: UTF-8
+# Copyright 2012 Twitter, Inc
+# http://www.apache.org/licenses/LICENSE-2.0
+module TwitterCldr
+  module Localized
+    class LocalizedObject
+      attr_reader :locale, :base_obj, :formatter
+      def initialize(obj, locale, options = {})
+        @base_obj = obj
+        @locale = TwitterCldr.convert_locale(locale)
+        @locale = TwitterCldr::DEFAULT_LOCALE unless TwitterCldr.supported_locale?(@locale)
+        options = options.dup
+        options[:locale] = @locale
+        @formatter = formatter_const.new(options) if formatter_const
+      end
+      def formatter_const
+        raise NotImplementedError
+      end
+      def self.localize(klass)
+        klass.class_eval <<-LOCALIZE, __FILE__, __LINE__ + 1
+          def localize(locale = TwitterCldr.get_locale, options = {})
+            #{self}.new(self, locale, options)
+          end
+        LOCALIZE
+      end
+    end
+  end
+end

data/lib/twitter_cldr/localized/localized_string.rb ADDED Viewed

@@ -0,0 +1,41 @@
+# encoding: UTF-8
+# Copyright 2012 Twitter, Inc
+# http://www.apache.org/licenses/LICENSE-2.0
+module TwitterCldr
+  module Localized
+    class LocalizedString < LocalizedObject
+      # Uses wrapped string object as a format specification and returns the result of applying it to +args+ (see
+      # +TwitterCldr::Utils.interpolate+ method for interpolation syntax).
+      #
+      # If +args+ is a Hash then pluralization is performed before interpolation (see +PluralFormatter+ class for
+      # pluralization specification).
+      #
+      def %(args)
+        pluralized = args.is_a?(Hash) ? @formatter.format(@base_obj, args) : @base_obj
+        TwitterCldr::Utils.interpolate(pluralized, args)
+      end
+      def formatter_const
+        TwitterCldr::Formatters::PluralFormatter
+      end
+      def normalize(options = {})
+        TwitterCldr::Normalization.normalize(@base_obj, options).localize(@locale)
+      end
+      def code_points
+        TwitterCldr::Utils::CodePoints.from_string(@base_obj)
+      end
+      def to_s
+        @base_obj.dup
+      end
+    end
+  end
+end

data/lib/twitter_cldr/localized/localized_symbol.rb ADDED Viewed

@@ -0,0 +1,20 @@
+# encoding: UTF-8
+# Copyright 2012 Twitter, Inc
+# http://www.apache.org/licenses/LICENSE-2.0
+module TwitterCldr
+  module Localized
+    class LocalizedSymbol < LocalizedObject
+      def as_language_code
+        TwitterCldr::Shared::Languages.from_code_for_locale(@base_obj, @locale)
+      end
+      def formatter_const
+        nil
+      end
+    end
+  end
+end

data/lib/twitter_cldr/localized/localized_time.rb ADDED Viewed

@@ -0,0 +1,23 @@
+# encoding: UTF-8
+# Copyright 2012 Twitter, Inc
+# http://www.apache.org/licenses/LICENSE-2.0
+module TwitterCldr
+  module Localized
+    class LocalizedTime < LocalizedDateTime
+      def to_datetime(date)
+        date_obj = date.is_a?(LocalizedDate) ? date.base_obj : date
+        LocalizedDateTime.new(DateTime.parse("#{date_obj.strftime("%Y-%m-%d")}T#{@base_obj.strftime("%H:%M:%S%z")}"), @locale, :calendar_type => @calendar_type)
+      end
+      protected
+      def formatter_const
+        TwitterCldr::Formatters::TimeFormatter
+      end
+    end
+  end
+end

data/lib/twitter_cldr/localized/localized_timespan.rb ADDED Viewed

@@ -0,0 +1,26 @@
+# encoding: UTF-8
+# Copyright 2012 Twitter, Inc
+# http://www.apache.org/licenses/LICENSE-2.0
+module TwitterCldr
+  module Localized
+    class LocalizedTimespan < LocalizedObject
+      def initialize(seconds, options = {})
+        super(seconds, options[:locale], options)
+      end
+      def to_s(options = {})
+        @formatter.format(@base_obj, options)
+      end
+      protected
+      def formatter_const
+        TwitterCldr::Formatters::TimespanFormatter
+      end
+    end
+  end
+end

data/lib/twitter_cldr/localized.rb ADDED Viewed

@@ -0,0 +1,18 @@
+# encoding: UTF-8
+# Copyright 2012 Twitter, Inc
+# http://www.apache.org/licenses/LICENSE-2.0
+module TwitterCldr
+  module Localized
+    autoload :LocalizedArray,    'twitter_cldr/localized/localized_array'
+    autoload :LocalizedDate,     'twitter_cldr/localized/localized_date'
+    autoload :LocalizedDateTime, 'twitter_cldr/localized/localized_datetime'
+    autoload :LocalizedNumber,   'twitter_cldr/localized/localized_number'
+    autoload :LocalizedObject,   'twitter_cldr/localized/localized_object'
+    autoload :LocalizedString,   'twitter_cldr/localized/localized_string'
+    autoload :LocalizedSymbol,   'twitter_cldr/localized/localized_symbol'
+    autoload :LocalizedTime,     'twitter_cldr/localized/localized_time'
+    autoload :LocalizedTimespan, 'twitter_cldr/localized/localized_timespan'
+  end
+end

data/lib/twitter_cldr/normalization.rb CHANGED Viewed

@@ -11,5 +11,28 @@ module TwitterCldr
     autoload :NFD,    'twitter_cldr/normalization/nfd'
     autoload :NFKC,   'twitter_cldr/normalization/nfkc'
     autoload :NFKD,   'twitter_cldr/normalization/nfkd'
+    VALID_NORMALIZERS  = [:NFD, :NFKD, :NFC, :NFKC]
+    DEFAULT_NORMALIZER = :NFD
+    class << self
+      def normalize(string, options = {})
+        normalizer(options[:using] || DEFAULT_NORMALIZER).normalize(string)
+      end
+      private
+      def normalizer(normalizer_name)
+        const_name = normalizer_name.to_s.upcase.to_sym
+        if VALID_NORMALIZERS.include?(const_name)
+          const_get(const_name)
+        else
+          raise ArgumentError.new("#{normalizer_name.inspect} is not a valid normalizer (valid normalizers are #{VALID_NORMALIZERS.join(', ')})")
+        end
+      end
+    end
   end
 end

data/lib/twitter_cldr/resources/{tries_dumper.rb → collation_tries_dumper.rb} RENAMED Viewed

@@ -6,7 +6,7 @@
 module TwitterCldr
   module Resources
-    module TriesDumper
+    module CollationTriesDumper
       class << self

data/lib/twitter_cldr/resources/composition_exclusions_importer.rb CHANGED Viewed

@@ -3,7 +3,7 @@
 # Copyright 2012 Twitter, Inc
 # http://www.apache.org/licenses/LICENSE-2.0
-require 'lib/twitter_cldr/resources/download'
+require 'twitter_cldr/resources/download'
 module TwitterCldr
   module Resources

data/lib/twitter_cldr/resources/language_codes_importer.rb ADDED Viewed

@@ -0,0 +1,232 @@
+# encoding: UTF-8
+# Copyright 2012 Twitter, Inc
+# http://www.apache.org/licenses/LICENSE-2.0
+require 'twitter_cldr/resources/download'
+module TwitterCldr
+  module Resources
+    class LanguageCodesImporter
+      BCP_47_FILE, ISO_639_FILE = %w[bcp-47.txt iso-639.txt]
+      INPUT_DATA = {
+          BCP_47_FILE  => 'http://www.iana.org/assignments/language-subtag-registry',
+          ISO_639_FILE => 'http://www.sil.org/iso639-3/iso-639-3_20120614.tab'
+      }
+      KEYS_TO_STANDARDS = {
+          :iso_639_1      => :iso_639_1,
+          :iso_639_2      => :iso_639_2,
+          :iso_639_2_term => :iso_639_2,
+          :iso_639_3      => :iso_639_3,
+          :bcp_47         => :bcp_47,
+          :bcp_47_alt     => :bcp_47
+      }
+      def initialize(input_path, output_path)
+        @input_path  = input_path
+        @output_path = output_path
+      end
+      def import(import_yaml = false)
+        prepare_data
+        import_data(import_yaml)
+      end
+      private
+      def prepare_data
+        INPUT_DATA.each do |file, url|
+          TwitterCldr::Resources.download_if_necessary(File.join(@input_path, file), url)
+        end
+      end
+      def import_data(import_yaml)
+        result = import_iso_639
+        result = import_bcp_47(result)
+        language_codes = Hash[result.inject({}) { |memo, (key, value)| memo[key] = Hash[value.sort]; memo }.sort]
+        language_codes_table = build_table(language_codes)
+        write('language_codes_table.dump', 'wb', Marshal.dump(language_codes_table))
+        if import_yaml
+          write('language_codes.yml', 'w:utf-8', YAML.dump(language_codes))
+          write('language_codes_table.yml', 'w:utf-8', YAML.dump(language_codes_table))
+        end
+      end
+      def write(file, mode, data)
+        File.open(File.join(@output_path, file), mode) { |output| output.write(data) }
+      end
+      # Generates codes in the following format:
+      #
+      # {
+      #   :Albanian => {
+      #     :iso_639_1      => "sq",
+      #     :iso_639_2      => "alb", # default (bibliographic) code
+      #     :iso_639_2_term => "sqi", # terminology code (optional)
+      #     :iso_639_3      => "sqi"
+      #   }
+      # }
+      #
+      def import_iso_639(result = {})
+        File.open(File.join(@input_path, ISO_639_FILE)) do |file|
+          lines = file.lines
+          lines.next # skip header
+          lines.each do |line|
+            entry = line.chomp.gsub(/"(.*)"/) { $1.gsub("\t", '') }
+            data = Hash[ISO_639_COLUMNS.zip(entry.split("\t"))]
+            # either bibliographic and terminology codes are the same (:bt_equiv is empty)
+            # or :iso_639_2 contains terminology code and :bt_equiv contains bibliographic code
+            # skip 'collection' scope
+            if (data[:bt_equiv].empty? || !data[:b_code].empty?) && data[:name] != 'Reserved for local use' && data[:scope] != 'C'
+              h = result[data[:name].to_sym] ||= {}
+              set_iso_639_data(h, :iso_639_1, data[:iso_639_1])
+              if data[:bt_equiv].empty?
+                set_iso_639_data(h, :iso_639_2, data[:iso_639_2])
+              else
+                set_iso_639_data(h, :iso_639_2, data[:bt_equiv])
+                set_iso_639_data(h, :iso_639_2_term, data[:iso_639_2])
+              end
+              set_iso_639_data(h, :iso_639_3, data[:iso_639_3])
+            end
+          end
+        end
+        result
+      end
+      def set_iso_639_data(data, key, value)
+        data[key] = value.to_sym unless value.nil? || value.empty?
+      end
+      # Generates codes in the following format:
+      #
+      # {
+      #   :Bangka => {
+      #       :bcp_47     => "mfb",   # preferred code
+      #       :bcp_47_alt => "ms-mfb" # alternative code (optional)
+      #   }
+      # }
+      def import_bcp_47(result = {})
+        File.open(File.join(@input_path, BCP_47_FILE)) do |file|
+          lines = file.lines
+          lines.next # skip header
+          data  = {}
+          entry = ''
+          lines.each do |line|
+            line.chomp!
+            if line == '%%'
+              process_bcp_47_entry(entry, data)
+              process_bcp_47_data(data, result)
+            else
+              if line.include?(':')
+                process_bcp_47_entry(entry, data)
+                entry = line
+              else
+                entry += line
+              end
+            end
+          end
+          process_bcp_47_entry(entry, data)
+          process_bcp_47_data(data, result)
+        end
+        result
+      end
+      def process_bcp_47_entry(entry, data)
+        return if entry.nil? || entry.empty?
+        key, value = entry.chomp.split(':', 2).map(&:strip)
+        if key == 'Description'
+          (data['names'] ||= []) << value.to_sym
+        else
+          data[key.downcase] = value
+        end
+        entry.clear
+      end
+      def process_bcp_47_data(data, result)
+        if !data.empty? && %w[language extlang].include?(data['type']) && !data['names'].include?('Private use') && data['scope'] != 'collection'
+          existing_names = data['names'].select { |name| result.has_key?(name) }
+          prefered    = data['preferred-value']
+          alternative = [data['prefix'], data['subtag']].compact.join('-')
+          bcp_47 = {}
+          bcp_47[:bcp_47]     = (prefered || alternative).to_sym
+          bcp_47[:bcp_47_alt] = alternative.to_sym if prefered
+          existing_names.each do |name|
+            result[name.to_sym].merge!(bcp_47)
+          end
+          bcp_47.merge!(result[existing_names.first]) unless existing_names.empty?
+          (data['names'] - existing_names).each do |name|
+            result[name.to_sym] = bcp_47.dup
+          end
+        end
+        data.clear
+      end
+      def build_table(language_codes_map)
+        # can't use Hash with default proc here, because we won't be able to marshal this hash later in this case
+        table = ([:name] + KEYS_TO_STANDARDS.values.uniq.sort_by(&:to_s)).inject({}) do |memo, key|
+          memo.merge!(key => {})
+        end
+        language_codes_map.each do |name, codes|
+          table[:name][name] = { :name => name }.merge(codes)
+        end
+        table[:name].values.each do |data|
+          KEYS_TO_STANDARDS.each do |key, standard|
+            table[standard][data[key].to_sym] = data if data[key]
+          end
+        end
+        table.each do |key, codes|
+          table[key] = Hash[codes.sort]
+        end
+      end
+      ISO_639_COLUMNS = [
+          :code,            # Code
+          :status,          # Status
+          :partner_agency,  # Partner Agency
+          :iso_639_3,       # 639_3
+          :iso_639_2,       # 639_2 (alpha-3 bibliographic/terminology code)
+          :b_code,          # alpha-3 bibliographic code if iso_639_2 contains terminology code
+          :bt_equiv,        # bt_equiv (alpha-3 bibliographic/terminology equivalent)
+          :iso_639_1,       # 639_1
+          :name,            # Reference_Name
+          :scope,           # Element_Scope
+          :type,            # Language_Type
+          :docs             # Documentation
+      ]
+    end
+  end
+end