RubyGems - twitter_cldr - Versions diffs - 6.9.0 → 6.10.0 - Mend

twitter_cldr 6.9.0 → 6.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (78)

checksums.yaml +4 -4
data/README.md +31 -28
data/lib/twitter_cldr/resources/calendars_importer.rb +48 -46
data/lib/twitter_cldr/resources/cldr_data_builder.rb +36 -0
data/lib/twitter_cldr/resources/cldr_document_set.rb +149 -0
data/lib/twitter_cldr/resources/cldr_dtd.rb +110 -0
data/lib/twitter_cldr/resources/cldr_locale.rb +78 -0
data/lib/twitter_cldr/resources/list_formats_importer.rb +1 -1
data/lib/twitter_cldr/resources/number_formats_importer.rb +1 -1
data/lib/twitter_cldr/resources/readme_renderer.rb +6 -2
data/lib/twitter_cldr/resources/requirements/cldr_requirement.rb +12 -61
data/lib/twitter_cldr/resources/territories_importer.rb +1 -1
data/lib/twitter_cldr/resources/timezones_importer.rb +1 -1
data/lib/twitter_cldr/resources/unicode_property_aliases_importer.rb +2 -0
data/lib/twitter_cldr/resources/units_importer.rb +1 -1
data/lib/twitter_cldr/resources.rb +8 -3
data/lib/twitter_cldr/shared/calendar.rb +1 -1
data/lib/twitter_cldr/version.rb +1 -1
data/resources/locales/ar/calendars.yml +2 -1
data/resources/locales/az/calendars.yml +2 -1
data/resources/locales/be/calendars.yml +2 -1
data/resources/locales/bg/calendars.yml +2 -1
data/resources/locales/bn/calendars.yml +2 -1
data/resources/locales/bo/calendars.yml +34 -2
data/resources/locales/bs/calendars.yml +1 -0
data/resources/locales/cy/calendars.yml +1 -0
data/resources/locales/el/calendars.yml +2 -1
data/resources/locales/en/calendars.yml +15 -0
data/resources/locales/en-001/calendars.yml +6 -0
data/resources/locales/en-150/calendars.yml +6 -0
data/resources/locales/en-AU/calendars.yml +4 -0
data/resources/locales/en-CA/calendars.yml +11 -5
data/resources/locales/en-GB/calendars.yml +3 -0
data/resources/locales/en-IE/calendars.yml +6 -0
data/resources/locales/en-IN/calendars.yml +6 -0
data/resources/locales/en-NZ/calendars.yml +6 -0
data/resources/locales/en-SG/calendars.yml +6 -0
data/resources/locales/en-US/calendars.yml +15 -0
data/resources/locales/en-ZA/calendars.yml +6 -0
data/resources/locales/eo/calendars.yml +27 -0
data/resources/locales/es/calendars.yml +2 -1
data/resources/locales/es-419/calendars.yml +2 -1
data/resources/locales/es-AR/calendars.yml +2 -1
data/resources/locales/es-CO/calendars.yml +2 -1
data/resources/locales/es-MX/calendars.yml +2 -1
data/resources/locales/es-US/calendars.yml +2 -1
data/resources/locales/fil/calendars.yml +4 -3
data/resources/locales/gl/calendars.yml +2 -1
data/resources/locales/hi/calendars.yml +2 -1
data/resources/locales/hy/calendars.yml +4 -3
data/resources/locales/ka/calendars.yml +2 -1
data/resources/locales/kk/calendars.yml +2 -1
data/resources/locales/km/calendars.yml +2 -1
data/resources/locales/kn/calendars.yml +2 -1
data/resources/locales/ko/calendars.yml +2 -1
data/resources/locales/lo/calendars.yml +2 -1
data/resources/locales/mk/calendars.yml +1 -1
data/resources/locales/mr/calendars.yml +2 -1
data/resources/locales/mt/calendars.yml +2 -1
data/resources/locales/my/calendars.yml +2 -1
data/resources/locales/pl/calendars.yml +2 -1
data/resources/locales/pt/calendars.yml +2 -1
data/resources/locales/pt-PT/calendars.yml +2 -1
data/resources/locales/sk/calendars.yml +2 -1
data/resources/locales/sl/calendars.yml +2 -1
data/resources/locales/sr/calendars.yml +2 -0
data/resources/locales/sr-Cyrl-ME/calendars.yml +3 -1
data/resources/locales/sr-Latn-ME/calendars.yml +3 -1
data/resources/locales/sw/calendars.yml +2 -1
data/resources/locales/ta/calendars.yml +2 -1
data/resources/locales/th/calendars.yml +1 -0
data/resources/locales/tr/calendars.yml +2 -1
data/resources/locales/ur/calendars.yml +2 -1
data/resources/locales/xh/calendars.yml +32 -1
data/resources/locales/zu/calendars.yml +3 -1
data/spec/formatters/calendars/datetime_formatter_spec.rb +2 -2
data/spec/shared/calendar_spec.rb +8 -8
metadata +6 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: c8aceda15c987295a62672bd4f7a78546b34eed2a83eb9178b58bd5a5efa49be
-  data.tar.gz: b179d63e855785bdd871b12dda0f7c0ecd67b91106510972d89f74ab93b8ec5d
+  metadata.gz: ce23d1d3e7d5428401b92b60c2570f8a2370e41beb32068fc0df7a6ce4959126
+  data.tar.gz: 595d913024e24dd3fd86426f12288c7040904f446291145e5ba92c0945a33d12
 SHA512:
-  metadata.gz: 92f18ec12c13f8ed66b23a9453c9b932e502738266957b326a62493ef1062878ae29932d16f4963658f0b30591528de8415293f1e1c90e394a3e2b4fba2b1244
-  data.tar.gz: b34f99bbb5e5e37d3c95d63ce56d47c097d2aac14730ed56d72fc2f5e3c5a0db26b1ff0d20ea599a200d4e75bb3c38cd22ecf5321ece0a606967f0f49290738d
+  metadata.gz: 402c8198faf70a1f72631e202278e2bf464a67c81d42dec357f0c474d078085054651b2bd166e1de6c1d078c83ddfbcc757de638affb3415e597d6283033465e
+  data.tar.gz: d06ad1e4fc3aac7d069d606b75c1bcde9261f53805d9441914a042a641ed83cd0e06caa8039e7f44308726f686cc02ebeb0aeda52a693d5dbbbd9da3c0b46694

data/README.md CHANGED Viewed

@@ -92,7 +92,7 @@ TwitterCLDR supports formatting numbers with an attached unit, for example "12 d
 ```ruby
 12.localize.to_unit.length_mile  # "12 miles"
-12.localize(:ru).to_unit.length_mile  # "12 миль"
+12.localize(:ru).to_unit.length_mile  # "12 милях"
 ```
 Units support a few different forms, long, short, and narrow:
@@ -177,8 +177,8 @@ For English (and other languages), you can also specify an ordinal spellout:
 ```ruby
 DateTime.now.localize(:es).to_full_s               # "viernes, 14 de febrero de 2014, 12:20:05 (tiempo universal coordinado)"
 DateTime.now.localize(:es).to_long_s               # "14 de febrero de 2014, 12:20:05 UTC"
-DateTime.now.localize(:es).to_medium_s             # "14 feb. 2014 12:20:05"
-DateTime.now.localize(:es).to_short_s              # "14/2/14 12:20"
+DateTime.now.localize(:es).to_medium_s             # "14 feb 2014, 12:20:05"
+DateTime.now.localize(:es).to_short_s              # "14/2/14, 12:20"
 Time.now.localize(:es).to_full_s                   # "12:20:05 (tiempo universal coordinado)"
 Time.now.localize(:es).to_long_s                   # "12:20:05 UTC"
@@ -187,7 +187,7 @@ Time.now.localize(:es).to_short_s                  # "12:20"
 DateTime.now.localize(:es).to_date.to_full_s       # "viernes, 14 de febrero de 2014"
 DateTime.now.localize(:es).to_date.to_long_s       # "14 de febrero de 2014"
-DateTime.now.localize(:es).to_date.to_medium_s     # "14 feb. 2014"
+DateTime.now.localize(:es).to_date.to_medium_s     # "14 feb 2014"
 DateTime.now.localize(:es).to_date.to_short_s      # "14/2/14"
 ```
@@ -239,6 +239,7 @@ It's important to know that, even though any given format may not be available a
 | GyMMM      | Feb 2014 CE            |
 | GyMMMEd    | Fri, Feb 14, 2014 CE   |
 | GyMMMd     | Feb 14, 2014 CE        |
+| GyMd       | 2/14/2014 Common Era   |
 | H          | 12                     |
 | Hm         | 12:20                  |
 | Hms        | 12:20:05               |
@@ -246,7 +247,7 @@ It's important to know that, even though any given format may not be available a
 | Hmv        | 12:20 GMT              |
 | M          | 2                      |
 | MEd        | Fri, 2/14              |
-| MMM        | Feb                    |
+| MMM        | M02                    |
 | MMMEd      | Fri, Feb 14            |
 | MMMMW      | week 3 of February     |
 | MMMMd      | February 14            |
@@ -366,6 +367,8 @@ tz.display_name_for(DateTime.new(2019, 11, 5), :generic_location)
 tz.display_name_for(DateTime.new(2019, 11, 5), :generic_long)
 ```
+`#display_name_for` also accepts arguments for resolving ambiguous times. See [TZInfo Documentation](https://www.rubydoc.info/gems/tzinfo/TZInfo/Timezone#period_for_local-instance_method) for more information.
 ### Calendar Data
 CLDR contains a trove of calendar data, much of which can be accessed. One example is names of months, days, years.
@@ -416,8 +419,8 @@ Behind the scenes, these convenience methods use the `TwitterCldr::Formatters::P
 TwitterCldr::Formatters::Plurals::Rules.all                # [:one, :other]
 # get all rules for a specific locale
-TwitterCldr::Formatters::Plurals::Rules.all_for(:es)       # [:one, :other]
-TwitterCldr::Formatters::Plurals::Rules.all_for(:ru)       # [:few, :many, :one, :other]
+TwitterCldr::Formatters::Plurals::Rules.all_for(:es)       # [:one, :many, :other]
+TwitterCldr::Formatters::Plurals::Rules.all_for(:ru)       # [:one, :few, :many, :other]
 # get the rule for a number in a specific locale
 TwitterCldr::Formatters::Plurals::Rules.rule_for(1, :ru)   # :one
@@ -498,21 +501,21 @@ In addition to translating language codes, TwitterCLDR provides access to the fu
 ```ruby
 # get all languages for the default locale
-TwitterCldr::Shared::Languages.all                                                  # { ... :vi => "Vietnamese", :"zh-Hant" => "Traditional Mandarin Chinese" ... }
+TwitterCldr::Shared::Languages.all                                                  # { ... :vi => "Vietnamese", :"zh-Hant" => "Traditional Chinese" ... }
 # get all languages for a specific locale
-TwitterCldr::Shared::Languages.all_for(:es)                                         # { ... :vi => "vietnamita", :"zh-Hant" => "chino mandarín tradicional" ... }
+TwitterCldr::Shared::Languages.all_for(:es)                                         # { ... :vi => "vietnamita", :"zh-Hant" => "chino tradicional" ... }
 # get a language by its code for the default locale
-TwitterCldr::Shared::Languages.from_code(:'zh-Hant')                                # "Traditional Mandarin Chinese"
+TwitterCldr::Shared::Languages.from_code(:'zh-Hant')                                # "Traditional Chinese"
 # get a language from its code for a specific locale
-TwitterCldr::Shared::Languages.from_code_for_locale(:'zh-Hant', :es)                # "chino mandarín tradicional"
+TwitterCldr::Shared::Languages.from_code_for_locale(:'zh-Hant', :es)                # "chino tradicional"
 # translate a language from one locale to another
 # signature: translate_language(lang, source_locale, destination_locale)
-TwitterCldr::Shared::Languages.translate_language("chino tradicional", :es, :en)    # "Traditional Mandarin Chinese"
-TwitterCldr::Shared::Languages.translate_language("Traditional Chinese", :en, :es)  # "chino mandarín tradicional"
+TwitterCldr::Shared::Languages.translate_language("chino tradicional", :es, :en)    # "Traditional Chinese"
+TwitterCldr::Shared::Languages.translate_language("Traditional Chinese", :en, :es)  # "chino tradicional"
 ```
 ### World Territories
@@ -558,20 +561,20 @@ The CLDR contains postal code validation regexes for a number of countries.
 ```ruby
 # United States
-postal_code = TwitterCldr::Shared::PostalCodes.for_territory(:us)
+postal_code = TwitterCldr::Shared::PostalCodes.for_territory(:us)
 postal_code.valid?("94103")     # true
 postal_code.valid?("9410")      # false
 # England (Great Britain)
-postal_code = TwitterCldr::Shared::PostalCodes.for_territory(:gb)
+postal_code = TwitterCldr::Shared::PostalCodes.for_territory(:gb)
 postal_code.valid?("BS98 1TL")  # true
 # Sweden
-postal_code = TwitterCldr::Shared::PostalCodes.for_territory(:se)
+postal_code = TwitterCldr::Shared::PostalCodes.for_territory(:se)
 postal_code.valid?("280 12")    # true
 # Canada
-postal_code = TwitterCldr::Shared::PostalCodes.for_territory(:ca)
+postal_code = TwitterCldr::Shared::PostalCodes.for_territory(:ca)
 postal_code.valid?("V3H 1Z7")   # true
 ```
@@ -579,7 +582,7 @@ Match all valid postal codes in a string with the `#find_all` method:
 ```ruby
 # United States
-postal_code = TwitterCldr::Shared::PostalCodes.for_territory(:us)
+postal_code = TwitterCldr::Shared::PostalCodes.for_territory(:us)
 postal_code.find_all("12345 23456")    # ["12345", "23456"]
 ```
@@ -592,14 +595,14 @@ TwitterCldr::Shared::PostalCodes.territories  # [:ac, :ad, :af, :ai, :al, ... ]
 Just want the regex?  No problem:
 ```ruby
-postal_code = TwitterCldr::Shared::PostalCodes.for_territory(:us)
+postal_code = TwitterCldr::Shared::PostalCodes.for_territory(:us)
 postal_code.regexp  # /(\d{5})(?:[ \-](\d{4}))?/
 ```
 Get a sample of valid postal codes with the `#sample` method:
 ```ruby
-postal_code.sample(5)  # ["29294", "22486-2369", "76632", "40800-9860", "06727-6194"]
+postal_code.sample(5)  # ["60668-3382", "36022", "22364-5670", "32142-1738", "32633-0502"]
 ```
 ### Phone Codes
@@ -1014,17 +1017,17 @@ The Psych gem that is the default YAML engine in Ruby 1.9 doesn't handle Unicode
 You can make use of TwitterCLDR's YAML dumper by calling `localize` and then `to_yaml` on an `Array`, `Hash`, or `String`:
 ```ruby
-{ :hello => "world" }.localize.to_yaml
-["hello", "world"].localize.to_yaml
-"hello, world".localize.to_yaml
+{ :hello => "world" }.localize.to_yaml
+["hello", "world"].localize.to_yaml
+"hello, world".localize.to_yaml
 ```
 Behind the scenes, these convenience methods are using the `TwitterCldr::Shared::YAML` class.  You can do the same thing if you're feeling adventurous:
 ```ruby
-TwitterCldr::Shared::YAML.dump({ :hello => "world" })
-TwitterCldr::Shared::YAML.dump(["hello", "world"])
-TwitterCldr::Shared::YAML.dump("hello, world")
+TwitterCldr::Shared::YAML.dump({ :hello => "world" })
+TwitterCldr::Shared::YAML.dump(["hello", "world"])
+TwitterCldr::Shared::YAML.dump("hello, world")
 ```
 ## Adding New Locales
@@ -1070,7 +1073,7 @@ TwitterCldr.locale    # will return :ru
 ## Compatibility
-TwitterCLDR is fully compatible with Ruby 1.9.3, 2.0.0, 2.2.0.
+TwitterCLDR is fully compatible with Ruby 2.0, 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 2.7, 3.0.
 ## Requirements
@@ -1103,6 +1106,6 @@ TwitterCLDR currently supports localization of certain textual objects in JavaSc
 ## License
-Copyright 2019 Twitter, Inc.
+Copyright 2021 Twitter, Inc.
 Licensed under the Apache License, Version 2.0: http://www.apache.org/licenses/LICENSE-2.0

data/lib/twitter_cldr/resources/calendars_importer.rb CHANGED Viewed

@@ -36,11 +36,12 @@ module TwitterCldr
       end
       def import_locale(locale)
-        data = requirements[:cldr].merge_each_ancestor(locale) do |ancestor_locale|
+        data = requirements[:cldr].build_data(locale) do |ancestor_locale|
           GregorianCalendar.new(ancestor_locale, requirements[:cldr]).to_h
         end
         output_file = File.join(output_path, locale.to_s, 'calendars.yml')
+        FileUtils.mkdir_p(File.dirname(output_file))
         File.open(output_file, 'w:utf-8') do |output|
           output.write(
@@ -60,6 +61,8 @@ module TwitterCldr
     class GregorianCalendar
+      ERA_TAGS = ['eraNames', 'eraAbbr', 'eraNarrow'].freeze
       attr_reader :locale, :cldr_req
       def initialize(locale, cldr_req)
@@ -71,8 +74,8 @@ module TwitterCldr
         {
           calendars: {
             gregorian: {
-              months:   contexts('month'),
               days:     contexts('day'),
+              months:   contexts('month'),
               eras:     eras,
               quarters: contexts('quarter'),
               periods:  contexts('dayPeriod', group: "alt"),
@@ -91,42 +94,39 @@ module TwitterCldr
       private
       def calendar
-        @calendar ||= doc.xpath('//ldml/dates/calendars/calendar[@type="gregorian"]').first
+        @calendar ||= docset.xpath('//ldml/dates/calendars/calendar[@type="gregorian"]').first
       end
       def contexts(kind, options = {})
         return {} unless calendar
-        calendar.xpath("#{kind}s/#{kind}Context").each_with_object({}) do |node, result|
-          context = node.attribute('type').value.to_sym
+        dtd.find_attr("#{kind}Context", 'type').values.each_with_object({}) do |context, result|
+          node = calendar.xpath("#{kind}s/#{kind}Context[@type='#{context}']").first
+          next unless node
           result[context] = widths(node, kind, context, options)
         end
       end
       def widths(node, kind, context, options = {})
-        node.xpath("#{kind}Width").each_with_object({}) do |node, result|
-          width = node.attribute('type').value.to_sym
-          result[width] = elements(node, kind, context, width, options)
+        dtd.find_attr("#{kind}Width", 'type').values.each_with_object({}) do |width, result|
+          width_node = node.xpath("#{kind}Width[@type='#{width}']").first
+          next unless width_node
+          result[width] = elements(width_node, kind, context, width, options)
         end
       end
       def elements(node, kind, context, width, options = {})
-        aliased = node.xpath('alias').first
-        if aliased
-          alias_path = "#{node.path}/#{aliased.attribute('path').value}"
-          elements(doc.xpath(alias_path).first, kind, context, width, options)
-        else
-          node.xpath(kind).each_with_object({}) do |node, result|
-            key = node.attribute('type').value
-            key = key =~ /^\d*$/ ? key.to_i : key.to_sym
-            if options[:group] && found_group = node.attribute(options[:group])
-              result[found_group.value] ||= {}
-              result[found_group.value][key] = node.content
-            else
-              result[key] = node.content
-            end
+        node.xpath(kind).each_with_object({}) do |node, result|
+          key = node.attribute('type').value
+          key = key =~ /^\d*$/ ? key.to_i : key.to_sym
+          if options[:group] && found_group = node.attribute(options[:group])
+            result[found_group.value] ||= {}
+            result[found_group.value][key] = node.content
+          else
+            result[key] = node.content
           end
         end
       end
@@ -144,15 +144,14 @@ module TwitterCldr
       def eras
         return {} unless calendar
-        base_path = "#{calendar.path}/eras"
-        keys = doc.xpath("#{base_path}/*").map { |node| node.name }
+        ERA_TAGS.each_with_object({}) do |era_tag, result|
+          key  = era_tag.gsub('era', '').gsub(/s$/, '').downcase.to_sym
+          path = "eras/#{era_tag}"
-        keys.each_with_object({}) do |name, result|
-          path = "#{base_path}/#{name}/*"
-          key  = name.gsub('era', '').gsub(/s$/, '').downcase.to_sym
-          result[key] = doc.xpath(path).each_with_object({}) do |node, ret|
-            type = node.attribute('type').value.to_i rescue 0
-            ret[type] = node.content
+          result[key] = dtd.find_attr('era', 'type').values.each_with_object({}) do |type, ret|
+            node = calendar.xpath("#{path}/era[@type='#{type}' and @alt='variant']").first ||
+              calendar.xpath("#{path}/era[@type='#{type}']").first
+            ret[type] = node.content if node
             ret
           end
         end
@@ -161,22 +160,24 @@ module TwitterCldr
       def formats(type)
         return {} unless calendar
-        formats = calendar.xpath("#{type}Formats/#{type}FormatLength").each_with_object({}) do |node, result|
-          key = node.attribute('type').value.to_sym rescue :format
-          result[key] = pattern(node, type)
+        formats = dtd.find_attr("#{type}FormatLength", 'type').values.each_with_object({}) do |format_length, result|
+          node = calendar.xpath("#{type}Formats/#{type}FormatLength[@type='#{format_length}']").first
+          result[format_length] = pattern(node, type) if node
         end
         if default = default_format(type)
           formats = default.merge(formats)
         end
         formats
       end
       def additional_formats
         return {} unless calendar
-        calendar.xpath("dateTimeFormats/availableFormats/dateFormatItem").each_with_object({}) do |node, result|
-          key = node.attribute('id').value
-          result[key] = node.content
+        dtd.find_attr('dateFormatItem', 'id').values.each_with_object({}) do |id, result|
+          node = calendar.xpath("dateTimeFormats/availableFormats/dateFormatItem[@id='#{id}']").first
+          result[id] = node.content if node
         end
       end
@@ -196,24 +197,25 @@ module TwitterCldr
       end
       def fields
-        doc.xpath("//ldml/dates/fields/field").each_with_object({}) do |node, result|
-          key  = node.attribute('type').value.to_sym
+        dtd.find_attr('field', 'type').values.each_with_object({}) do |field, result|
+          node = docset.xpath("//ldml/dates/fields/field[@type='#{field}']").first
           name = node.xpath('displayName').first
-          result[key] = name.content if name
+          result[field] = name.content if name
         end
       end
-      def doc
-        @doc ||= begin
-          locale_fs = locale.to_s.gsub('-', '_')
-          Nokogiri.XML(File.read(File.join(cldr_main_path, "#{locale_fs}.xml")))
-        end
+      def docset
+        @docset ||= cldr_req.docset(cldr_main_path, locale)
       end
       def cldr_main_path
         @cldr_main_path ||= File.join(cldr_req.common_path, 'main')
       end
+      def dtd
+        cldr_req.dtd
+      end
     end
   end

data/lib/twitter_cldr/resources/cldr_data_builder.rb ADDED Viewed

@@ -0,0 +1,36 @@
+# encoding: UTF-8
+# Copyright 2012 Twitter, Inc
+# http://www.apache.org/licenses/LICENSE-2.0
+require 'nokogiri'
+module TwitterCldr
+  module Resources
+    class CldrDataBuilder
+      DEEP_MERGER = proc do |key, v1, v2|
+        Hash === v1 && Hash === v2 ? v1.merge(v2, &DEEP_MERGER) : (v2 || v1)
+      end
+      attr_reader :cldr_locale
+      def initialize(cldr_locale)
+        @cldr_locale = cldr_locale
+      end
+      def merge_each_ancestor
+        cldr_locale.ancestors.inject({}) do |result, ancestor_locale|
+          deep_merge(yield(ancestor_locale), result)
+        end
+      end
+      private
+      def deep_merge(h1, h2)
+        h1.merge(h2, &DEEP_MERGER)
+      end
+    end
+  end
+end

data/lib/twitter_cldr/resources/cldr_document_set.rb ADDED Viewed

@@ -0,0 +1,149 @@
+# encoding: UTF-8
+# Copyright 2012 Twitter, Inc
+# http://www.apache.org/licenses/LICENSE-2.0
+require 'forwardable'
+require 'nokogiri'
+module TwitterCldr
+  module Resources
+    class CldrDocumentSet
+      class Element
+        extend Forwardable
+        def_delegators :@element, :attribute, :content, :name, :path
+        attr_reader :docset, :element
+        def initialize(docset, element)
+          @docset = docset
+          @element = element
+        end
+        def xpath(path)
+          path = CldrDocumentSet.join_xpaths(docset.path_for(element), path)
+          docset.xpath(path)
+        end
+      end
+      class ElementList
+        include Enumerable
+        extend Forwardable
+        def_delegators :@elements, :size
+        attr_reader :docset, :elements
+        def initialize(docset, elements)
+          @docset = docset
+          @elements = elements
+        end
+        def [](idx)
+          return unless elements[idx]
+          Element.new(docset, elements[idx])
+        end
+        def first
+          self[0]
+        end
+        def each
+          return to_enum(__method__) unless block_given?
+          elements.size.times do |idx|
+            yield self[idx]
+          end
+        end
+      end
+      def self.join_xpaths(*paths)
+        paths.map { |a| a.chomp('/') }.join('/')
+      end
+      attr_reader :path, :cldr_locale, :cldr_requirement
+      def initialize(path, cldr_locale, cldr_requirement)
+        @path = path
+        @cldr_locale = cldr_locale
+        @cldr_requirement = cldr_requirement
+      end
+      def xpath(path)
+        cldr_locale.ancestors.each do |ancestor_locale|
+          data = doc_for(ancestor_locale).xpath(path)
+          unless data.empty?
+            return ElementList.new(self, resolve_aliases_in(data))
+          end
+        end
+        ElementList.new(self, [])
+      end
+      def path_for(node)
+        orig_node = node
+        path = []
+        while node
+          path << selector_for(node)
+          node = node.parent
+          break if node.name == 'document'
+        end
+        "//#{path.reverse.join('/')}"
+      end
+      private
+      def resolve_aliases_in(data)
+        alias_nodes = data.xpath('.//alias')
+        alias_nodes.each do |alias_node|
+          alias_path = alias_node.attribute('path').value
+          full_path = join_xpaths(path_for(alias_node.parent), alias_path)
+          cldr_locale.ancestors.find do |ancestor_locale|
+            resolved_node = doc_for(ancestor_locale).xpath(full_path).first.dup
+            if resolved_node
+              resolved_copy = Nokogiri::XML(resolved_node.to_xml).children.first
+              parent = alias_node.parent
+              alias_node.replace(resolved_copy.children)
+              resolve_aliases_in(parent)
+              break
+            end
+          end
+        end
+        data
+      end
+      def join_xpaths(*paths)
+        self.class.join_xpaths(*paths)
+      end
+      def selector_for(node)
+        node.name.dup.tap do |selector|
+          if type = node.attribute('type')
+            selector << "[@type='#{type.value}']"
+          end
+        end
+      end
+      def doc_for(locale)
+        locale_fs = locale.to_s.gsub('-', '_')
+        docs[locale_fs] ||= Nokogiri.XML(File.read(File.join(path, "#{locale_fs}.xml")))
+      end
+      def docs
+        @docs ||= {}
+      end
+    end
+  end
+end

data/lib/twitter_cldr/resources/cldr_dtd.rb ADDED Viewed

@@ -0,0 +1,110 @@
+# encoding: UTF-8
+# Copyright 2012 Twitter, Inc
+# http://www.apache.org/licenses/LICENSE-2.0
+module TwitterCldr
+  module Resources
+    class CldrDTD
+      class Attr
+        attr_reader :name, :element_name, :dtd
+        def initialize(name, element_name, dtd)
+          @name = name
+          @element_name = element_name
+          @dtd = dtd
+        end
+        def values
+          @values ||= begin
+            attr_line_idx = schema.find_index do |line|
+              line.include?("<!ATTLIST #{element_name} #{name} ")
+            end
+            return [] unless attr_line_idx
+            attr_line = schema[attr_line_idx]
+            if comment = find_match_comment_after(attr_line_idx + 1)
+              parse_match(comment)
+            else
+              start_idx = attr_line.index('(')
+              return [] unless start_idx
+              finish_idx = attr_line.rindex(')')
+              attr_line[(start_idx + 1)...finish_idx].split('|').map(&:strip)
+            end
+          end
+        end
+        private
+        def find_match_comment_after(idx)
+          loop do
+            return nil if idx > schema.size
+            if schema[idx].strip.start_with?('<!--@MATCH')
+              break
+            elsif schema[idx].strip.start_with?('<!--')
+              idx += 1
+            else
+              return nil
+            end
+          end
+          schema[idx]
+        end
+        def parse_match(str)
+          m = str.match(/<!--@MATCH:([^\/]+)\/(.*)-->/)
+          return [] unless m
+          type, args = m.captures
+          case type
+            when 'literal'
+              args.split(',').map(&:strip)
+            when 'range'
+              start, finish = args.split('~')
+              ((start.to_i)..(finish.to_i)).to_a
+          end
+        end
+        def schema
+          dtd.schema
+        end
+      end
+      attr_reader :cldr_requirement
+      def initialize(cldr_requirement)
+        @cldr_requirement = cldr_requirement
+      end
+      def find_attr(element_name, attr_name)
+        elements[element_name] ||= {}
+        elements[element_name][attr_name] ||= Attr.new(
+          attr_name, element_name, self
+        )
+      end
+      def schema
+        @schema ||= File.read(schema_path).split("\n")
+      end
+      private
+      def elements
+        @elements ||= {}
+      end
+      def schema_path
+        @schema_path ||= File.join(
+          cldr_requirement.common_path, 'dtd', 'ldml.dtd'
+        )
+      end
+    end
+  end
+end