twitter_cldr 6.9.0 → 6.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +31 -28
  3. data/lib/twitter_cldr/resources/calendars_importer.rb +48 -46
  4. data/lib/twitter_cldr/resources/cldr_data_builder.rb +36 -0
  5. data/lib/twitter_cldr/resources/cldr_document_set.rb +149 -0
  6. data/lib/twitter_cldr/resources/cldr_dtd.rb +110 -0
  7. data/lib/twitter_cldr/resources/cldr_locale.rb +78 -0
  8. data/lib/twitter_cldr/resources/list_formats_importer.rb +1 -1
  9. data/lib/twitter_cldr/resources/number_formats_importer.rb +1 -1
  10. data/lib/twitter_cldr/resources/readme_renderer.rb +6 -2
  11. data/lib/twitter_cldr/resources/requirements/cldr_requirement.rb +12 -61
  12. data/lib/twitter_cldr/resources/territories_importer.rb +1 -1
  13. data/lib/twitter_cldr/resources/timezones_importer.rb +1 -1
  14. data/lib/twitter_cldr/resources/unicode_property_aliases_importer.rb +2 -0
  15. data/lib/twitter_cldr/resources/units_importer.rb +1 -1
  16. data/lib/twitter_cldr/resources.rb +8 -3
  17. data/lib/twitter_cldr/shared/calendar.rb +1 -1
  18. data/lib/twitter_cldr/version.rb +1 -1
  19. data/resources/locales/ar/calendars.yml +2 -1
  20. data/resources/locales/az/calendars.yml +2 -1
  21. data/resources/locales/be/calendars.yml +2 -1
  22. data/resources/locales/bg/calendars.yml +2 -1
  23. data/resources/locales/bn/calendars.yml +2 -1
  24. data/resources/locales/bo/calendars.yml +34 -2
  25. data/resources/locales/bs/calendars.yml +1 -0
  26. data/resources/locales/cy/calendars.yml +1 -0
  27. data/resources/locales/el/calendars.yml +2 -1
  28. data/resources/locales/en/calendars.yml +15 -0
  29. data/resources/locales/en-001/calendars.yml +6 -0
  30. data/resources/locales/en-150/calendars.yml +6 -0
  31. data/resources/locales/en-AU/calendars.yml +4 -0
  32. data/resources/locales/en-CA/calendars.yml +11 -5
  33. data/resources/locales/en-GB/calendars.yml +3 -0
  34. data/resources/locales/en-IE/calendars.yml +6 -0
  35. data/resources/locales/en-IN/calendars.yml +6 -0
  36. data/resources/locales/en-NZ/calendars.yml +6 -0
  37. data/resources/locales/en-SG/calendars.yml +6 -0
  38. data/resources/locales/en-US/calendars.yml +15 -0
  39. data/resources/locales/en-ZA/calendars.yml +6 -0
  40. data/resources/locales/eo/calendars.yml +27 -0
  41. data/resources/locales/es/calendars.yml +2 -1
  42. data/resources/locales/es-419/calendars.yml +2 -1
  43. data/resources/locales/es-AR/calendars.yml +2 -1
  44. data/resources/locales/es-CO/calendars.yml +2 -1
  45. data/resources/locales/es-MX/calendars.yml +2 -1
  46. data/resources/locales/es-US/calendars.yml +2 -1
  47. data/resources/locales/fil/calendars.yml +4 -3
  48. data/resources/locales/gl/calendars.yml +2 -1
  49. data/resources/locales/hi/calendars.yml +2 -1
  50. data/resources/locales/hy/calendars.yml +4 -3
  51. data/resources/locales/ka/calendars.yml +2 -1
  52. data/resources/locales/kk/calendars.yml +2 -1
  53. data/resources/locales/km/calendars.yml +2 -1
  54. data/resources/locales/kn/calendars.yml +2 -1
  55. data/resources/locales/ko/calendars.yml +2 -1
  56. data/resources/locales/lo/calendars.yml +2 -1
  57. data/resources/locales/mk/calendars.yml +1 -1
  58. data/resources/locales/mr/calendars.yml +2 -1
  59. data/resources/locales/mt/calendars.yml +2 -1
  60. data/resources/locales/my/calendars.yml +2 -1
  61. data/resources/locales/pl/calendars.yml +2 -1
  62. data/resources/locales/pt/calendars.yml +2 -1
  63. data/resources/locales/pt-PT/calendars.yml +2 -1
  64. data/resources/locales/sk/calendars.yml +2 -1
  65. data/resources/locales/sl/calendars.yml +2 -1
  66. data/resources/locales/sr/calendars.yml +2 -0
  67. data/resources/locales/sr-Cyrl-ME/calendars.yml +3 -1
  68. data/resources/locales/sr-Latn-ME/calendars.yml +3 -1
  69. data/resources/locales/sw/calendars.yml +2 -1
  70. data/resources/locales/ta/calendars.yml +2 -1
  71. data/resources/locales/th/calendars.yml +1 -0
  72. data/resources/locales/tr/calendars.yml +2 -1
  73. data/resources/locales/ur/calendars.yml +2 -1
  74. data/resources/locales/xh/calendars.yml +32 -1
  75. data/resources/locales/zu/calendars.yml +3 -1
  76. data/spec/formatters/calendars/datetime_formatter_spec.rb +2 -2
  77. data/spec/shared/calendar_spec.rb +8 -8
  78. metadata +6 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c8aceda15c987295a62672bd4f7a78546b34eed2a83eb9178b58bd5a5efa49be
4
- data.tar.gz: b179d63e855785bdd871b12dda0f7c0ecd67b91106510972d89f74ab93b8ec5d
3
+ metadata.gz: ce23d1d3e7d5428401b92b60c2570f8a2370e41beb32068fc0df7a6ce4959126
4
+ data.tar.gz: 595d913024e24dd3fd86426f12288c7040904f446291145e5ba92c0945a33d12
5
5
  SHA512:
6
- metadata.gz: 92f18ec12c13f8ed66b23a9453c9b932e502738266957b326a62493ef1062878ae29932d16f4963658f0b30591528de8415293f1e1c90e394a3e2b4fba2b1244
7
- data.tar.gz: b34f99bbb5e5e37d3c95d63ce56d47c097d2aac14730ed56d72fc2f5e3c5a0db26b1ff0d20ea599a200d4e75bb3c38cd22ecf5321ece0a606967f0f49290738d
6
+ metadata.gz: 402c8198faf70a1f72631e202278e2bf464a67c81d42dec357f0c474d078085054651b2bd166e1de6c1d078c83ddfbcc757de638affb3415e597d6283033465e
7
+ data.tar.gz: d06ad1e4fc3aac7d069d606b75c1bcde9261f53805d9441914a042a641ed83cd0e06caa8039e7f44308726f686cc02ebeb0aeda52a693d5dbbbd9da3c0b46694
data/README.md CHANGED
@@ -92,7 +92,7 @@ TwitterCLDR supports formatting numbers with an attached unit, for example "12 d
92
92
 
93
93
  ```ruby
94
94
  12.localize.to_unit.length_mile # "12 miles"
95
- 12.localize(:ru).to_unit.length_mile # "12 миль"
95
+ 12.localize(:ru).to_unit.length_mile # "12 милях"
96
96
  ```
97
97
  Units support a few different forms, long, short, and narrow:
98
98
 
@@ -177,8 +177,8 @@ For English (and other languages), you can also specify an ordinal spellout:
177
177
  ```ruby
178
178
  DateTime.now.localize(:es).to_full_s # "viernes, 14 de febrero de 2014, 12:20:05 (tiempo universal coordinado)"
179
179
  DateTime.now.localize(:es).to_long_s # "14 de febrero de 2014, 12:20:05 UTC"
180
- DateTime.now.localize(:es).to_medium_s # "14 feb. 2014 12:20:05"
181
- DateTime.now.localize(:es).to_short_s # "14/2/14 12:20"
180
+ DateTime.now.localize(:es).to_medium_s # "14 feb 2014, 12:20:05"
181
+ DateTime.now.localize(:es).to_short_s # "14/2/14, 12:20"
182
182
 
183
183
  Time.now.localize(:es).to_full_s # "12:20:05 (tiempo universal coordinado)"
184
184
  Time.now.localize(:es).to_long_s # "12:20:05 UTC"
@@ -187,7 +187,7 @@ Time.now.localize(:es).to_short_s # "12:20"
187
187
 
188
188
  DateTime.now.localize(:es).to_date.to_full_s # "viernes, 14 de febrero de 2014"
189
189
  DateTime.now.localize(:es).to_date.to_long_s # "14 de febrero de 2014"
190
- DateTime.now.localize(:es).to_date.to_medium_s # "14 feb. 2014"
190
+ DateTime.now.localize(:es).to_date.to_medium_s # "14 feb 2014"
191
191
  DateTime.now.localize(:es).to_date.to_short_s # "14/2/14"
192
192
  ```
193
193
 
@@ -239,6 +239,7 @@ It's important to know that, even though any given format may not be available a
239
239
  | GyMMM | Feb 2014 CE |
240
240
  | GyMMMEd | Fri, Feb 14, 2014 CE |
241
241
  | GyMMMd | Feb 14, 2014 CE |
242
+ | GyMd | 2/14/2014 Common Era |
242
243
  | H | 12 |
243
244
  | Hm | 12:20 |
244
245
  | Hms | 12:20:05 |
@@ -246,7 +247,7 @@ It's important to know that, even though any given format may not be available a
246
247
  | Hmv | 12:20 GMT |
247
248
  | M | 2 |
248
249
  | MEd | Fri, 2/14 |
249
- | MMM | Feb |
250
+ | MMM | M02 |
250
251
  | MMMEd | Fri, Feb 14 |
251
252
  | MMMMW | week 3 of February |
252
253
  | MMMMd | February 14 |
@@ -366,6 +367,8 @@ tz.display_name_for(DateTime.new(2019, 11, 5), :generic_location)
366
367
  tz.display_name_for(DateTime.new(2019, 11, 5), :generic_long)
367
368
  ```
368
369
 
370
+ `#display_name_for` also accepts arguments for resolving ambiguous times. See [TZInfo Documentation](https://www.rubydoc.info/gems/tzinfo/TZInfo/Timezone#period_for_local-instance_method) for more information.
371
+
369
372
  ### Calendar Data
370
373
 
371
374
  CLDR contains a trove of calendar data, much of which can be accessed. One example is names of months, days, years.
@@ -416,8 +419,8 @@ Behind the scenes, these convenience methods use the `TwitterCldr::Formatters::P
416
419
  TwitterCldr::Formatters::Plurals::Rules.all # [:one, :other]
417
420
 
418
421
  # get all rules for a specific locale
419
- TwitterCldr::Formatters::Plurals::Rules.all_for(:es) # [:one, :other]
420
- TwitterCldr::Formatters::Plurals::Rules.all_for(:ru) # [:few, :many, :one, :other]
422
+ TwitterCldr::Formatters::Plurals::Rules.all_for(:es) # [:one, :many, :other]
423
+ TwitterCldr::Formatters::Plurals::Rules.all_for(:ru) # [:one, :few, :many, :other]
421
424
 
422
425
  # get the rule for a number in a specific locale
423
426
  TwitterCldr::Formatters::Plurals::Rules.rule_for(1, :ru) # :one
@@ -498,21 +501,21 @@ In addition to translating language codes, TwitterCLDR provides access to the fu
498
501
 
499
502
  ```ruby
500
503
  # get all languages for the default locale
501
- TwitterCldr::Shared::Languages.all # { ... :vi => "Vietnamese", :"zh-Hant" => "Traditional Mandarin Chinese" ... }
504
+ TwitterCldr::Shared::Languages.all # { ... :vi => "Vietnamese", :"zh-Hant" => "Traditional Chinese" ... }
502
505
 
503
506
  # get all languages for a specific locale
504
- TwitterCldr::Shared::Languages.all_for(:es) # { ... :vi => "vietnamita", :"zh-Hant" => "chino mandarín tradicional" ... }
507
+ TwitterCldr::Shared::Languages.all_for(:es) # { ... :vi => "vietnamita", :"zh-Hant" => "chino tradicional" ... }
505
508
 
506
509
  # get a language by its code for the default locale
507
- TwitterCldr::Shared::Languages.from_code(:'zh-Hant') # "Traditional Mandarin Chinese"
510
+ TwitterCldr::Shared::Languages.from_code(:'zh-Hant') # "Traditional Chinese"
508
511
 
509
512
  # get a language from its code for a specific locale
510
- TwitterCldr::Shared::Languages.from_code_for_locale(:'zh-Hant', :es) # "chino mandarín tradicional"
513
+ TwitterCldr::Shared::Languages.from_code_for_locale(:'zh-Hant', :es) # "chino tradicional"
511
514
 
512
515
  # translate a language from one locale to another
513
516
  # signature: translate_language(lang, source_locale, destination_locale)
514
- TwitterCldr::Shared::Languages.translate_language("chino tradicional", :es, :en) # "Traditional Mandarin Chinese"
515
- TwitterCldr::Shared::Languages.translate_language("Traditional Chinese", :en, :es) # "chino mandarín tradicional"
517
+ TwitterCldr::Shared::Languages.translate_language("chino tradicional", :es, :en) # "Traditional Chinese"
518
+ TwitterCldr::Shared::Languages.translate_language("Traditional Chinese", :en, :es) # "chino tradicional"
516
519
  ```
517
520
 
518
521
  ### World Territories
@@ -558,20 +561,20 @@ The CLDR contains postal code validation regexes for a number of countries.
558
561
 
559
562
  ```ruby
560
563
  # United States
561
- postal_code = TwitterCldr::Shared::PostalCodes.for_territory(:us)
564
+ postal_code = TwitterCldr::Shared::PostalCodes.for_territory(:us)
562
565
  postal_code.valid?("94103") # true
563
566
  postal_code.valid?("9410") # false
564
567
 
565
568
  # England (Great Britain)
566
- postal_code = TwitterCldr::Shared::PostalCodes.for_territory(:gb)
569
+ postal_code = TwitterCldr::Shared::PostalCodes.for_territory(:gb)
567
570
  postal_code.valid?("BS98 1TL") # true
568
571
 
569
572
  # Sweden
570
- postal_code = TwitterCldr::Shared::PostalCodes.for_territory(:se)
573
+ postal_code = TwitterCldr::Shared::PostalCodes.for_territory(:se)
571
574
  postal_code.valid?("280 12") # true
572
575
 
573
576
  # Canada
574
- postal_code = TwitterCldr::Shared::PostalCodes.for_territory(:ca)
577
+ postal_code = TwitterCldr::Shared::PostalCodes.for_territory(:ca)
575
578
  postal_code.valid?("V3H 1Z7") # true
576
579
  ```
577
580
 
@@ -579,7 +582,7 @@ Match all valid postal codes in a string with the `#find_all` method:
579
582
 
580
583
  ```ruby
581
584
  # United States
582
- postal_code = TwitterCldr::Shared::PostalCodes.for_territory(:us)
585
+ postal_code = TwitterCldr::Shared::PostalCodes.for_territory(:us)
583
586
  postal_code.find_all("12345 23456") # ["12345", "23456"]
584
587
  ```
585
588
 
@@ -592,14 +595,14 @@ TwitterCldr::Shared::PostalCodes.territories # [:ac, :ad, :af, :ai, :al, ... ]
592
595
  Just want the regex? No problem:
593
596
 
594
597
  ```ruby
595
- postal_code = TwitterCldr::Shared::PostalCodes.for_territory(:us)
598
+ postal_code = TwitterCldr::Shared::PostalCodes.for_territory(:us)
596
599
  postal_code.regexp # /(\d{5})(?:[ \-](\d{4}))?/
597
600
  ```
598
601
 
599
602
  Get a sample of valid postal codes with the `#sample` method:
600
603
 
601
604
  ```ruby
602
- postal_code.sample(5) # ["29294", "22486-2369", "76632", "40800-9860", "06727-6194"]
605
+ postal_code.sample(5) # ["60668-3382", "36022", "22364-5670", "32142-1738", "32633-0502"]
603
606
  ```
604
607
 
605
608
  ### Phone Codes
@@ -1014,17 +1017,17 @@ The Psych gem that is the default YAML engine in Ruby 1.9 doesn't handle Unicode
1014
1017
  You can make use of TwitterCLDR's YAML dumper by calling `localize` and then `to_yaml` on an `Array`, `Hash`, or `String`:
1015
1018
 
1016
1019
  ```ruby
1017
- { :hello => "world" }.localize.to_yaml
1018
- ["hello", "world"].localize.to_yaml
1019
- "hello, world".localize.to_yaml
1020
+ { :hello => "world" }.localize.to_yaml
1021
+ ["hello", "world"].localize.to_yaml
1022
+ "hello, world".localize.to_yaml
1020
1023
  ```
1021
1024
 
1022
1025
  Behind the scenes, these convenience methods are using the `TwitterCldr::Shared::YAML` class. You can do the same thing if you're feeling adventurous:
1023
1026
 
1024
1027
  ```ruby
1025
- TwitterCldr::Shared::YAML.dump({ :hello => "world" })
1026
- TwitterCldr::Shared::YAML.dump(["hello", "world"])
1027
- TwitterCldr::Shared::YAML.dump("hello, world")
1028
+ TwitterCldr::Shared::YAML.dump({ :hello => "world" })
1029
+ TwitterCldr::Shared::YAML.dump(["hello", "world"])
1030
+ TwitterCldr::Shared::YAML.dump("hello, world")
1028
1031
  ```
1029
1032
 
1030
1033
  ## Adding New Locales
@@ -1070,7 +1073,7 @@ TwitterCldr.locale # will return :ru
1070
1073
 
1071
1074
  ## Compatibility
1072
1075
 
1073
- TwitterCLDR is fully compatible with Ruby 1.9.3, 2.0.0, 2.2.0.
1076
+ TwitterCLDR is fully compatible with Ruby 2.0, 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 2.7, 3.0.
1074
1077
 
1075
1078
  ## Requirements
1076
1079
 
@@ -1103,6 +1106,6 @@ TwitterCLDR currently supports localization of certain textual objects in JavaSc
1103
1106
 
1104
1107
  ## License
1105
1108
 
1106
- Copyright 2019 Twitter, Inc.
1109
+ Copyright 2021 Twitter, Inc.
1107
1110
 
1108
1111
  Licensed under the Apache License, Version 2.0: http://www.apache.org/licenses/LICENSE-2.0
@@ -36,11 +36,12 @@ module TwitterCldr
36
36
  end
37
37
 
38
38
  def import_locale(locale)
39
- data = requirements[:cldr].merge_each_ancestor(locale) do |ancestor_locale|
39
+ data = requirements[:cldr].build_data(locale) do |ancestor_locale|
40
40
  GregorianCalendar.new(ancestor_locale, requirements[:cldr]).to_h
41
41
  end
42
42
 
43
43
  output_file = File.join(output_path, locale.to_s, 'calendars.yml')
44
+ FileUtils.mkdir_p(File.dirname(output_file))
44
45
 
45
46
  File.open(output_file, 'w:utf-8') do |output|
46
47
  output.write(
@@ -60,6 +61,8 @@ module TwitterCldr
60
61
 
61
62
 
62
63
  class GregorianCalendar
64
+ ERA_TAGS = ['eraNames', 'eraAbbr', 'eraNarrow'].freeze
65
+
63
66
  attr_reader :locale, :cldr_req
64
67
 
65
68
  def initialize(locale, cldr_req)
@@ -71,8 +74,8 @@ module TwitterCldr
71
74
  {
72
75
  calendars: {
73
76
  gregorian: {
74
- months: contexts('month'),
75
77
  days: contexts('day'),
78
+ months: contexts('month'),
76
79
  eras: eras,
77
80
  quarters: contexts('quarter'),
78
81
  periods: contexts('dayPeriod', group: "alt"),
@@ -91,42 +94,39 @@ module TwitterCldr
91
94
  private
92
95
 
93
96
  def calendar
94
- @calendar ||= doc.xpath('//ldml/dates/calendars/calendar[@type="gregorian"]').first
97
+ @calendar ||= docset.xpath('//ldml/dates/calendars/calendar[@type="gregorian"]').first
95
98
  end
96
99
 
97
100
  def contexts(kind, options = {})
98
101
  return {} unless calendar
99
102
 
100
- calendar.xpath("#{kind}s/#{kind}Context").each_with_object({}) do |node, result|
101
- context = node.attribute('type').value.to_sym
103
+ dtd.find_attr("#{kind}Context", 'type').values.each_with_object({}) do |context, result|
104
+ node = calendar.xpath("#{kind}s/#{kind}Context[@type='#{context}']").first
105
+ next unless node
106
+
102
107
  result[context] = widths(node, kind, context, options)
103
108
  end
104
109
  end
105
110
 
106
111
  def widths(node, kind, context, options = {})
107
- node.xpath("#{kind}Width").each_with_object({}) do |node, result|
108
- width = node.attribute('type').value.to_sym
109
- result[width] = elements(node, kind, context, width, options)
112
+ dtd.find_attr("#{kind}Width", 'type').values.each_with_object({}) do |width, result|
113
+ width_node = node.xpath("#{kind}Width[@type='#{width}']").first
114
+ next unless width_node
115
+
116
+ result[width] = elements(width_node, kind, context, width, options)
110
117
  end
111
118
  end
112
119
 
113
120
  def elements(node, kind, context, width, options = {})
114
- aliased = node.xpath('alias').first
115
-
116
- if aliased
117
- alias_path = "#{node.path}/#{aliased.attribute('path').value}"
118
- elements(doc.xpath(alias_path).first, kind, context, width, options)
119
- else
120
- node.xpath(kind).each_with_object({}) do |node, result|
121
- key = node.attribute('type').value
122
- key = key =~ /^\d*$/ ? key.to_i : key.to_sym
123
-
124
- if options[:group] && found_group = node.attribute(options[:group])
125
- result[found_group.value] ||= {}
126
- result[found_group.value][key] = node.content
127
- else
128
- result[key] = node.content
129
- end
121
+ node.xpath(kind).each_with_object({}) do |node, result|
122
+ key = node.attribute('type').value
123
+ key = key =~ /^\d*$/ ? key.to_i : key.to_sym
124
+
125
+ if options[:group] && found_group = node.attribute(options[:group])
126
+ result[found_group.value] ||= {}
127
+ result[found_group.value][key] = node.content
128
+ else
129
+ result[key] = node.content
130
130
  end
131
131
  end
132
132
  end
@@ -144,15 +144,14 @@ module TwitterCldr
144
144
  def eras
145
145
  return {} unless calendar
146
146
 
147
- base_path = "#{calendar.path}/eras"
148
- keys = doc.xpath("#{base_path}/*").map { |node| node.name }
147
+ ERA_TAGS.each_with_object({}) do |era_tag, result|
148
+ key = era_tag.gsub('era', '').gsub(/s$/, '').downcase.to_sym
149
+ path = "eras/#{era_tag}"
149
150
 
150
- keys.each_with_object({}) do |name, result|
151
- path = "#{base_path}/#{name}/*"
152
- key = name.gsub('era', '').gsub(/s$/, '').downcase.to_sym
153
- result[key] = doc.xpath(path).each_with_object({}) do |node, ret|
154
- type = node.attribute('type').value.to_i rescue 0
155
- ret[type] = node.content
151
+ result[key] = dtd.find_attr('era', 'type').values.each_with_object({}) do |type, ret|
152
+ node = calendar.xpath("#{path}/era[@type='#{type}' and @alt='variant']").first ||
153
+ calendar.xpath("#{path}/era[@type='#{type}']").first
154
+ ret[type] = node.content if node
156
155
  ret
157
156
  end
158
157
  end
@@ -161,22 +160,24 @@ module TwitterCldr
161
160
  def formats(type)
162
161
  return {} unless calendar
163
162
 
164
- formats = calendar.xpath("#{type}Formats/#{type}FormatLength").each_with_object({}) do |node, result|
165
- key = node.attribute('type').value.to_sym rescue :format
166
- result[key] = pattern(node, type)
163
+ formats = dtd.find_attr("#{type}FormatLength", 'type').values.each_with_object({}) do |format_length, result|
164
+ node = calendar.xpath("#{type}Formats/#{type}FormatLength[@type='#{format_length}']").first
165
+ result[format_length] = pattern(node, type) if node
167
166
  end
167
+
168
168
  if default = default_format(type)
169
169
  formats = default.merge(formats)
170
170
  end
171
+
171
172
  formats
172
173
  end
173
174
 
174
175
  def additional_formats
175
176
  return {} unless calendar
176
177
 
177
- calendar.xpath("dateTimeFormats/availableFormats/dateFormatItem").each_with_object({}) do |node, result|
178
- key = node.attribute('id').value
179
- result[key] = node.content
178
+ dtd.find_attr('dateFormatItem', 'id').values.each_with_object({}) do |id, result|
179
+ node = calendar.xpath("dateTimeFormats/availableFormats/dateFormatItem[@id='#{id}']").first
180
+ result[id] = node.content if node
180
181
  end
181
182
  end
182
183
 
@@ -196,24 +197,25 @@ module TwitterCldr
196
197
  end
197
198
 
198
199
  def fields
199
- doc.xpath("//ldml/dates/fields/field").each_with_object({}) do |node, result|
200
- key = node.attribute('type').value.to_sym
200
+ dtd.find_attr('field', 'type').values.each_with_object({}) do |field, result|
201
+ node = docset.xpath("//ldml/dates/fields/field[@type='#{field}']").first
201
202
  name = node.xpath('displayName').first
202
- result[key] = name.content if name
203
+ result[field] = name.content if name
203
204
  end
204
205
  end
205
206
 
206
- def doc
207
- @doc ||= begin
208
- locale_fs = locale.to_s.gsub('-', '_')
209
- Nokogiri.XML(File.read(File.join(cldr_main_path, "#{locale_fs}.xml")))
210
- end
207
+ def docset
208
+ @docset ||= cldr_req.docset(cldr_main_path, locale)
211
209
  end
212
210
 
213
211
  def cldr_main_path
214
212
  @cldr_main_path ||= File.join(cldr_req.common_path, 'main')
215
213
  end
216
214
 
215
+ def dtd
216
+ cldr_req.dtd
217
+ end
218
+
217
219
  end
218
220
 
219
221
  end
@@ -0,0 +1,36 @@
1
+ # encoding: UTF-8
2
+
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
6
+ require 'nokogiri'
7
+
8
+ module TwitterCldr
9
+ module Resources
10
+
11
+ class CldrDataBuilder
12
+ DEEP_MERGER = proc do |key, v1, v2|
13
+ Hash === v1 && Hash === v2 ? v1.merge(v2, &DEEP_MERGER) : (v2 || v1)
14
+ end
15
+
16
+ attr_reader :cldr_locale
17
+
18
+ def initialize(cldr_locale)
19
+ @cldr_locale = cldr_locale
20
+ end
21
+
22
+ def merge_each_ancestor
23
+ cldr_locale.ancestors.inject({}) do |result, ancestor_locale|
24
+ deep_merge(yield(ancestor_locale), result)
25
+ end
26
+ end
27
+
28
+ private
29
+
30
+ def deep_merge(h1, h2)
31
+ h1.merge(h2, &DEEP_MERGER)
32
+ end
33
+ end
34
+
35
+ end
36
+ end
@@ -0,0 +1,149 @@
1
+ # encoding: UTF-8
2
+
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
6
+ require 'forwardable'
7
+ require 'nokogiri'
8
+
9
+ module TwitterCldr
10
+ module Resources
11
+ class CldrDocumentSet
12
+
13
+ class Element
14
+ extend Forwardable
15
+
16
+ def_delegators :@element, :attribute, :content, :name, :path
17
+
18
+ attr_reader :docset, :element
19
+
20
+ def initialize(docset, element)
21
+ @docset = docset
22
+ @element = element
23
+ end
24
+
25
+ def xpath(path)
26
+ path = CldrDocumentSet.join_xpaths(docset.path_for(element), path)
27
+ docset.xpath(path)
28
+ end
29
+ end
30
+
31
+ class ElementList
32
+ include Enumerable
33
+ extend Forwardable
34
+
35
+ def_delegators :@elements, :size
36
+
37
+ attr_reader :docset, :elements
38
+
39
+ def initialize(docset, elements)
40
+ @docset = docset
41
+ @elements = elements
42
+ end
43
+
44
+ def [](idx)
45
+ return unless elements[idx]
46
+
47
+ Element.new(docset, elements[idx])
48
+ end
49
+
50
+ def first
51
+ self[0]
52
+ end
53
+
54
+ def each
55
+ return to_enum(__method__) unless block_given?
56
+
57
+ elements.size.times do |idx|
58
+ yield self[idx]
59
+ end
60
+ end
61
+ end
62
+
63
+
64
+ def self.join_xpaths(*paths)
65
+ paths.map { |a| a.chomp('/') }.join('/')
66
+ end
67
+
68
+ attr_reader :path, :cldr_locale, :cldr_requirement
69
+
70
+ def initialize(path, cldr_locale, cldr_requirement)
71
+ @path = path
72
+ @cldr_locale = cldr_locale
73
+ @cldr_requirement = cldr_requirement
74
+ end
75
+
76
+
77
+ def xpath(path)
78
+ cldr_locale.ancestors.each do |ancestor_locale|
79
+ data = doc_for(ancestor_locale).xpath(path)
80
+
81
+ unless data.empty?
82
+ return ElementList.new(self, resolve_aliases_in(data))
83
+ end
84
+ end
85
+
86
+ ElementList.new(self, [])
87
+ end
88
+
89
+ def path_for(node)
90
+ orig_node = node
91
+ path = []
92
+
93
+ while node
94
+ path << selector_for(node)
95
+ node = node.parent
96
+ break if node.name == 'document'
97
+ end
98
+
99
+ "//#{path.reverse.join('/')}"
100
+ end
101
+
102
+ private
103
+
104
+ def resolve_aliases_in(data)
105
+ alias_nodes = data.xpath('.//alias')
106
+ alias_nodes.each do |alias_node|
107
+ alias_path = alias_node.attribute('path').value
108
+ full_path = join_xpaths(path_for(alias_node.parent), alias_path)
109
+
110
+ cldr_locale.ancestors.find do |ancestor_locale|
111
+ resolved_node = doc_for(ancestor_locale).xpath(full_path).first.dup
112
+
113
+ if resolved_node
114
+ resolved_copy = Nokogiri::XML(resolved_node.to_xml).children.first
115
+ parent = alias_node.parent
116
+ alias_node.replace(resolved_copy.children)
117
+ resolve_aliases_in(parent)
118
+ break
119
+ end
120
+ end
121
+ end
122
+
123
+ data
124
+ end
125
+
126
+ def join_xpaths(*paths)
127
+ self.class.join_xpaths(*paths)
128
+ end
129
+
130
+ def selector_for(node)
131
+ node.name.dup.tap do |selector|
132
+ if type = node.attribute('type')
133
+ selector << "[@type='#{type.value}']"
134
+ end
135
+ end
136
+ end
137
+
138
+ def doc_for(locale)
139
+ locale_fs = locale.to_s.gsub('-', '_')
140
+ docs[locale_fs] ||= Nokogiri.XML(File.read(File.join(path, "#{locale_fs}.xml")))
141
+ end
142
+
143
+ def docs
144
+ @docs ||= {}
145
+ end
146
+ end
147
+
148
+ end
149
+ end
@@ -0,0 +1,110 @@
1
+ # encoding: UTF-8
2
+
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
6
+ module TwitterCldr
7
+ module Resources
8
+
9
+ class CldrDTD
10
+ class Attr
11
+ attr_reader :name, :element_name, :dtd
12
+
13
+ def initialize(name, element_name, dtd)
14
+ @name = name
15
+ @element_name = element_name
16
+ @dtd = dtd
17
+ end
18
+
19
+ def values
20
+ @values ||= begin
21
+ attr_line_idx = schema.find_index do |line|
22
+ line.include?("<!ATTLIST #{element_name} #{name} ")
23
+ end
24
+
25
+ return [] unless attr_line_idx
26
+
27
+ attr_line = schema[attr_line_idx]
28
+
29
+ if comment = find_match_comment_after(attr_line_idx + 1)
30
+ parse_match(comment)
31
+ else
32
+ start_idx = attr_line.index('(')
33
+ return [] unless start_idx
34
+
35
+ finish_idx = attr_line.rindex(')')
36
+ attr_line[(start_idx + 1)...finish_idx].split('|').map(&:strip)
37
+ end
38
+ end
39
+ end
40
+
41
+ private
42
+
43
+ def find_match_comment_after(idx)
44
+ loop do
45
+ return nil if idx > schema.size
46
+
47
+ if schema[idx].strip.start_with?('<!--@MATCH')
48
+ break
49
+ elsif schema[idx].strip.start_with?('<!--')
50
+ idx += 1
51
+ else
52
+ return nil
53
+ end
54
+ end
55
+
56
+ schema[idx]
57
+ end
58
+
59
+ def parse_match(str)
60
+ m = str.match(/<!--@MATCH:([^\/]+)\/(.*)-->/)
61
+ return [] unless m
62
+
63
+ type, args = m.captures
64
+
65
+ case type
66
+ when 'literal'
67
+ args.split(',').map(&:strip)
68
+ when 'range'
69
+ start, finish = args.split('~')
70
+ ((start.to_i)..(finish.to_i)).to_a
71
+ end
72
+ end
73
+
74
+ def schema
75
+ dtd.schema
76
+ end
77
+ end
78
+
79
+ attr_reader :cldr_requirement
80
+
81
+ def initialize(cldr_requirement)
82
+ @cldr_requirement = cldr_requirement
83
+ end
84
+
85
+ def find_attr(element_name, attr_name)
86
+ elements[element_name] ||= {}
87
+ elements[element_name][attr_name] ||= Attr.new(
88
+ attr_name, element_name, self
89
+ )
90
+ end
91
+
92
+ def schema
93
+ @schema ||= File.read(schema_path).split("\n")
94
+ end
95
+
96
+ private
97
+
98
+ def elements
99
+ @elements ||= {}
100
+ end
101
+
102
+ def schema_path
103
+ @schema_path ||= File.join(
104
+ cldr_requirement.common_path, 'dtd', 'ldml.dtd'
105
+ )
106
+ end
107
+ end
108
+
109
+ end
110
+ end