twitter_cldr 6.8.0 → 6.11.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (90) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +31 -28
  3. data/lib/twitter_cldr/resources/calendars_importer.rb +48 -46
  4. data/lib/twitter_cldr/resources/cldr_data_builder.rb +36 -0
  5. data/lib/twitter_cldr/resources/cldr_document_set.rb +157 -0
  6. data/lib/twitter_cldr/resources/cldr_dtd.rb +110 -0
  7. data/lib/twitter_cldr/resources/cldr_locale.rb +78 -0
  8. data/lib/twitter_cldr/resources/list_formats_importer.rb +1 -1
  9. data/lib/twitter_cldr/resources/loader.rb +18 -1
  10. data/lib/twitter_cldr/resources/number_formats_importer.rb +1 -1
  11. data/lib/twitter_cldr/resources/readme_renderer.rb +6 -2
  12. data/lib/twitter_cldr/resources/requirements/cldr_requirement.rb +12 -61
  13. data/lib/twitter_cldr/resources/territories_importer.rb +1 -1
  14. data/lib/twitter_cldr/resources/timezones_importer.rb +1 -1
  15. data/lib/twitter_cldr/resources/unicode_property_aliases_importer.rb +2 -0
  16. data/lib/twitter_cldr/resources/units_importer.rb +1 -1
  17. data/lib/twitter_cldr/resources.rb +8 -3
  18. data/lib/twitter_cldr/shared/calendar.rb +1 -1
  19. data/lib/twitter_cldr/timezones/generic_location.rb +15 -15
  20. data/lib/twitter_cldr/timezones/gmt_location.rb +2 -2
  21. data/lib/twitter_cldr/timezones/iso8601_location.rb +2 -2
  22. data/lib/twitter_cldr/timezones/timezone.rb +11 -7
  23. data/lib/twitter_cldr/version.rb +1 -1
  24. data/resources/locales/ar/calendars.yml +2 -1
  25. data/resources/locales/az/calendars.yml +2 -1
  26. data/resources/locales/be/calendars.yml +2 -1
  27. data/resources/locales/bg/calendars.yml +2 -1
  28. data/resources/locales/bn/calendars.yml +2 -1
  29. data/resources/locales/bo/calendars.yml +68 -36
  30. data/resources/locales/bs/calendars.yml +1 -0
  31. data/resources/locales/cy/calendars.yml +1 -0
  32. data/resources/locales/el/calendars.yml +2 -1
  33. data/resources/locales/en/calendars.yml +75 -54
  34. data/resources/locales/en-001/calendars.yml +65 -53
  35. data/resources/locales/en-150/calendars.yml +65 -53
  36. data/resources/locales/en-AU/calendars.yml +4 -0
  37. data/resources/locales/en-CA/calendars.yml +66 -58
  38. data/resources/locales/en-GB/calendars.yml +7 -0
  39. data/resources/locales/en-IE/calendars.yml +65 -53
  40. data/resources/locales/en-IN/calendars.yml +65 -53
  41. data/resources/locales/en-NZ/calendars.yml +65 -53
  42. data/resources/locales/en-SG/calendars.yml +65 -53
  43. data/resources/locales/en-US/calendars.yml +75 -54
  44. data/resources/locales/en-ZA/calendars.yml +65 -53
  45. data/resources/locales/eo/calendars.yml +88 -61
  46. data/resources/locales/es/calendars.yml +2 -1
  47. data/resources/locales/es-419/calendars.yml +2 -1
  48. data/resources/locales/es-AR/calendars.yml +2 -1
  49. data/resources/locales/es-CO/calendars.yml +2 -1
  50. data/resources/locales/es-MX/calendars.yml +2 -1
  51. data/resources/locales/es-US/calendars.yml +2 -1
  52. data/resources/locales/fil/calendars.yml +4 -3
  53. data/resources/locales/ga/calendars.yml +4 -4
  54. data/resources/locales/gl/calendars.yml +2 -1
  55. data/resources/locales/hi/calendars.yml +2 -1
  56. data/resources/locales/hy/calendars.yml +4 -3
  57. data/resources/locales/ka/calendars.yml +2 -1
  58. data/resources/locales/kk/calendars.yml +2 -1
  59. data/resources/locales/km/calendars.yml +2 -1
  60. data/resources/locales/kn/calendars.yml +2 -1
  61. data/resources/locales/ko/calendars.yml +2 -1
  62. data/resources/locales/lo/calendars.yml +2 -1
  63. data/resources/locales/mk/calendars.yml +1 -1
  64. data/resources/locales/mr/calendars.yml +2 -1
  65. data/resources/locales/mt/calendars.yml +2 -1
  66. data/resources/locales/my/calendars.yml +2 -1
  67. data/resources/locales/pl/calendars.yml +2 -1
  68. data/resources/locales/pt/calendars.yml +35 -34
  69. data/resources/locales/pt-PT/calendars.yml +21 -20
  70. data/resources/locales/ru/calendars.yml +7 -7
  71. data/resources/locales/sk/calendars.yml +2 -1
  72. data/resources/locales/sl/calendars.yml +2 -1
  73. data/resources/locales/sr/calendars.yml +2 -0
  74. data/resources/locales/sr-Cyrl-ME/calendars.yml +3 -1
  75. data/resources/locales/sr-Latn-ME/calendars.yml +3 -1
  76. data/resources/locales/sw/calendars.yml +2 -1
  77. data/resources/locales/ta/calendars.yml +2 -1
  78. data/resources/locales/th/calendars.yml +1 -0
  79. data/resources/locales/tr/calendars.yml +2 -1
  80. data/resources/locales/ur/calendars.yml +2 -1
  81. data/resources/locales/xh/calendars.yml +46 -15
  82. data/resources/locales/zh-Hant/calendars.yml +4 -4
  83. data/resources/locales/zu/calendars.yml +3 -1
  84. data/spec/formatters/calendars/datetime_formatter_spec.rb +2 -2
  85. data/spec/localized/localized_array_spec.rb +3 -1
  86. data/spec/localized/localized_hash_spec.rb +1 -1
  87. data/spec/shared/calendar_spec.rb +25 -8
  88. data/spec/timezones/timezone_spec.rb +41 -0
  89. data/spec/utils/yaml/yaml_spec.rb +2 -3
  90. metadata +7 -3
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: eb3f9e357d9106f3b87744f5a8203eda30efd0dac40a24dacfbfaca6122431bc
4
- data.tar.gz: a39e1373d65e2665124caedba55b039d62214950ced3db2e79636e38aef9e9e6
3
+ metadata.gz: 48841a5568664132e97c2e06a0ea9381e7099502d07da55a9b09ab9a60590297
4
+ data.tar.gz: bec1e296fb57a3b93b11eaff0bfa9d834cf44153fea3413202fa7ad3c8ac9b04
5
5
  SHA512:
6
- metadata.gz: 0e810877761c6898a5e5efb187c2f16a91536bf5e2bf784d802eb99ef9fe482140c79490f20e2c3748622147f340be3905cdc2cebc1c046e7eff9c5929130c8d
7
- data.tar.gz: 2e9d84af9731ba6ada0cd0fc63224a54fbdb4f7de6b3de1d0f4748d20f71c6f17b65b490b5d32b56e269273c5c05dc94fa704da8fe89af077a62e50aad562023
6
+ metadata.gz: 27b9c0be54620888b8c3fd6310b366b1c3eeaaa606a5e4164b1db5104d3d10be4d5933bd7a4b372b8552f89c0c8a5f41ed97d74c1531c1ffd1be0648be8cd9dd
7
+ data.tar.gz: '09ec1c6f77f31d3ba433ac1d9c6e6f119e39f70b51e44550b876c18371fe05699965f900b18c202ab8774af33702fa41eaa09f6c6134d546dd98beea7b7e60f4'
data/README.md CHANGED
@@ -92,7 +92,7 @@ TwitterCLDR supports formatting numbers with an attached unit, for example "12 d
92
92
 
93
93
  ```ruby
94
94
  12.localize.to_unit.length_mile # "12 miles"
95
- 12.localize(:ru).to_unit.length_mile # "12 миль"
95
+ 12.localize(:ru).to_unit.length_mile # "12 милях"
96
96
  ```
97
97
  Units support a few different forms, long, short, and narrow:
98
98
 
@@ -177,8 +177,8 @@ For English (and other languages), you can also specify an ordinal spellout:
177
177
  ```ruby
178
178
  DateTime.now.localize(:es).to_full_s # "viernes, 14 de febrero de 2014, 12:20:05 (tiempo universal coordinado)"
179
179
  DateTime.now.localize(:es).to_long_s # "14 de febrero de 2014, 12:20:05 UTC"
180
- DateTime.now.localize(:es).to_medium_s # "14 feb. 2014 12:20:05"
181
- DateTime.now.localize(:es).to_short_s # "14/2/14 12:20"
180
+ DateTime.now.localize(:es).to_medium_s # "14 feb 2014, 12:20:05"
181
+ DateTime.now.localize(:es).to_short_s # "14/2/14, 12:20"
182
182
 
183
183
  Time.now.localize(:es).to_full_s # "12:20:05 (tiempo universal coordinado)"
184
184
  Time.now.localize(:es).to_long_s # "12:20:05 UTC"
@@ -187,7 +187,7 @@ Time.now.localize(:es).to_short_s # "12:20"
187
187
 
188
188
  DateTime.now.localize(:es).to_date.to_full_s # "viernes, 14 de febrero de 2014"
189
189
  DateTime.now.localize(:es).to_date.to_long_s # "14 de febrero de 2014"
190
- DateTime.now.localize(:es).to_date.to_medium_s # "14 feb. 2014"
190
+ DateTime.now.localize(:es).to_date.to_medium_s # "14 feb 2014"
191
191
  DateTime.now.localize(:es).to_date.to_short_s # "14/2/14"
192
192
  ```
193
193
 
@@ -239,6 +239,7 @@ It's important to know that, even though any given format may not be available a
239
239
  | GyMMM | Feb 2014 CE |
240
240
  | GyMMMEd | Fri, Feb 14, 2014 CE |
241
241
  | GyMMMd | Feb 14, 2014 CE |
242
+ | GyMd | 2/14/2014 Common Era |
242
243
  | H | 12 |
243
244
  | Hm | 12:20 |
244
245
  | Hms | 12:20:05 |
@@ -246,7 +247,7 @@ It's important to know that, even though any given format may not be available a
246
247
  | Hmv | 12:20 GMT |
247
248
  | M | 2 |
248
249
  | MEd | Fri, 2/14 |
249
- | MMM | Feb |
250
+ | MMM | M02 |
250
251
  | MMMEd | Fri, Feb 14 |
251
252
  | MMMMW | week 3 of February |
252
253
  | MMMMd | February 14 |
@@ -366,6 +367,8 @@ tz.display_name_for(DateTime.new(2019, 11, 5), :generic_location)
366
367
  tz.display_name_for(DateTime.new(2019, 11, 5), :generic_long)
367
368
  ```
368
369
 
370
+ `#display_name_for` also accepts arguments for resolving ambiguous times. See [TZInfo Documentation](https://www.rubydoc.info/gems/tzinfo/TZInfo/Timezone#period_for_local-instance_method) for more information.
371
+
369
372
  ### Calendar Data
370
373
 
371
374
  CLDR contains a trove of calendar data, much of which can be accessed. One example is names of months, days, years.
@@ -416,8 +419,8 @@ Behind the scenes, these convenience methods use the `TwitterCldr::Formatters::P
416
419
  TwitterCldr::Formatters::Plurals::Rules.all # [:one, :other]
417
420
 
418
421
  # get all rules for a specific locale
419
- TwitterCldr::Formatters::Plurals::Rules.all_for(:es) # [:one, :other]
420
- TwitterCldr::Formatters::Plurals::Rules.all_for(:ru) # [:few, :many, :one, :other]
422
+ TwitterCldr::Formatters::Plurals::Rules.all_for(:es) # [:one, :many, :other]
423
+ TwitterCldr::Formatters::Plurals::Rules.all_for(:ru) # [:one, :few, :many, :other]
421
424
 
422
425
  # get the rule for a number in a specific locale
423
426
  TwitterCldr::Formatters::Plurals::Rules.rule_for(1, :ru) # :one
@@ -498,21 +501,21 @@ In addition to translating language codes, TwitterCLDR provides access to the fu
498
501
 
499
502
  ```ruby
500
503
  # get all languages for the default locale
501
- TwitterCldr::Shared::Languages.all # { ... :vi => "Vietnamese", :"zh-Hant" => "Traditional Mandarin Chinese" ... }
504
+ TwitterCldr::Shared::Languages.all # { ... :vi => "Vietnamese", :"zh-Hant" => "Traditional Chinese" ... }
502
505
 
503
506
  # get all languages for a specific locale
504
- TwitterCldr::Shared::Languages.all_for(:es) # { ... :vi => "vietnamita", :"zh-Hant" => "chino mandarín tradicional" ... }
507
+ TwitterCldr::Shared::Languages.all_for(:es) # { ... :vi => "vietnamita", :"zh-Hant" => "chino tradicional" ... }
505
508
 
506
509
  # get a language by its code for the default locale
507
- TwitterCldr::Shared::Languages.from_code(:'zh-Hant') # "Traditional Mandarin Chinese"
510
+ TwitterCldr::Shared::Languages.from_code(:'zh-Hant') # "Traditional Chinese"
508
511
 
509
512
  # get a language from its code for a specific locale
510
- TwitterCldr::Shared::Languages.from_code_for_locale(:'zh-Hant', :es) # "chino mandarín tradicional"
513
+ TwitterCldr::Shared::Languages.from_code_for_locale(:'zh-Hant', :es) # "chino tradicional"
511
514
 
512
515
  # translate a language from one locale to another
513
516
  # signature: translate_language(lang, source_locale, destination_locale)
514
- TwitterCldr::Shared::Languages.translate_language("chino tradicional", :es, :en) # "Traditional Mandarin Chinese"
515
- TwitterCldr::Shared::Languages.translate_language("Traditional Chinese", :en, :es) # "chino mandarín tradicional"
517
+ TwitterCldr::Shared::Languages.translate_language("chino tradicional", :es, :en) # "Traditional Chinese"
518
+ TwitterCldr::Shared::Languages.translate_language("Traditional Chinese", :en, :es) # "chino tradicional"
516
519
  ```
517
520
 
518
521
  ### World Territories
@@ -558,20 +561,20 @@ The CLDR contains postal code validation regexes for a number of countries.
558
561
 
559
562
  ```ruby
560
563
  # United States
561
- postal_code = TwitterCldr::Shared::PostalCodes.for_territory(:us)
564
+ postal_code = TwitterCldr::Shared::PostalCodes.for_territory(:us)
562
565
  postal_code.valid?("94103") # true
563
566
  postal_code.valid?("9410") # false
564
567
 
565
568
  # England (Great Britain)
566
- postal_code = TwitterCldr::Shared::PostalCodes.for_territory(:gb)
569
+ postal_code = TwitterCldr::Shared::PostalCodes.for_territory(:gb)
567
570
  postal_code.valid?("BS98 1TL") # true
568
571
 
569
572
  # Sweden
570
- postal_code = TwitterCldr::Shared::PostalCodes.for_territory(:se)
573
+ postal_code = TwitterCldr::Shared::PostalCodes.for_territory(:se)
571
574
  postal_code.valid?("280 12") # true
572
575
 
573
576
  # Canada
574
- postal_code = TwitterCldr::Shared::PostalCodes.for_territory(:ca)
577
+ postal_code = TwitterCldr::Shared::PostalCodes.for_territory(:ca)
575
578
  postal_code.valid?("V3H 1Z7") # true
576
579
  ```
577
580
 
@@ -579,7 +582,7 @@ Match all valid postal codes in a string with the `#find_all` method:
579
582
 
580
583
  ```ruby
581
584
  # United States
582
- postal_code = TwitterCldr::Shared::PostalCodes.for_territory(:us)
585
+ postal_code = TwitterCldr::Shared::PostalCodes.for_territory(:us)
583
586
  postal_code.find_all("12345 23456") # ["12345", "23456"]
584
587
  ```
585
588
 
@@ -592,14 +595,14 @@ TwitterCldr::Shared::PostalCodes.territories # [:ac, :ad, :af, :ai, :al, ... ]
592
595
  Just want the regex? No problem:
593
596
 
594
597
  ```ruby
595
- postal_code = TwitterCldr::Shared::PostalCodes.for_territory(:us)
598
+ postal_code = TwitterCldr::Shared::PostalCodes.for_territory(:us)
596
599
  postal_code.regexp # /(\d{5})(?:[ \-](\d{4}))?/
597
600
  ```
598
601
 
599
602
  Get a sample of valid postal codes with the `#sample` method:
600
603
 
601
604
  ```ruby
602
- postal_code.sample(5) # ["29294", "22486-2369", "76632", "40800-9860", "06727-6194"]
605
+ postal_code.sample(5) # ["60668-3382", "36022", "22364-5670", "32142-1738", "32633-0502"]
603
606
  ```
604
607
 
605
608
  ### Phone Codes
@@ -1014,17 +1017,17 @@ The Psych gem that is the default YAML engine in Ruby 1.9 doesn't handle Unicode
1014
1017
  You can make use of TwitterCLDR's YAML dumper by calling `localize` and then `to_yaml` on an `Array`, `Hash`, or `String`:
1015
1018
 
1016
1019
  ```ruby
1017
- { :hello => "world" }.localize.to_yaml
1018
- ["hello", "world"].localize.to_yaml
1019
- "hello, world".localize.to_yaml
1020
+ { :hello => "world" }.localize.to_yaml
1021
+ ["hello", "world"].localize.to_yaml
1022
+ "hello, world".localize.to_yaml
1020
1023
  ```
1021
1024
 
1022
1025
  Behind the scenes, these convenience methods are using the `TwitterCldr::Shared::YAML` class. You can do the same thing if you're feeling adventurous:
1023
1026
 
1024
1027
  ```ruby
1025
- TwitterCldr::Shared::YAML.dump({ :hello => "world" })
1026
- TwitterCldr::Shared::YAML.dump(["hello", "world"])
1027
- TwitterCldr::Shared::YAML.dump("hello, world")
1028
+ TwitterCldr::Shared::YAML.dump({ :hello => "world" })
1029
+ TwitterCldr::Shared::YAML.dump(["hello", "world"])
1030
+ TwitterCldr::Shared::YAML.dump("hello, world")
1028
1031
  ```
1029
1032
 
1030
1033
  ## Adding New Locales
@@ -1070,7 +1073,7 @@ TwitterCldr.locale # will return :ru
1070
1073
 
1071
1074
  ## Compatibility
1072
1075
 
1073
- TwitterCLDR is fully compatible with Ruby 1.9.3, 2.0.0, 2.2.0.
1076
+ TwitterCLDR is fully compatible with Ruby 2.0, 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 2.7, 3.0.
1074
1077
 
1075
1078
  ## Requirements
1076
1079
 
@@ -1103,6 +1106,6 @@ TwitterCLDR currently supports localization of certain textual objects in JavaSc
1103
1106
 
1104
1107
  ## License
1105
1108
 
1106
- Copyright 2019 Twitter, Inc.
1109
+ Copyright 2021 Twitter, Inc.
1107
1110
 
1108
1111
  Licensed under the Apache License, Version 2.0: http://www.apache.org/licenses/LICENSE-2.0
@@ -36,11 +36,12 @@ module TwitterCldr
36
36
  end
37
37
 
38
38
  def import_locale(locale)
39
- data = requirements[:cldr].merge_each_ancestor(locale) do |ancestor_locale|
39
+ data = requirements[:cldr].build_data(locale) do |ancestor_locale|
40
40
  GregorianCalendar.new(ancestor_locale, requirements[:cldr]).to_h
41
41
  end
42
42
 
43
43
  output_file = File.join(output_path, locale.to_s, 'calendars.yml')
44
+ FileUtils.mkdir_p(File.dirname(output_file))
44
45
 
45
46
  File.open(output_file, 'w:utf-8') do |output|
46
47
  output.write(
@@ -60,6 +61,8 @@ module TwitterCldr
60
61
 
61
62
 
62
63
  class GregorianCalendar
64
+ ERA_TAGS = ['eraNames', 'eraAbbr', 'eraNarrow'].freeze
65
+
63
66
  attr_reader :locale, :cldr_req
64
67
 
65
68
  def initialize(locale, cldr_req)
@@ -71,8 +74,8 @@ module TwitterCldr
71
74
  {
72
75
  calendars: {
73
76
  gregorian: {
74
- months: contexts('month'),
75
77
  days: contexts('day'),
78
+ months: contexts('month'),
76
79
  eras: eras,
77
80
  quarters: contexts('quarter'),
78
81
  periods: contexts('dayPeriod', group: "alt"),
@@ -91,42 +94,39 @@ module TwitterCldr
91
94
  private
92
95
 
93
96
  def calendar
94
- @calendar ||= doc.xpath('//ldml/dates/calendars/calendar[@type="gregorian"]').first
97
+ @calendar ||= docset.xpath('//ldml/dates/calendars/calendar[@type="gregorian"]').first
95
98
  end
96
99
 
97
100
  def contexts(kind, options = {})
98
101
  return {} unless calendar
99
102
 
100
- calendar.xpath("#{kind}s/#{kind}Context").each_with_object({}) do |node, result|
101
- context = node.attribute('type').value.to_sym
103
+ dtd.find_attr("#{kind}Context", 'type').values.each_with_object({}) do |context, result|
104
+ node = calendar.xpath("#{kind}s/#{kind}Context[@type='#{context}']").first
105
+ next unless node
106
+
102
107
  result[context] = widths(node, kind, context, options)
103
108
  end
104
109
  end
105
110
 
106
111
  def widths(node, kind, context, options = {})
107
- node.xpath("#{kind}Width").each_with_object({}) do |node, result|
108
- width = node.attribute('type').value.to_sym
109
- result[width] = elements(node, kind, context, width, options)
112
+ dtd.find_attr("#{kind}Width", 'type').values.each_with_object({}) do |width, result|
113
+ width_node = node.xpath("#{kind}Width[@type='#{width}']").first
114
+ next unless width_node
115
+
116
+ result[width] = elements(width_node, kind, context, width, options)
110
117
  end
111
118
  end
112
119
 
113
120
  def elements(node, kind, context, width, options = {})
114
- aliased = node.xpath('alias').first
115
-
116
- if aliased
117
- alias_path = "#{node.path}/#{aliased.attribute('path').value}"
118
- elements(doc.xpath(alias_path).first, kind, context, width, options)
119
- else
120
- node.xpath(kind).each_with_object({}) do |node, result|
121
- key = node.attribute('type').value
122
- key = key =~ /^\d*$/ ? key.to_i : key.to_sym
123
-
124
- if options[:group] && found_group = node.attribute(options[:group])
125
- result[found_group.value] ||= {}
126
- result[found_group.value][key] = node.content
127
- else
128
- result[key] = node.content
129
- end
121
+ node.xpath(kind).each_with_object({}) do |node, result|
122
+ key = node.attribute('type').value
123
+ key = key =~ /^\d*$/ ? key.to_i : key.to_sym
124
+
125
+ if options[:group] && found_group = node.attribute(options[:group])
126
+ result[found_group.value] ||= {}
127
+ result[found_group.value][key] = node.content
128
+ else
129
+ result[key] = node.content
130
130
  end
131
131
  end
132
132
  end
@@ -144,15 +144,14 @@ module TwitterCldr
144
144
  def eras
145
145
  return {} unless calendar
146
146
 
147
- base_path = "#{calendar.path}/eras"
148
- keys = doc.xpath("#{base_path}/*").map { |node| node.name }
147
+ ERA_TAGS.each_with_object({}) do |era_tag, result|
148
+ key = era_tag.gsub('era', '').gsub(/s$/, '').downcase.to_sym
149
+ path = "eras/#{era_tag}"
149
150
 
150
- keys.each_with_object({}) do |name, result|
151
- path = "#{base_path}/#{name}/*"
152
- key = name.gsub('era', '').gsub(/s$/, '').downcase.to_sym
153
- result[key] = doc.xpath(path).each_with_object({}) do |node, ret|
154
- type = node.attribute('type').value.to_i rescue 0
155
- ret[type] = node.content
151
+ result[key] = dtd.find_attr('era', 'type').values.each_with_object({}) do |type, ret|
152
+ node = calendar.xpath("#{path}/era[@type='#{type}' and @alt='variant']").first ||
153
+ calendar.xpath("#{path}/era[@type='#{type}']").first
154
+ ret[type] = node.content if node
156
155
  ret
157
156
  end
158
157
  end
@@ -161,22 +160,24 @@ module TwitterCldr
161
160
  def formats(type)
162
161
  return {} unless calendar
163
162
 
164
- formats = calendar.xpath("#{type}Formats/#{type}FormatLength").each_with_object({}) do |node, result|
165
- key = node.attribute('type').value.to_sym rescue :format
166
- result[key] = pattern(node, type)
163
+ formats = dtd.find_attr("#{type}FormatLength", 'type').values.each_with_object({}) do |format_length, result|
164
+ node = calendar.xpath("#{type}Formats/#{type}FormatLength[@type='#{format_length}']").first
165
+ result[format_length] = pattern(node, type) if node
167
166
  end
167
+
168
168
  if default = default_format(type)
169
169
  formats = default.merge(formats)
170
170
  end
171
+
171
172
  formats
172
173
  end
173
174
 
174
175
  def additional_formats
175
176
  return {} unless calendar
176
177
 
177
- calendar.xpath("dateTimeFormats/availableFormats/dateFormatItem").each_with_object({}) do |node, result|
178
- key = node.attribute('id').value
179
- result[key] = node.content
178
+ dtd.find_attr('dateFormatItem', 'id').values.each_with_object({}) do |id, result|
179
+ node = calendar.xpath("dateTimeFormats/availableFormats/dateFormatItem[@id='#{id}']").first
180
+ result[id] = node.content if node
180
181
  end
181
182
  end
182
183
 
@@ -196,24 +197,25 @@ module TwitterCldr
196
197
  end
197
198
 
198
199
  def fields
199
- doc.xpath("//ldml/dates/fields/field").each_with_object({}) do |node, result|
200
- key = node.attribute('type').value.to_sym
200
+ dtd.find_attr('field', 'type').values.each_with_object({}) do |field, result|
201
+ node = docset.xpath("//ldml/dates/fields/field[@type='#{field}']").first
201
202
  name = node.xpath('displayName').first
202
- result[key] = name.content if name
203
+ result[field] = name.content if name
203
204
  end
204
205
  end
205
206
 
206
- def doc
207
- @doc ||= begin
208
- locale_fs = locale.to_s.gsub('-', '_')
209
- Nokogiri.XML(File.read(File.join(cldr_main_path, "#{locale_fs}.xml")))
210
- end
207
+ def docset
208
+ @docset ||= cldr_req.docset(cldr_main_path, locale)
211
209
  end
212
210
 
213
211
  def cldr_main_path
214
212
  @cldr_main_path ||= File.join(cldr_req.common_path, 'main')
215
213
  end
216
214
 
215
+ def dtd
216
+ cldr_req.dtd
217
+ end
218
+
217
219
  end
218
220
 
219
221
  end
@@ -0,0 +1,36 @@
1
+ # encoding: UTF-8
2
+
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
6
+ require 'nokogiri'
7
+
8
+ module TwitterCldr
9
+ module Resources
10
+
11
# Combines per-locale data hashes across a locale's ancestor chain.
#
# The caller supplies a block that produces a hash for one locale; the
# builder calls it for every entry of +cldr_locale.ancestors+ and deep
# merges the results into a single hash.
class CldrDataBuilder
  # Conflict resolver for Hash#merge. Nested hashes are merged
  # recursively. For any other pair the second value wins unless it is
  # nil; an explicit +false+ is kept (a plain `v2 || v1` would discard it).
  DEEP_MERGER = proc do |_key, v1, v2|
    if Hash === v1 && Hash === v2
      v1.merge(v2, &DEEP_MERGER)
    else
      v2.nil? ? v1 : v2
    end
  end

  attr_reader :cldr_locale

  # @param cldr_locale an object responding to +ancestors+ with an
  #   enumerable of locales
  def initialize(cldr_locale)
    @cldr_locale = cldr_locale
  end

  # Yields each entry of +cldr_locale.ancestors+ and deep merges the
  # hashes returned by the block.
  #
  # Values produced for earlier entries take precedence over values
  # produced for later ones, because the accumulated result is merged on
  # top of each newly yielded hash.
  #
  # @yieldparam ancestor_locale one entry of +cldr_locale.ancestors+
  # @yieldreturn [Hash] data for that locale
  # @return [Hash] the merged data
  def merge_each_ancestor
    cldr_locale.ancestors.inject({}) do |result, ancestor_locale|
      deep_merge(yield(ancestor_locale), result)
    end
  end

  private

  # Returns a new hash with h2 merged on top of h1 (see DEEP_MERGER).
  def deep_merge(h1, h2)
    h1.merge(h2, &DEEP_MERGER)
  end
end
34
+
35
+ end
36
+ end
@@ -0,0 +1,157 @@
1
+ # encoding: UTF-8
2
+
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
6
+ require 'forwardable'
7
+ require 'nokogiri'
8
+
9
+ module TwitterCldr
10
+ module Resources
11
# Runs XPath queries against the XML documents of a locale and its
# ancestors. Documents are read from "<path>/<locale>.xml" (with "-" in the
# locale replaced by "_") and cached per locale. A query is answered by the
# first document in +cldr_locale.ancestors+ order that has matching nodes;
# <alias> elements inside the matches are replaced with the content they
# point at.
class CldrDocumentSet

  # Wraps a single XML node so that relative XPath queries made on it are
  # resolved through the owning document set (and therefore across
  # ancestor locales) instead of only inside the node's own document.
  class Element
    extend Forwardable

    def_delegators :@element, :attribute, :content, :name, :path

    attr_reader :docset, :element

    def initialize(docset, element)
      @docset = docset
      @element = element
    end

    # Joins +path+ onto this element's absolute path and queries the
    # document set with the result.
    def xpath(path)
      full_path = CldrDocumentSet.join_xpaths(docset.path_for(element), path)
      docset.xpath(full_path)
    end
  end

  # Enumerable list of nodes whose members are handed out wrapped in
  # Element instances.
  class ElementList
    include Enumerable
    extend Forwardable

    def_delegators :@elements, :size

    attr_reader :docset, :elements

    def initialize(docset, elements)
      @docset = docset
      @elements = elements
    end

    # Returns the node at +idx+ wrapped in an Element, or nil when there
    # is no node at that index.
    def [](idx)
      raw = elements[idx]
      return unless raw

      Element.new(docset, raw)
    end

    def first
      self[0]
    end

    # Yields every node wrapped in an Element; returns an enumerator when
    # called without a block.
    def each
      return to_enum(__method__) unless block_given?

      elements.size.times do |idx|
        yield self[idx]
      end
    end
  end


  # Joins XPath fragments with "/" and collapses ".." segments by removing
  # the segment that precedes them. A leading "//" is preserved because
  # the empty segments it produces are kept.
  #
  #   join_xpaths('//a/b', '../c') # => "//a/c"
  def self.join_xpaths(*paths)
    segments = paths.flat_map { |fragment| fragment.chomp('/').split('/') }

    collapsed = segments.each_with_object([]) do |segment, result|
      if segment == '..'
        result.pop
      else
        result << segment
      end
    end

    collapsed.join('/')
  end

  attr_reader :path, :cldr_locale, :cldr_requirement

  # @param path directory containing the per-locale XML files
  # @param cldr_locale object responding to +ancestors+
  # @param cldr_requirement stored and exposed via the reader; not used
  #   by this class itself
  def initialize(path, cldr_locale, cldr_requirement)
    @path = path
    @cldr_locale = cldr_locale
    @cldr_requirement = cldr_requirement
  end


  # Evaluates +path+ against each ancestor's document in turn and returns
  # an ElementList for the first non-empty result (aliases resolved), or
  # an empty ElementList when no document matches.
  def xpath(path)
    cldr_locale.ancestors.each do |ancestor_locale|
      data = doc_for(ancestor_locale).xpath(path)

      unless data.empty?
        return ElementList.new(self, resolve_aliases_in(data))
      end
    end

    ElementList.new(self, [])
  end

  # Builds an absolute XPath ("//a/b[@type='x']") for +node+ by walking up
  # its parents. The walk stops at a parent named "document", or when a
  # node has no parent at all (previously the latter raised on nil).
  def path_for(node)
    segments = []

    while node
      segments << selector_for(node)
      node = node.parent
      break if node && node.name == 'document'
    end

    "//#{segments.reverse.join('/')}"
  end

  private

  # Replaces every <alias> element below +data+ with a copy of the
  # children of the node its "path" attribute points at. The target is
  # looked up in each ancestor document in order and the first hit is
  # used. Replacement content is itself scanned for aliases. Returns
  # +data+.
  def resolve_aliases_in(data)
    data.xpath('.//alias').each do |alias_node|
      # NOTE(review): an alias already swapped out by a nested call is
      # presumably detached (no parent) and needs no further work.
      next unless alias_node.parent

      alias_path = alias_node.attribute('path').value
      full_path = join_xpaths(path_for(alias_node.parent), alias_path)

      cldr_locale.ancestors.each do |ancestor_locale|
        # No `.dup` here: it raised on nil before Ruby 2.4 and the node
        # is re-parsed below anyway, so the source document stays intact.
        resolved_node = doc_for(ancestor_locale).xpath(full_path).first
        next unless resolved_node

        resolved_copy = Nokogiri::XML(resolved_node.to_xml).children.first
        parent = alias_node.parent
        alias_node.replace(resolved_copy.children)
        resolve_aliases_in(parent)
        break
      end
    end

    data
  end

  def join_xpaths(*paths)
    self.class.join_xpaths(*paths)
  end

  # Returns the node name, followed by a [@type='...'] predicate when the
  # node has a "type" attribute.
  def selector_for(node)
    selector = node.name.dup
    type = node.attribute('type')
    selector << "[@type='#{type.value}']" if type
    selector
  end

  # Parses (once) and returns the XML document for +locale+.
  def doc_for(locale)
    locale_fs = locale.to_s.gsub('-', '_')
    docs[locale_fs] ||= Nokogiri.XML(File.read(File.join(path, "#{locale_fs}.xml")))
  end

  def docs
    @docs ||= {}
  end
end
155
+
156
+ end
157
+ end
@@ -0,0 +1,110 @@
1
+ # encoding: UTF-8
2
+
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
6
+ module TwitterCldr
7
+ module Resources
8
+
9
# Minimal reader for the LDML DTD ("<common_path>/dtd/ldml.dtd") that
# answers one question: which values may a given attribute of a given
# element take. The DTD is treated as an array of text lines; it is not
# parsed as a real DTD.
class CldrDTD
  # One attribute of one element, with lazily computed allowed values.
  class Attr
    attr_reader :name, :element_name, :dtd

    def initialize(name, element_name, dtd)
      @name = name
      @element_name = element_name
      @dtd = dtd
    end

    # Allowed values for this attribute, memoized.
    #
    # Resolution order:
    # 1. Find the line containing "<!ATTLIST <element> <attr> "; if there
    #    is none, the result is [].
    # 2. If a "<!--@MATCH" comment follows (only other comment lines may
    #    sit in between), the values come from that annotation.
    # 3. Otherwise the values are the "|"-separated items between the
    #    first "(" and the last ")" of the ATTLIST line, or [] if the
    #    line has no such list.
    #
    # @return [Array<String>, Array<Integer>] strings, or integers for a
    #   "range" annotation
    def values
      @values ||= compute_values
    end

    private

    def compute_values
      marker = "<!ATTLIST #{element_name} #{name} "
      attr_line_idx = schema.find_index { |line| line.include?(marker) }
      return [] unless attr_line_idx

      comment = find_match_comment_after(attr_line_idx + 1)
      return parse_match(comment) if comment

      enumerated_values(schema[attr_line_idx])
    end

    # Extracts "a | b | c" style values from between the outermost
    # parentheses of an ATTLIST line.
    def enumerated_values(attr_line)
      start_idx = attr_line.index('(')
      finish_idx = attr_line.rindex(')')
      # Also covers an unmatched "(" which would otherwise yield a range
      # with a nil end.
      return [] unless start_idx && finish_idx && finish_idx > start_idx

      attr_line[(start_idx + 1)...finish_idx].split('|').map(&:strip)
    end

    # Starting at line +idx+, skips comment lines and returns the first
    # line that starts with "<!--@MATCH". Returns nil when a non-comment
    # line or the end of the schema is reached first.
    def find_match_comment_after(idx)
      # `<` (not `<=`): schema[schema.size] is nil and must not be read.
      while idx < schema.size
        stripped = schema[idx].strip
        return schema[idx] if stripped.start_with?('<!--@MATCH')
        return nil unless stripped.start_with?('<!--')

        idx += 1
      end

      nil
    end

    # Parses a "<!--@MATCH:<type>/<args>-->" annotation.
    #
    # * "literal" - args is a comma-separated list; returns the stripped
    #   items as strings.
    # * "range"   - args is "<start>~<finish>"; returns the inclusive
    #   integer range as an array.
    # * anything else, or an unparseable comment - returns [] so callers
    #   can always enumerate the result.
    def parse_match(str)
      m = str.match(/<!--@MATCH:([^\/]+)\/(.*)-->/)
      return [] unless m

      type, args = m.captures

      case type
      when 'literal'
        args.split(',').map(&:strip)
      when 'range'
        start, finish = args.split('~')
        ((start.to_i)..(finish.to_i)).to_a
      else
        []
      end
    end

    def schema
      dtd.schema
    end
  end

  attr_reader :cldr_requirement

  # @param cldr_requirement object responding to +common_path+; only
  #   consulted when the schema file is first read
  def initialize(cldr_requirement)
    @cldr_requirement = cldr_requirement
  end

  # Returns the (cached) Attr for +attr_name+ on +element_name+.
  def find_attr(element_name, attr_name)
    elements[element_name] ||= {}
    elements[element_name][attr_name] ||= Attr.new(
      attr_name, element_name, self
    )
  end

  # The DTD file as an array of lines, read once.
  def schema
    @schema ||= File.read(schema_path).split("\n")
  end

  private

  def elements
    @elements ||= {}
  end

  def schema_path
    @schema_path ||= File.join(
      cldr_requirement.common_path, 'dtd', 'ldml.dtd'
    )
  end
end
108
+
109
+ end
110
+ end