twitter_cldr 1.7.0 → 1.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76)
  1. data/History.txt +6 -0
  2. data/README.md +61 -5
  3. data/Rakefile +64 -60
  4. data/js/lib/twitter_cldr_js.rb +0 -2
  5. data/lib/twitter_cldr/core_ext.rb +12 -12
  6. data/lib/twitter_cldr/formatters/calendars/timespan_formatter.rb +13 -11
  7. data/lib/twitter_cldr/localized/localized_array.rb +33 -0
  8. data/lib/twitter_cldr/localized/localized_date.rb +23 -0
  9. data/lib/twitter_cldr/localized/localized_datetime.rb +63 -0
  10. data/lib/twitter_cldr/localized/localized_number.rb +50 -0
  11. data/lib/twitter_cldr/localized/localized_object.rb +38 -0
  12. data/lib/twitter_cldr/localized/localized_string.rb +41 -0
  13. data/lib/twitter_cldr/localized/localized_symbol.rb +20 -0
  14. data/lib/twitter_cldr/localized/localized_time.rb +23 -0
  15. data/lib/twitter_cldr/localized/localized_timespan.rb +26 -0
  16. data/lib/twitter_cldr/localized.rb +18 -0
  17. data/lib/twitter_cldr/normalization.rb +23 -0
  18. data/lib/twitter_cldr/resources/{tries_dumper.rb → collation_tries_dumper.rb} +1 -1
  19. data/lib/twitter_cldr/resources/composition_exclusions_importer.rb +1 -1
  20. data/lib/twitter_cldr/resources/language_codes_importer.rb +232 -0
  21. data/lib/twitter_cldr/resources/locales_resources_importer.rb +1 -1
  22. data/lib/twitter_cldr/resources/phone_codes_importer.rb +1 -1
  23. data/lib/twitter_cldr/resources/postal_codes_importer.rb +1 -1
  24. data/lib/twitter_cldr/resources/tailoring_importer.rb +12 -3
  25. data/lib/twitter_cldr/resources/unicode_data_importer.rb +3 -1
  26. data/lib/twitter_cldr/resources.rb +2 -1
  27. data/lib/twitter_cldr/shared/calendar.rb +2 -6
  28. data/lib/twitter_cldr/shared/language_codes.rb +75 -0
  29. data/lib/twitter_cldr/shared/languages.rb +4 -11
  30. data/lib/twitter_cldr/shared.rb +8 -7
  31. data/lib/twitter_cldr/tokenizers/base.rb +2 -8
  32. data/lib/twitter_cldr/utils.rb +8 -0
  33. data/lib/twitter_cldr/version.rb +1 -1
  34. data/lib/twitter_cldr.rb +5 -4
  35. data/resources/custom/locales/cs/units.yml +3 -3
  36. data/resources/custom/locales/pl/units.yml +4 -4
  37. data/resources/custom/locales/pt/units.yml +2 -2
  38. data/resources/shared/language_codes_table.dump +0 -0
  39. data/spec/core_ext_spec.rb +19 -0
  40. data/spec/{core_ext/array_spec.rb → localized/localized_array_spec.rb} +1 -1
  41. data/spec/{core_ext/calendars/date_spec.rb → localized/localized_date_spec.rb} +24 -44
  42. data/spec/localized/localized_datetime_spec.rb +81 -0
  43. data/spec/{core_ext/numbers → localized}/localized_number_spec.rb +34 -1
  44. data/spec/localized/localized_object_spec.rb +89 -0
  45. data/spec/{core_ext/string_spec.rb → localized/localized_string_spec.rb} +16 -33
  46. data/spec/{core_ext/symbol_spec.rb → localized/localized_symbol_spec.rb} +3 -1
  47. data/spec/localized/localized_time_spec.rb +70 -0
  48. data/spec/normalization_spec.rb +42 -0
  49. data/spec/readme_spec.rb +51 -5
  50. data/spec/shared/language_codes_spec.rb +161 -0
  51. data/spec/shared/phone_codes_spec.rb +2 -2
  52. data/spec/shared/postal_codes_spec.rb +2 -2
  53. data/spec/spec_helper.rb +2 -0
  54. data/spec/tokenizers/base_spec.rb +15 -6
  55. data/spec/utils_spec.rb +18 -2
  56. data/twitter_cldr.gemspec +2 -1
  57. metadata +28 -44
  58. data/lib/twitter_cldr/core_ext/array.rb +0 -35
  59. data/lib/twitter_cldr/core_ext/calendars/date.rb +0 -25
  60. data/lib/twitter_cldr/core_ext/calendars/datetime.rb +0 -65
  61. data/lib/twitter_cldr/core_ext/calendars/time.rb +0 -25
  62. data/lib/twitter_cldr/core_ext/calendars/timespan.rb +0 -24
  63. data/lib/twitter_cldr/core_ext/localized_object.rb +0 -25
  64. data/lib/twitter_cldr/core_ext/numbers/bignum.rb +0 -8
  65. data/lib/twitter_cldr/core_ext/numbers/fixnum.rb +0 -8
  66. data/lib/twitter_cldr/core_ext/numbers/float.rb +0 -8
  67. data/lib/twitter_cldr/core_ext/numbers/localized_number.rb +0 -54
  68. data/lib/twitter_cldr/core_ext/string.rb +0 -51
  69. data/lib/twitter_cldr/core_ext/symbol.rb +0 -22
  70. data/spec/core_ext/calendars/datetime_spec.rb +0 -90
  71. data/spec/core_ext/calendars/time_spec.rb +0 -90
  72. data/spec/core_ext/calendars_spec.rb +0 -34
  73. data/spec/core_ext/numbers/bignum_spec.rb +0 -25
  74. data/spec/core_ext/numbers/fixnum_spec.rb +0 -25
  75. data/spec/core_ext/numbers/float_spec.rb +0 -25
  76. data/spec/core_ext/numbers_spec.rb +0 -39
@@ -0,0 +1,50 @@
1
+ # encoding: UTF-8
2
+
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
6
+ module TwitterCldr
7
+ module Localized
8
+
9
# Localized wrapper around a numeric value. The +:type+ option (one of TYPES, defaulting to DEFAULT_TYPE)
# selects which formatter class from +TwitterCldr::Formatters+ is used to render the number.
class LocalizedNumber < LocalizedObject
  TYPES = [:decimal, :currency, :percent]
  DEFAULT_TYPE = :decimal

  attr_reader :type

  # +options+ is copied before +:type+ is removed from it, so the caller's hash is left untouched. The
  # remaining options are kept in +@options+ and passed on to the superclass.
  #
  # Raises ArgumentError when the requested type is not listed in TYPES.
  def initialize(obj, locale, options = {})
    @options = options.dup
    @type = @options.delete(:type) || DEFAULT_TYPE

    unless @type && TYPES.include?(@type.to_sym)
      raise ArgumentError.new("type #{@type} is not supported")
    end

    super(obj, locale, @options)
  end

  # Defines to_decimal, to_currency and to_percent. Each one returns a new wrapper of the same class
  # around the same number and locale, with the type switched.
  TYPES.each do |target|
    define_method("to_#{target}") { to_type(target) }
  end

  # Delegates to the formatter chosen for the current type.
  def to_s(options = {})
    @formatter.format(@base_obj, options)
  end

  # Asks the plural rules for the rule that applies to the wrapped number in the current locale.
  def plural_rule
    rules = TwitterCldr::Formatters::Plurals::Rules
    rules.rule_for(@base_obj, @locale)
  end

  protected

  # Resolves the formatter class by name, e.g. type :currency => Formatters::CurrencyFormatter.
  def formatter_const
    formatter_name = "#{@type.to_s.capitalize}Formatter"
    TwitterCldr::Formatters.const_get(formatter_name)
  end

  # Builds a copy of this wrapper with +target_type+ replacing the current type.
  def to_type(target_type)
    retyped_options = @options.merge(:type => target_type)
    self.class.new(@base_obj, @locale, retyped_options)
  end
end
48
+
49
+ end
50
+ end
@@ -0,0 +1,38 @@
1
+ # encoding: UTF-8
2
+
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
6
+ module TwitterCldr
7
+ module Localized
8
+
9
# Base class for the localized wrappers. Stores the wrapped object together with a locale and, when the
# subclass provides a formatter class, an instance of that formatter.
class LocalizedObject

  attr_reader :locale, :base_obj, :formatter

  # obj     - the object being wrapped (exposed as +base_obj+).
  # locale  - requested locale; it is passed through +TwitterCldr.convert_locale+ and replaced with
  #           +TwitterCldr::DEFAULT_LOCALE+ when +TwitterCldr.supported_locale?+ rejects it.
  # options - options for the formatter; the caller's hash is not modified, the resolved locale is
  #           added to a copy under the +:locale+ key.
  def initialize(obj, locale, options = {})
    @base_obj = obj

    requested = TwitterCldr.convert_locale(locale)
    @locale = TwitterCldr.supported_locale?(requested) ? requested : TwitterCldr::DEFAULT_LOCALE

    # Ask the subclass exactly once: the answer is used both as the guard and as the class to
    # instantiate, and subclasses may compute it with a constant lookup.
    formatter_class = formatter_const
    @formatter = formatter_class.new(options.merge(:locale => @locale)) if formatter_class
  end

  # Subclasses must return the formatter class to instantiate, or nil when no formatter is needed.
  def formatter_const
    raise NotImplementedError
  end

  # Adds a +localize+ instance method to +klass+ that wraps the receiver in the class this method was
  # called on. The locale defaults to +TwitterCldr.get_locale+, evaluated at call time.
  def self.localize(klass)
    klass.class_eval <<-LOCALIZE, __FILE__, __LINE__ + 1
      def localize(locale = TwitterCldr.get_locale, options = {})
        #{self}.new(self, locale, options)
      end
    LOCALIZE
  end
end
36
+
37
+ end
38
+ end
@@ -0,0 +1,41 @@
1
+ # encoding: UTF-8
2
+
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
6
+ module TwitterCldr
7
+ module Localized
8
+
9
# Localized wrapper around a String.
class LocalizedString < LocalizedObject

  # Uses wrapped string object as a format specification and returns the result of applying it to +args+ (see
  # +TwitterCldr::Utils.interpolate+ method for interpolation syntax).
  #
  # If +args+ is a Hash then pluralization is performed before interpolation (see +PluralFormatter+ class for
  # pluralization specification).
  #
  def %(args)
    template =
      if args.is_a?(Hash)
        @formatter.format(@base_obj, args)
      else
        @base_obj
      end

    TwitterCldr::Utils.interpolate(template, args)
  end

  # Formatter class used for the pluralization step of +%+.
  def formatter_const
    TwitterCldr::Formatters::PluralFormatter
  end

  # Normalizes the wrapped string with +TwitterCldr::Normalization.normalize+ (+options+ are passed through
  # unchanged) and calls +localize+ on the result with this object's locale.
  def normalize(options = {})
    normalized = TwitterCldr::Normalization.normalize(@base_obj, options)
    normalized.localize(@locale)
  end

  # Returns whatever +TwitterCldr::Utils::CodePoints.from_string+ produces for the wrapped string.
  def code_points
    TwitterCldr::Utils::CodePoints.from_string(@base_obj)
  end

  # Returns a copy of the wrapped string, so callers cannot mutate the wrapped object through it.
  def to_s
    @base_obj.dup
  end

end
39
+
40
+ end
41
+ end
@@ -0,0 +1,20 @@
1
+ # encoding: UTF-8
2
+
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
6
+ module TwitterCldr
7
+ module Localized
8
+
9
# Localized wrapper around a Symbol that is treated as a language code.
class LocalizedSymbol < LocalizedObject

  # Symbols are not formatted, so no formatter is instantiated for them.
  def formatter_const
    nil
  end

  # Passes the wrapped symbol and this object's locale to
  # +TwitterCldr::Shared::Languages.from_code_for_locale+ and returns its result.
  def as_language_code
    languages = TwitterCldr::Shared::Languages
    languages.from_code_for_locale(@base_obj, @locale)
  end

end
18
+
19
+ end
20
+ end
@@ -0,0 +1,23 @@
1
+ # encoding: UTF-8
2
+
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
6
+ module TwitterCldr
7
+ module Localized
8
+
9
# Localized wrapper around a time value, formatted with +TimeFormatter+.
class LocalizedTime < LocalizedDateTime

  # Returns a +LocalizedDateTime+ whose year/month/day come from +date+ and whose time of day and UTC
  # offset come from the wrapped object. +date+ may be a +LocalizedDate+ (its +base_obj+ is used) or any
  # object that responds to +strftime+.
  #
  # NOTE(review): +@calendar_type+ is not assigned in this class - presumably it is set by
  # LocalizedDateTime; confirm.
  def to_datetime(date)
    date_obj = date.is_a?(LocalizedDate) ? date.base_obj : date

    date_part = date_obj.strftime("%Y-%m-%d")
    time_part = @base_obj.strftime("%H:%M:%S%z")
    combined  = DateTime.parse("#{date_part}T#{time_part}")

    LocalizedDateTime.new(combined, @locale, :calendar_type => @calendar_type)
  end

  protected

  # Formatter class instantiated by the base class constructor.
  def formatter_const
    TwitterCldr::Formatters::TimeFormatter
  end

end
21
+
22
+ end
23
+ end
@@ -0,0 +1,26 @@
1
+ # encoding: UTF-8
2
+
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
6
+ module TwitterCldr
7
+ module Localized
8
+
9
# Localized wrapper around a time span given as a number of seconds, rendered by +TimespanFormatter+.
class LocalizedTimespan < LocalizedObject

  # Unlike the other wrappers, the locale is not a positional argument here: it is read from
  # +options[:locale]+, and the complete +options+ hash is still handed to the base class.
  def initialize(seconds, options = {})
    requested_locale = options[:locale]
    super(seconds, requested_locale, options)
  end

  # Delegates to the formatter, passing the wrapped value and +options+ through unchanged.
  def to_s(options = {})
    @formatter.format(@base_obj, options)
  end

  protected

  # Formatter class instantiated by the base class constructor.
  def formatter_const
    TwitterCldr::Formatters::TimespanFormatter
  end

end
24
+
25
+ end
26
+ end
@@ -0,0 +1,18 @@
1
+ # encoding: UTF-8
2
+
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
6
+ module TwitterCldr
7
# Namespace for the localized wrapper classes. Every class is registered for autoloading, so its file is
# only required when the constant is first referenced.
module Localized
  # [constant name, file name under twitter_cldr/localized/]
  [
    [:LocalizedArray,    'localized_array'],
    [:LocalizedDate,     'localized_date'],
    [:LocalizedDateTime, 'localized_datetime'],
    [:LocalizedNumber,   'localized_number'],
    [:LocalizedObject,   'localized_object'],
    [:LocalizedString,   'localized_string'],
    [:LocalizedSymbol,   'localized_symbol'],
    [:LocalizedTime,     'localized_time'],
    [:LocalizedTimespan, 'localized_timespan']
  ].each do |const_name, file_name|
    autoload const_name, "twitter_cldr/localized/#{file_name}"
  end
end
18
+ end
@@ -11,5 +11,28 @@ module TwitterCldr
11
11
  autoload :NFD, 'twitter_cldr/normalization/nfd'
12
12
  autoload :NFKC, 'twitter_cldr/normalization/nfkc'
13
13
  autoload :NFKD, 'twitter_cldr/normalization/nfkd'
14
+
15
+ VALID_NORMALIZERS = [:NFD, :NFKD, :NFC, :NFKC]
16
+ DEFAULT_NORMALIZER = :NFD
17
+
18
class << self

  # Normalizes +string+ with the normalizer named by +options[:using]+ (a symbol or string, matched
  # case-insensitively); DEFAULT_NORMALIZER is used when the option is missing.
  #
  # Raises ArgumentError when the name is not listed in VALID_NORMALIZERS.
  def normalize(string, options = {})
    requested = options[:using] || DEFAULT_NORMALIZER
    normalizer(requested).normalize(string)
  end

  private

  # Maps a normalizer name to the constant of the same (upper-cased) name in this namespace. The name
  # is checked against VALID_NORMALIZERS first, so arbitrary constants cannot be looked up.
  def normalizer(normalizer_name)
    const_name = normalizer_name.to_s.upcase.to_sym
    return const_get(const_name) if VALID_NORMALIZERS.include?(const_name)

    raise ArgumentError.new("#{normalizer_name.inspect} is not a valid normalizer (valid normalizers are #{VALID_NORMALIZERS.join(', ')})")
  end

end
14
37
  end
15
38
  end
@@ -6,7 +6,7 @@
6
6
  module TwitterCldr
7
7
  module Resources
8
8
 
9
- module TriesDumper
9
+ module CollationTriesDumper
10
10
 
11
11
  class << self
12
12
 
@@ -3,7 +3,7 @@
3
3
  # Copyright 2012 Twitter, Inc
4
4
  # http://www.apache.org/licenses/LICENSE-2.0
5
5
 
6
- require 'lib/twitter_cldr/resources/download'
6
+ require 'twitter_cldr/resources/download'
7
7
 
8
8
  module TwitterCldr
9
9
  module Resources
@@ -0,0 +1,232 @@
1
+ # encoding: UTF-8
2
+
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
6
+ require 'twitter_cldr/resources/download'
7
+
8
+ module TwitterCldr
9
+ module Resources
10
+
11
# Imports language codes from two sources - the IANA language subtag registry (BCP 47) and an ISO 639
# code table - merges them by language name and writes the result as a marshalled lookup table (and,
# optionally, as YAML files).
class LanguageCodesImporter

  BCP_47_FILE, ISO_639_FILE = %w[bcp-47.txt iso-639.txt]

  # Local file name => URL the file is downloaded from.
  INPUT_DATA = {
    BCP_47_FILE  => 'http://www.iana.org/assignments/language-subtag-registry',
    ISO_639_FILE => 'http://www.sil.org/iso639-3/iso-639-3_20120614.tab'
  }

  # Key of a language entry => standard (sub-table) the value of that key is indexed under.
  KEYS_TO_STANDARDS = {
    :iso_639_1      => :iso_639_1,
    :iso_639_2      => :iso_639_2,
    :iso_639_2_term => :iso_639_2,
    :iso_639_3      => :iso_639_3,
    :bcp_47         => :bcp_47,
    :bcp_47_alt     => :bcp_47
  }

  # Names assigned, in order, to the tab-separated columns of every ISO 639 row.
  ISO_639_COLUMNS = [
    :code,           # Code
    :status,         # Status
    :partner_agency, # Partner Agency
    :iso_639_3,      # 639_3
    :iso_639_2,      # 639_2 (alpha-3 bibliographic/terminology code)
    :b_code,         # alpha-3 bibliographic code if iso_639_2 contains terminology code
    :bt_equiv,       # bt_equiv (alpha-3 bibliographic/terminology equivalent)
    :iso_639_1,      # 639_1
    :name,           # Reference_Name
    :scope,          # Element_Scope
    :type,           # Language_Type
    :docs            # Documentation
  ]

  # Description that marks private use entries of the BCP 47 registry. It is a Symbol because
  # descriptions are converted to symbols while the registry is parsed.
  PRIVATE_USE_NAME = :'Private use'

  # input_path  - directory the source files are downloaded to and read from.
  # output_path - directory the generated files are written to.
  def initialize(input_path, output_path)
    @input_path  = input_path
    @output_path = output_path
  end

  # Downloads the source files if necessary and generates 'language_codes_table.dump'. When
  # +import_yaml+ is true, YAML versions of the codes and of the table are written as well.
  def import(import_yaml = false)
    prepare_data
    import_data(import_yaml)
  end

  private

  def prepare_data
    INPUT_DATA.each do |file, url|
      TwitterCldr::Resources.download_if_necessary(File.join(@input_path, file), url)
    end
  end

  def import_data(import_yaml)
    result = import_bcp_47(import_iso_639)

    # sort languages by name and the codes of every language by key to get a stable output
    sorted_entries = result.map { |name, codes| [name, Hash[codes.sort]] }.sort
    language_codes = Hash[sorted_entries]

    language_codes_table = build_table(language_codes)

    write('language_codes_table.dump', 'wb', Marshal.dump(language_codes_table))

    if import_yaml
      write('language_codes.yml', 'w:utf-8', YAML.dump(language_codes))
      write('language_codes_table.yml', 'w:utf-8', YAML.dump(language_codes_table))
    end
  end

  def write(file, mode, data)
    File.open(File.join(@output_path, file), mode) { |output| output.write(data) }
  end

  # Generates codes in the following format:
  #
  #   {
  #     :Albanian => {
  #       :iso_639_1      => :sq,
  #       :iso_639_2      => :alb, # default (bibliographic) code
  #       :iso_639_2_term => :sqi, # terminology code (optional)
  #       :iso_639_3      => :sqi
  #     }
  #   }
  #
  def import_iso_639(result = {})
    File.open(File.join(@input_path, ISO_639_FILE)) do |file|
      file.gets # skip header

      file.each_line do |line|
        # drop the quotes around quoted values together with any tabs inside of them
        entry = line.chomp.gsub(/"(.*)"/) { $1.gsub("\t", '') }
        data  = Hash[ISO_639_COLUMNS.zip(entry.split("\t"))]

        # either bibliographic and terminology codes are the same (:bt_equiv is empty)
        # or :iso_639_2 contains terminology code and :bt_equiv contains bibliographic code
        next unless data[:bt_equiv].empty? || !data[:b_code].empty?
        next if data[:name] == 'Reserved for local use'
        next if data[:scope] == 'C' # skip 'collection' scope

        codes = result[data[:name].to_sym] ||= {}

        set_iso_639_data(codes, :iso_639_1, data[:iso_639_1])

        if data[:bt_equiv].empty?
          set_iso_639_data(codes, :iso_639_2, data[:iso_639_2])
        else
          set_iso_639_data(codes, :iso_639_2, data[:bt_equiv])
          set_iso_639_data(codes, :iso_639_2_term, data[:iso_639_2])
        end

        set_iso_639_data(codes, :iso_639_3, data[:iso_639_3])
      end
    end

    result
  end

  # Stores +value+ as a Symbol under +key+; nil and empty values are ignored.
  def set_iso_639_data(data, key, value)
    data[key] = value.to_sym unless value.nil? || value.empty?
  end

  # Generates codes in the following format:
  #
  #   {
  #     :Bangka => {
  #       :bcp_47     => :mfb,      # preferred code
  #       :bcp_47_alt => :"ms-mfb"  # alternative code (optional)
  #     }
  #   }
  #
  def import_bcp_47(result = {})
    File.open(File.join(@input_path, BCP_47_FILE)) do |file|
      file.gets # skip header

      data  = {} # fields of the record that is being read
      entry = '' # current field, possibly continued over several lines

      file.each_line do |line|
        line.chomp!

        if line == '%%' # record separator
          process_bcp_47_entry(entry, data)
          process_bcp_47_data(data, result)
        elsif line.include?(':') # a new field starts, so the previous one is complete
          process_bcp_47_entry(entry, data)
          entry = line
        else # continuation of the current field
          entry += line
        end
      end

      # the last record is not followed by a separator
      process_bcp_47_entry(entry, data)
      process_bcp_47_data(data, result)
    end

    result
  end

  # Parses a "Key: value" field into +data+ and empties +entry+ in place. 'Description' values are
  # collected as Symbols under 'names'; any other value is stored under its lower-cased key.
  def process_bcp_47_entry(entry, data)
    return if entry.nil? || entry.empty?

    key, value = entry.chomp.split(':', 2).map(&:strip)

    if key == 'Description'
      (data['names'] ||= []) << value.to_sym
    else
      data[key.downcase] = value
    end

    entry.clear
  end

  # Merges a complete BCP 47 record into +result+ and empties +data+ in place. Only 'language' and
  # 'extlang' records are used; private use records and records of 'collection' scope are skipped.
  def process_bcp_47_data(data, result)
    names = data['names'] || [] # a record without descriptions contributes nothing

    if !data.empty? && %w[language extlang].include?(data['type']) && !names.include?(PRIVATE_USE_NAME) && data['scope'] != 'collection'
      existing_names = names.select { |name| result.has_key?(name) }

      preferred   = data['preferred-value']
      alternative = [data['prefix'], data['subtag']].compact.join('-')

      bcp_47 = {}
      bcp_47[:bcp_47]     = (preferred || alternative).to_sym
      bcp_47[:bcp_47_alt] = alternative.to_sym if preferred

      existing_names.each { |name| result[name].merge!(bcp_47) }

      # names that are new inherit the codes already known for the first existing name of the record
      bcp_47.merge!(result[existing_names.first]) unless existing_names.empty?

      (names - existing_names).each { |name| result[name] = bcp_47.dup }
    end

    data.clear
  end

  # Builds the lookup table: the :name sub-table maps a language name to its entry (the codes plus
  # :name), every other sub-table maps a code of that standard to the same entry.
  def build_table(language_codes_map)
    # can't use Hash with default proc here, because we won't be able to marshal this hash later in this case
    table = {}
    ([:name] + KEYS_TO_STANDARDS.values.uniq.sort_by(&:to_s)).each { |key| table[key] = {} }

    language_codes_map.each do |name, codes|
      table[:name][name] = { :name => name }.merge(codes)
    end

    table[:name].each_value do |data|
      KEYS_TO_STANDARDS.each do |key, standard|
        table[standard][data[key].to_sym] = data if data[key]
      end
    end

    table.keys.each { |key| table[key] = Hash[table[key].sort] }

    table
  end

end
230
+
231
+ end
232
+ end