twitter_cldr 1.7.0 → 1.8.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (76)
  1. data/History.txt +6 -0
  2. data/README.md +61 -5
  3. data/Rakefile +64 -60
  4. data/js/lib/twitter_cldr_js.rb +0 -2
  5. data/lib/twitter_cldr/core_ext.rb +12 -12
  6. data/lib/twitter_cldr/formatters/calendars/timespan_formatter.rb +13 -11
  7. data/lib/twitter_cldr/localized/localized_array.rb +33 -0
  8. data/lib/twitter_cldr/localized/localized_date.rb +23 -0
  9. data/lib/twitter_cldr/localized/localized_datetime.rb +63 -0
  10. data/lib/twitter_cldr/localized/localized_number.rb +50 -0
  11. data/lib/twitter_cldr/localized/localized_object.rb +38 -0
  12. data/lib/twitter_cldr/localized/localized_string.rb +41 -0
  13. data/lib/twitter_cldr/localized/localized_symbol.rb +20 -0
  14. data/lib/twitter_cldr/localized/localized_time.rb +23 -0
  15. data/lib/twitter_cldr/localized/localized_timespan.rb +26 -0
  16. data/lib/twitter_cldr/localized.rb +18 -0
  17. data/lib/twitter_cldr/normalization.rb +23 -0
  18. data/lib/twitter_cldr/resources/{tries_dumper.rb → collation_tries_dumper.rb} +1 -1
  19. data/lib/twitter_cldr/resources/composition_exclusions_importer.rb +1 -1
  20. data/lib/twitter_cldr/resources/language_codes_importer.rb +232 -0
  21. data/lib/twitter_cldr/resources/locales_resources_importer.rb +1 -1
  22. data/lib/twitter_cldr/resources/phone_codes_importer.rb +1 -1
  23. data/lib/twitter_cldr/resources/postal_codes_importer.rb +1 -1
  24. data/lib/twitter_cldr/resources/tailoring_importer.rb +12 -3
  25. data/lib/twitter_cldr/resources/unicode_data_importer.rb +3 -1
  26. data/lib/twitter_cldr/resources.rb +2 -1
  27. data/lib/twitter_cldr/shared/calendar.rb +2 -6
  28. data/lib/twitter_cldr/shared/language_codes.rb +75 -0
  29. data/lib/twitter_cldr/shared/languages.rb +4 -11
  30. data/lib/twitter_cldr/shared.rb +8 -7
  31. data/lib/twitter_cldr/tokenizers/base.rb +2 -8
  32. data/lib/twitter_cldr/utils.rb +8 -0
  33. data/lib/twitter_cldr/version.rb +1 -1
  34. data/lib/twitter_cldr.rb +5 -4
  35. data/resources/custom/locales/cs/units.yml +3 -3
  36. data/resources/custom/locales/pl/units.yml +4 -4
  37. data/resources/custom/locales/pt/units.yml +2 -2
  38. data/resources/shared/language_codes_table.dump +0 -0
  39. data/spec/core_ext_spec.rb +19 -0
  40. data/spec/{core_ext/array_spec.rb → localized/localized_array_spec.rb} +1 -1
  41. data/spec/{core_ext/calendars/date_spec.rb → localized/localized_date_spec.rb} +24 -44
  42. data/spec/localized/localized_datetime_spec.rb +81 -0
  43. data/spec/{core_ext/numbers → localized}/localized_number_spec.rb +34 -1
  44. data/spec/localized/localized_object_spec.rb +89 -0
  45. data/spec/{core_ext/string_spec.rb → localized/localized_string_spec.rb} +16 -33
  46. data/spec/{core_ext/symbol_spec.rb → localized/localized_symbol_spec.rb} +3 -1
  47. data/spec/localized/localized_time_spec.rb +70 -0
  48. data/spec/normalization_spec.rb +42 -0
  49. data/spec/readme_spec.rb +51 -5
  50. data/spec/shared/language_codes_spec.rb +161 -0
  51. data/spec/shared/phone_codes_spec.rb +2 -2
  52. data/spec/shared/postal_codes_spec.rb +2 -2
  53. data/spec/spec_helper.rb +2 -0
  54. data/spec/tokenizers/base_spec.rb +15 -6
  55. data/spec/utils_spec.rb +18 -2
  56. data/twitter_cldr.gemspec +2 -1
  57. metadata +28 -44
  58. data/lib/twitter_cldr/core_ext/array.rb +0 -35
  59. data/lib/twitter_cldr/core_ext/calendars/date.rb +0 -25
  60. data/lib/twitter_cldr/core_ext/calendars/datetime.rb +0 -65
  61. data/lib/twitter_cldr/core_ext/calendars/time.rb +0 -25
  62. data/lib/twitter_cldr/core_ext/calendars/timespan.rb +0 -24
  63. data/lib/twitter_cldr/core_ext/localized_object.rb +0 -25
  64. data/lib/twitter_cldr/core_ext/numbers/bignum.rb +0 -8
  65. data/lib/twitter_cldr/core_ext/numbers/fixnum.rb +0 -8
  66. data/lib/twitter_cldr/core_ext/numbers/float.rb +0 -8
  67. data/lib/twitter_cldr/core_ext/numbers/localized_number.rb +0 -54
  68. data/lib/twitter_cldr/core_ext/string.rb +0 -51
  69. data/lib/twitter_cldr/core_ext/symbol.rb +0 -22
  70. data/spec/core_ext/calendars/datetime_spec.rb +0 -90
  71. data/spec/core_ext/calendars/time_spec.rb +0 -90
  72. data/spec/core_ext/calendars_spec.rb +0 -34
  73. data/spec/core_ext/numbers/bignum_spec.rb +0 -25
  74. data/spec/core_ext/numbers/fixnum_spec.rb +0 -25
  75. data/spec/core_ext/numbers/float_spec.rb +0 -25
  76. data/spec/core_ext/numbers_spec.rb +0 -39
@@ -0,0 +1,50 @@
1
+ # encoding: UTF-8
2
+
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
6
+ module TwitterCldr
7
+ module Localized
8
+
9
# Wraps a number together with a locale and a formatting type.
#
# The type selects which formatter class is looked up under
# TwitterCldr::Formatters (see #formatter_const); it must be one of TYPES.
class LocalizedNumber < LocalizedObject
  TYPES        = [:decimal, :currency, :percent]
  DEFAULT_TYPE = :decimal

  attr_reader :type

  # obj     - the wrapped number
  # locale  - locale passed through to LocalizedObject#initialize
  # options - formatter options; the :type key is removed from a copy of the
  #           hash and stored in @type (DEFAULT_TYPE is used when it is absent)
  #
  # Raises ArgumentError when the requested type is not listed in TYPES.
  def initialize(obj, locale, options = {})
    @options = options.dup
    @type    = @options.delete(:type) || DEFAULT_TYPE

    supported = @type && TYPES.include?(@type.to_sym)
    raise ArgumentError, "type #{@type} is not supported" unless supported

    super(obj, locale, @options)
  end

  # Generates one conversion method per supported type (to_decimal,
  # to_currency, to_percent); each returns a new wrapper of that type.
  TYPES.each do |number_type|
    define_method(:"to_#{number_type}") { to_type(number_type) }
  end

  # Formats the wrapped number with the formatter built by the superclass
  # constructor, forwarding +options+ to the formatter.
  def to_s(options = {})
    @formatter.format(@base_obj, options)
  end

  # Looks up the plural rule for the wrapped number in the current locale.
  def plural_rule
    TwitterCldr::Formatters::Plurals::Rules.rule_for(@base_obj, @locale)
  end

  protected

  # Resolves the formatter class from the type name,
  # e.g. :decimal -> TwitterCldr::Formatters::DecimalFormatter.
  def formatter_const
    formatter_name = "#{@type.to_s.capitalize}Formatter"
    TwitterCldr::Formatters.const_get(formatter_name)
  end

  # Builds a new instance of the same class for the same number and locale,
  # reusing the stored options but with a different type.
  def to_type(target_type)
    self.class.new(@base_obj, @locale, @options.merge(:type => target_type))
  end
end
48
+
49
+ end
50
+ end
@@ -0,0 +1,38 @@
1
+ # encoding: UTF-8
2
+
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
6
+ module TwitterCldr
7
+ module Localized
8
+
9
# Base class for locale-aware wrappers around plain Ruby objects.
#
# Subclasses supply #formatter_const, which returns the formatter class to
# instantiate, or nil when the wrapper needs no formatter.
class LocalizedObject

  attr_reader :locale, :base_obj, :formatter

  # obj     - the object being wrapped
  # locale  - requested locale; it is run through TwitterCldr.convert_locale
  #           and replaced by TwitterCldr::DEFAULT_LOCALE when unsupported
  # options - formatter options; a copy with :locale set to the resolved
  #           locale is handed to the formatter constructor
  def initialize(obj, locale, options = {})
    @base_obj = obj

    @locale = TwitterCldr.convert_locale(locale)
    unless TwitterCldr.supported_locale?(@locale)
      @locale = TwitterCldr::DEFAULT_LOCALE
    end

    formatter_options = options.dup
    formatter_options[:locale] = @locale

    if formatter_const
      @formatter = formatter_const.new(formatter_options)
    end
  end

  # Formatter class used by this wrapper. Must be overridden by subclasses.
  def formatter_const
    raise NotImplementedError
  end

  # Adds a #localize instance method to +klass+ that wraps the receiver in
  # this localized class. The method body is evaluated from a string so that
  # the name of the class +localize+ was called on is baked into it.
  def self.localize(klass)
    wrapper = self

    klass.class_eval(<<-LOCALIZE, __FILE__, __LINE__ + 1)
      def localize(locale = TwitterCldr.get_locale, options = {})
        #{wrapper}.new(self, locale, options)
      end
    LOCALIZE
  end
end
36
+
37
+ end
38
+ end
@@ -0,0 +1,41 @@
1
+ # encoding: UTF-8
2
+
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
6
+ module TwitterCldr
7
+ module Localized
8
+
9
# Locale-aware wrapper around a String.
class LocalizedString < LocalizedObject

  # Treats the wrapped string as a format specification and applies it to
  # +args+ (see +TwitterCldr::Utils.interpolate+ for interpolation syntax).
  #
  # If +args+ is a Hash then pluralization is performed before interpolation
  # (see +PluralFormatter+ class for pluralization specification).
  #
  def %(args)
    template =
      if args.is_a?(Hash)
        @formatter.format(@base_obj, args)
      else
        @base_obj
      end

    TwitterCldr::Utils.interpolate(template, args)
  end

  # Formatter class used for the pluralization step of #%.
  def formatter_const
    TwitterCldr::Formatters::PluralFormatter
  end

  # Normalizes the wrapped string via TwitterCldr::Normalization.normalize,
  # forwarding +options+, and calls #localize on the result with this
  # wrapper's locale.
  def normalize(options = {})
    normalized = TwitterCldr::Normalization.normalize(@base_obj, options)
    normalized.localize(@locale)
  end

  # Returns the result of TwitterCldr::Utils::CodePoints.from_string for the
  # wrapped string.
  def code_points
    TwitterCldr::Utils::CodePoints.from_string(@base_obj)
  end

  # Returns a copy of the wrapped string.
  def to_s
    @base_obj.dup
  end

end
39
+
40
+ end
41
+ end
@@ -0,0 +1,20 @@
1
+ # encoding: UTF-8
2
+
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
6
+ module TwitterCldr
7
+ module Localized
8
+
9
# Locale-aware wrapper around a Symbol.
class LocalizedSymbol < LocalizedObject
  # This wrapper needs no formatter, so the base constructor skips creating one.
  def formatter_const
    nil
  end

  # Passes the wrapped symbol and the wrapper's locale to
  # TwitterCldr::Shared::Languages.from_code_for_locale and returns the result.
  def as_language_code
    languages = TwitterCldr::Shared::Languages
    languages.from_code_for_locale(@base_obj, @locale)
  end
end
18
+
19
+ end
20
+ end
@@ -0,0 +1,23 @@
1
+ # encoding: UTF-8
2
+
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
6
+ module TwitterCldr
7
+ module Localized
8
+
9
# Locale-aware wrapper around a time value, formatted with TimeFormatter.
class LocalizedTime < LocalizedDateTime
  # Combines the calendar date of +date+ with the time of day (and UTC offset)
  # of the wrapped object into a new LocalizedDateTime.
  #
  # date - either a LocalizedDate (its wrapped object is used) or any object
  #        responding to #strftime
  #
  # The result keeps this wrapper's locale and calendar type.
  def to_datetime(date)
    date_obj =
      if date.is_a?(LocalizedDate)
        date.base_obj
      else
        date
      end

    date_part = date_obj.strftime("%Y-%m-%d")
    time_part = @base_obj.strftime("%H:%M:%S%z")
    combined  = DateTime.parse("#{date_part}T#{time_part}")

    LocalizedDateTime.new(combined, @locale, :calendar_type => @calendar_type)
  end

  protected

  # Formatter class used for this wrapper.
  def formatter_const
    TwitterCldr::Formatters::TimeFormatter
  end
end
21
+
22
+ end
23
+ end
@@ -0,0 +1,26 @@
1
+ # encoding: UTF-8
2
+
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
6
+ module TwitterCldr
7
+ module Localized
8
+
9
# Locale-aware wrapper around a time span, formatted with TimespanFormatter.
class LocalizedTimespan < LocalizedObject
  # seconds - the wrapped value
  # options - formatter options; the locale is read from options[:locale]
  #           and the whole hash is forwarded to the base constructor
  def initialize(seconds, options = {})
    locale = options[:locale]
    super(seconds, locale, options)
  end

  # Formats the wrapped value, forwarding +options+ to the formatter.
  def to_s(options = {})
    @formatter.format(@base_obj, options)
  end

  protected

  # Formatter class used for this wrapper.
  def formatter_const
    TwitterCldr::Formatters::TimespanFormatter
  end
end
24
+
25
+ end
26
+ end
@@ -0,0 +1,18 @@
1
+ # encoding: UTF-8
2
+
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
6
+ module TwitterCldr
7
+ module Localized
8
+ autoload :LocalizedArray, 'twitter_cldr/localized/localized_array'
9
+ autoload :LocalizedDate, 'twitter_cldr/localized/localized_date'
10
+ autoload :LocalizedDateTime, 'twitter_cldr/localized/localized_datetime'
11
+ autoload :LocalizedNumber, 'twitter_cldr/localized/localized_number'
12
+ autoload :LocalizedObject, 'twitter_cldr/localized/localized_object'
13
+ autoload :LocalizedString, 'twitter_cldr/localized/localized_string'
14
+ autoload :LocalizedSymbol, 'twitter_cldr/localized/localized_symbol'
15
+ autoload :LocalizedTime, 'twitter_cldr/localized/localized_time'
16
+ autoload :LocalizedTimespan, 'twitter_cldr/localized/localized_timespan'
17
+ end
18
+ end
@@ -11,5 +11,28 @@ module TwitterCldr
11
11
  autoload :NFD, 'twitter_cldr/normalization/nfd'
12
12
  autoload :NFKC, 'twitter_cldr/normalization/nfkc'
13
13
  autoload :NFKD, 'twitter_cldr/normalization/nfkd'
14
+
15
+ VALID_NORMALIZERS = [:NFD, :NFKD, :NFC, :NFKC]
16
+ DEFAULT_NORMALIZER = :NFD
17
+
18
+ class << self
19
+
20
# Normalizes +string+ with the normalizer named by options[:using],
# falling back to DEFAULT_NORMALIZER when that option is not given.
def normalize(string, options = {})
  normalizer_name = options[:using] || DEFAULT_NORMALIZER
  normalizer(normalizer_name).normalize(string)
end
23
+
24
+ private
25
+
26
# Resolves a normalizer name (String or Symbol, any letter case) to the
# constant of the same upper-cased name.
#
# Raises ArgumentError when the name is not listed in VALID_NORMALIZERS.
def normalizer(normalizer_name)
  const_name = normalizer_name.to_s.upcase.to_sym

  unless VALID_NORMALIZERS.include?(const_name)
    raise ArgumentError, "#{normalizer_name.inspect} is not a valid normalizer (valid normalizers are #{VALID_NORMALIZERS.join(', ')})"
  end

  const_get(const_name)
end
35
+
36
+ end
14
37
  end
15
38
  end
@@ -6,7 +6,7 @@
6
6
  module TwitterCldr
7
7
  module Resources
8
8
 
9
- module TriesDumper
9
+ module CollationTriesDumper
10
10
 
11
11
  class << self
12
12
 
@@ -3,7 +3,7 @@
3
3
  # Copyright 2012 Twitter, Inc
4
4
  # http://www.apache.org/licenses/LICENSE-2.0
5
5
 
6
- require 'lib/twitter_cldr/resources/download'
6
+ require 'twitter_cldr/resources/download'
7
7
 
8
8
  module TwitterCldr
9
9
  module Resources
@@ -0,0 +1,232 @@
1
+ # encoding: UTF-8
2
+
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
6
+ require 'twitter_cldr/resources/download'
7
+
8
+ module TwitterCldr
9
+ module Resources
10
+
11
# Builds the language codes resources from two downloaded data files: an
# ISO 639 tab-separated table and the BCP 47 language subtag registry.
#
# Usage: LanguageCodesImporter.new(input_path, output_path).import
class LanguageCodesImporter

  BCP_47_FILE, ISO_639_FILE = %w[bcp-47.txt iso-639.txt]

  # Local file name => URL it is downloaded from when missing.
  INPUT_DATA = {
    BCP_47_FILE  => 'http://www.iana.org/assignments/language-subtag-registry',
    ISO_639_FILE => 'http://www.sil.org/iso639-3/iso-639-3_20120614.tab'
  }

  # Key stored for a language => table (standard) the code is indexed under.
  KEYS_TO_STANDARDS = {
    :iso_639_1      => :iso_639_1,
    :iso_639_2      => :iso_639_2,
    :iso_639_2_term => :iso_639_2,
    :iso_639_3      => :iso_639_3,
    :bcp_47         => :bcp_47,
    :bcp_47_alt     => :bcp_47
  }

  # Column names assigned, in order, to the tab-separated fields of each
  # ISO 639 row.
  ISO_639_COLUMNS = [
    :code,           # Code
    :status,         # Status
    :partner_agency, # Partner Agency
    :iso_639_3,      # 639_3
    :iso_639_2,      # 639_2 (alpha-3 bibliographic/terminology code)
    :b_code,         # alpha-3 bibliographic code if iso_639_2 contains terminology code
    :bt_equiv,       # bt_equiv (alpha-3 bibliographic/terminology equivalent)
    :iso_639_1,      # 639_1
    :name,           # Reference_Name
    :scope,          # Element_Scope
    :type,           # Language_Type
    :docs            # Documentation
  ]

  # Description of BCP 47 records that must not be imported. Descriptions are
  # stored as Symbols (see #process_bcp_47_entry), so this is a Symbol too.
  BCP_47_PRIVATE_USE_NAME = :'Private use'

  # input_path  - directory holding (or receiving) the downloaded input files
  # output_path - directory the generated dump/YAML files are written to
  def initialize(input_path, output_path)
    @input_path  = input_path
    @output_path = output_path
  end

  # Downloads missing input files, then generates the resources.
  #
  # import_yaml - when true, human-readable YAML versions are written next to
  #               the marshalled table
  def import(import_yaml = false)
    prepare_data
    import_data(import_yaml)
  end

  private

  # Fetches every file listed in INPUT_DATA into the input directory
  # (delegates the "is it needed" decision to download_if_necessary).
  def prepare_data
    INPUT_DATA.each do |file, url|
      TwitterCldr::Resources.download_if_necessary(File.join(@input_path, file), url)
    end
  end

  # Imports ISO 639 first and BCP 47 second (the BCP 47 step merges into
  # names already produced by the ISO step), sorts the result by name and by
  # key, and writes the output files.
  def import_data(import_yaml)
    result = import_iso_639
    result = import_bcp_47(result)

    sorted_codes   = result.inject({}) { |memo, (key, value)| memo[key] = Hash[value.sort]; memo }
    language_codes = Hash[sorted_codes.sort]

    language_codes_table = build_table(language_codes)

    write('language_codes_table.dump', 'wb', Marshal.dump(language_codes_table))

    if import_yaml
      write('language_codes.yml', 'w:utf-8', YAML.dump(language_codes))
      write('language_codes_table.yml', 'w:utf-8', YAML.dump(language_codes_table))
    end
  end

  # Writes +data+ to +file+ inside the output directory using open +mode+.
  def write(file, mode, data)
    File.open(File.join(@output_path, file), mode) { |output| output.write(data) }
  end

  # Generates codes in the following format (codes are stored as Symbols):
  #
  #   {
  #     :Albanian => {
  #       :iso_639_1      => :sq,
  #       :iso_639_2      => :alb, # default (bibliographic) code
  #       :iso_639_2_term => :sqi, # terminology code (optional)
  #       :iso_639_3      => :sqi
  #     }
  #   }
  #
  # The first line of the file is treated as a header and skipped. Blank rows
  # are ignored.
  def import_iso_639(result = {})
    File.open(File.join(@input_path, ISO_639_FILE)) do |file|
      file.gets # skip header

      file.each_line do |line|
        # drop the quotes around quoted text together with any tabs inside it,
        # so that splitting on tabs yields the real columns
        entry = line.chomp.gsub(/"(.*)"/) { $1.gsub("\t", '') }
        next if entry.strip.empty?

        data = Hash[ISO_639_COLUMNS.zip(entry.split("\t"))]
        next unless importable_iso_639_row?(data)

        codes = result[data[:name].to_sym] ||= {}

        set_iso_639_data(codes, :iso_639_1, data[:iso_639_1])

        if blank?(data[:bt_equiv])
          set_iso_639_data(codes, :iso_639_2, data[:iso_639_2])
        else
          set_iso_639_data(codes, :iso_639_2, data[:bt_equiv])
          set_iso_639_data(codes, :iso_639_2_term, data[:iso_639_2])
        end

        set_iso_639_data(codes, :iso_639_3, data[:iso_639_3])
      end
    end

    result
  end

  # A row is imported when
  #   * either bibliographic and terminology codes are the same (:bt_equiv is
  #     empty) or :iso_639_2 contains the terminology code and :bt_equiv the
  #     bibliographic one (:b_code is present),
  #   * it has a name other than 'Reserved for local use',
  #   * its scope is not 'C' (collection).
  def importable_iso_639_row?(data)
    return false if blank?(data[:name])
    return false if data[:name] == 'Reserved for local use'
    return false if data[:scope] == 'C'

    blank?(data[:bt_equiv]) || !blank?(data[:b_code])
  end

  # Stores +value+ as a Symbol under +key+, ignoring missing or empty values.
  def set_iso_639_data(data, key, value)
    data[key] = value.to_sym unless blank?(value)
  end

  # True for nil and for empty strings (short rows produce nil columns).
  def blank?(value)
    value.nil? || value.empty?
  end

  # Generates codes in the following format (codes are stored as Symbols):
  #
  #   {
  #     :Bangka => {
  #       :bcp_47     => :mfb,      # preferred code
  #       :bcp_47_alt => :"ms-mfb"  # alternative code (optional)
  #     }
  #   }
  #
  # The registry is a sequence of records separated by '%%' lines; each record
  # is a list of "Key: value" fields, and a long value may be folded onto
  # following lines. The first line of the file is skipped as a header.
  def import_bcp_47(result = {})
    File.open(File.join(@input_path, BCP_47_FILE)) do |file|
      file.gets # skip header

      data  = {}
      entry = ''

      file.each_line do |raw_line|
        line = raw_line.chomp

        if line == '%%'
          # record separator: flush the pending field, then the whole record
          process_bcp_47_entry(entry, data)
          process_bcp_47_data(data, result)
        elsif bcp_47_field_start?(line)
          process_bcp_47_entry(entry, data)
          entry = line
        else
          entry = unfold_bcp_47_line(entry, line)
        end
      end

      # the last record is not followed by a separator
      process_bcp_47_entry(entry, data)
      process_bcp_47_data(data, result)
    end

    result
  end

  # A line starts a new field when it is not indented and contains a colon.
  # Indented lines are continuations even if they contain a colon.
  def bcp_47_field_start?(line)
    line !~ /\A\s/ && line.include?(':')
  end

  # Appends a continuation line to the pending field. The fold (line break
  # plus indentation) is replaced by a single space so that unfolded values
  # do not contain runs of spaces.
  def unfold_bcp_47_line(entry, line)
    continuation = line.strip
    return entry if continuation.empty?

    entry.empty? ? continuation : "#{entry} #{continuation}"
  end

  # Parses one "Key: value" field into +data+. Descriptions are collected as
  # Symbols under 'names' (a record may have several); every other field is
  # stored under its lower-cased key.
  #
  # NOTE: +entry+ is emptied in place afterwards, which is how the caller's
  # pending field gets reset.
  def process_bcp_47_entry(entry, data)
    return if entry.nil? || entry.empty?

    key, value = entry.chomp.split(':', 2).map(&:strip)

    if key == 'Description'
      (data['names'] ||= []) << value.to_sym
    else
      data[key.downcase] = value
    end

    entry.clear
  end

  # Merges one parsed registry record into +result+ and then empties +data+.
  #
  # Only 'language' and 'extlang' records are imported; private use records
  # and collections are skipped. Names already present in +result+ (from the
  # ISO import) get the BCP 47 codes added; names that are new receive the
  # BCP 47 codes plus a copy of the codes of the first already-known name.
  def process_bcp_47_data(data, result)
    names = data['names'] || []

    if importable_bcp_47_record?(data, names)
      existing_names = names.select { |name| result.has_key?(name) }

      preferred   = data['preferred-value']
      alternative = [data['prefix'], data['subtag']].compact.join('-')

      bcp_47 = {}

      bcp_47[:bcp_47]     = (preferred || alternative).to_sym
      bcp_47[:bcp_47_alt] = alternative.to_sym if preferred

      existing_names.each do |name|
        result[name.to_sym].merge!(bcp_47)
      end

      bcp_47.merge!(result[existing_names.first]) unless existing_names.empty?

      (names - existing_names).each do |name|
        result[name.to_sym] = bcp_47.dup
      end
    end

    data.clear
  end

  # See #process_bcp_47_data for the rules.
  def importable_bcp_47_record?(data, names)
    return false if data.empty?
    return false unless %w[language extlang].include?(data['type'])
    return false if names.include?(BCP_47_PRIVATE_USE_NAME)

    data['scope'] != 'collection'
  end

  # Builds lookup tables keyed by :name and by every standard listed in
  # KEYS_TO_STANDARDS. All tables point at the same per-language hash, which
  # holds the language's codes plus its :name. Each table is sorted by key.
  def build_table(language_codes_map)
    # can't use Hash with default proc here, because we won't be able to marshal this hash later in this case
    standards = KEYS_TO_STANDARDS.values.uniq.sort_by(&:to_s)

    table = ([:name] + standards).inject({}) do |memo, key|
      memo.merge!(key => {})
    end

    language_codes_map.each do |name, codes|
      table[:name][name] = { :name => name }.merge(codes)
    end

    table[:name].values.each do |data|
      KEYS_TO_STANDARDS.each do |key, standard|
        table[standard][data[key].to_sym] = data if data[key]
      end
    end

    table.each do |key, codes|
      table[key] = Hash[codes.sort]
    end

    table
  end

end
230
+
231
+ end
232
+ end