twitter_cldr 1.4.1 → 1.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (145) hide show
  1. data/NOTICE +36 -2
  2. data/README.md +2 -2
  3. data/lib/twitter_cldr/collation/collator.rb +143 -0
  4. data/lib/twitter_cldr/collation/implicit_collation_elements.rb +188 -0
  5. data/lib/twitter_cldr/collation/sort_key.rb +199 -0
  6. data/lib/twitter_cldr/collation/trie.rb +73 -0
  7. data/lib/twitter_cldr/collation/trie_builder.rb +56 -0
  8. data/lib/twitter_cldr/collation.rb +14 -0
  9. data/lib/twitter_cldr/core_ext/localized_object.rb +3 -2
  10. data/lib/twitter_cldr/core_ext/string.rb +1 -1
  11. data/lib/twitter_cldr/formatters/calendars/datetime_formatter.rb +89 -72
  12. data/lib/twitter_cldr/normalization/base.rb +22 -0
  13. data/lib/twitter_cldr/normalization/hangul.rb +68 -0
  14. data/lib/twitter_cldr/{normalizers → normalization}/nfc.rb +2 -2
  15. data/lib/twitter_cldr/{normalizers → normalization}/nfd.rb +1 -1
  16. data/lib/twitter_cldr/{normalizers → normalization}/nfkc.rb +5 -17
  17. data/lib/twitter_cldr/{normalizers → normalization}/nfkd.rb +3 -18
  18. data/lib/twitter_cldr/normalization.rb +15 -0
  19. data/lib/twitter_cldr/shared/code_point.rb +5 -3
  20. data/lib/twitter_cldr/tokenizers/base.rb +15 -1
  21. data/lib/twitter_cldr/tokenizers/calendars/datetime_tokenizer.rb +6 -1
  22. data/lib/twitter_cldr/utils/code_points.rb +1 -1
  23. data/lib/twitter_cldr/version.rb +2 -2
  24. data/lib/twitter_cldr.rb +9 -8
  25. data/resources/collation/FractionalUCA_SHORT.txt +41593 -0
  26. data/resources/locales/af/calendars.yml +164 -0
  27. data/resources/locales/af/languages.yml +173 -0
  28. data/resources/locales/af/numbers.yml +42 -0
  29. data/resources/locales/af/plurals.yml +2 -0
  30. data/resources/locales/af/units.yml +88 -0
  31. data/resources/locales/ar/calendars.yml +9 -0
  32. data/resources/locales/ar/numbers.yml +15 -2
  33. data/resources/locales/ca/calendars.yml +228 -0
  34. data/resources/locales/ca/languages.yml +510 -0
  35. data/resources/locales/ca/numbers.yml +43 -0
  36. data/resources/locales/ca/plurals.yml +2 -0
  37. data/resources/locales/ca/units.yml +93 -0
  38. data/resources/locales/cs/calendars.yml +229 -0
  39. data/resources/locales/cs/languages.yml +471 -0
  40. data/resources/locales/cs/numbers.yml +44 -0
  41. data/resources/locales/cs/plurals.yml +2 -0
  42. data/resources/locales/cs/units.yml +114 -0
  43. data/resources/locales/da/calendars.yml +10 -0
  44. data/resources/locales/da/numbers.yml +13 -0
  45. data/resources/locales/de/calendars.yml +9 -0
  46. data/resources/locales/de/numbers.yml +13 -0
  47. data/resources/locales/el/calendars.yml +227 -0
  48. data/resources/locales/el/languages.yml +519 -0
  49. data/resources/locales/el/numbers.yml +42 -0
  50. data/resources/locales/el/plurals.yml +2 -0
  51. data/resources/locales/el/units.yml +107 -0
  52. data/resources/locales/en/calendars.yml +10 -0
  53. data/resources/locales/en/numbers.yml +13 -0
  54. data/resources/locales/es/calendars.yml +9 -0
  55. data/resources/locales/es/numbers.yml +13 -0
  56. data/resources/locales/eu/calendars.yml +173 -0
  57. data/resources/locales/eu/languages.yml +161 -0
  58. data/resources/locales/eu/numbers.yml +43 -0
  59. data/resources/locales/eu/plurals.yml +2 -0
  60. data/resources/locales/eu/units.yml +91 -0
  61. data/resources/locales/fa/calendars.yml +10 -0
  62. data/resources/locales/fa/numbers.yml +13 -0
  63. data/resources/locales/fi/calendars.yml +10 -0
  64. data/resources/locales/fi/numbers.yml +14 -1
  65. data/resources/locales/fil/calendars.yml +8 -0
  66. data/resources/locales/fil/numbers.yml +13 -0
  67. data/resources/locales/fr/calendars.yml +9 -0
  68. data/resources/locales/fr/numbers.yml +14 -1
  69. data/resources/locales/he/calendars.yml +9 -0
  70. data/resources/locales/he/numbers.yml +13 -0
  71. data/resources/locales/hi/calendars.yml +8 -0
  72. data/resources/locales/hi/numbers.yml +13 -0
  73. data/resources/locales/hu/calendars.yml +10 -0
  74. data/resources/locales/hu/numbers.yml +15 -2
  75. data/resources/locales/id/calendars.yml +8 -0
  76. data/resources/locales/id/numbers.yml +16 -3
  77. data/resources/locales/it/calendars.yml +9 -0
  78. data/resources/locales/it/numbers.yml +13 -0
  79. data/resources/locales/ja/calendars.yml +9 -0
  80. data/resources/locales/ja/numbers.yml +13 -0
  81. data/resources/locales/ko/calendars.yml +9 -0
  82. data/resources/locales/ko/numbers.yml +13 -0
  83. data/resources/locales/ms/calendars.yml +8 -0
  84. data/resources/locales/ms/numbers.yml +16 -3
  85. data/resources/locales/nb/calendars.yml +234 -0
  86. data/resources/locales/{no → nb}/languages.yml +25 -4
  87. data/resources/locales/nb/numbers.yml +43 -0
  88. data/resources/locales/nb/plurals.yml +2 -0
  89. data/resources/locales/nb/units.yml +87 -0
  90. data/resources/locales/nl/calendars.yml +10 -0
  91. data/resources/locales/nl/numbers.yml +13 -0
  92. data/resources/locales/pl/calendars.yml +9 -0
  93. data/resources/locales/pl/numbers.yml +14 -1
  94. data/resources/locales/pt/calendars.yml +9 -0
  95. data/resources/locales/pt/numbers.yml +13 -0
  96. data/resources/locales/ru/calendars.yml +10 -0
  97. data/resources/locales/ru/numbers.yml +14 -1
  98. data/resources/locales/sv/calendars.yml +10 -0
  99. data/resources/locales/sv/numbers.yml +14 -1
  100. data/resources/locales/th/calendars.yml +67 -57
  101. data/resources/locales/th/numbers.yml +13 -0
  102. data/resources/locales/tr/calendars.yml +9 -0
  103. data/resources/locales/tr/numbers.yml +13 -0
  104. data/resources/locales/uk/calendars.yml +199 -0
  105. data/resources/locales/uk/languages.yml +519 -0
  106. data/resources/locales/uk/numbers.yml +45 -0
  107. data/resources/locales/uk/plurals.yml +2 -0
  108. data/resources/locales/uk/units.yml +135 -0
  109. data/resources/locales/ur/calendars.yml +9 -0
  110. data/resources/locales/ur/numbers.yml +13 -0
  111. data/resources/locales/zh/calendars.yml +8 -0
  112. data/resources/locales/zh/numbers.yml +13 -0
  113. data/resources/locales/zh-Hant/calendars.yml +8 -0
  114. data/resources/locales/zh-Hant/numbers.yml +16 -3
  115. data/resources/locales/zh-Hant/plurals.yml +2 -0
  116. data/resources/unicode_data/hangul_blocks.yml +21 -0
  117. data/spec/collation/CollationTest_CLDR_NON_IGNORABLE_Short.txt +714 -0
  118. data/spec/collation/collation_spec.rb +93 -0
  119. data/spec/collation/collator_spec.rb +117 -0
  120. data/spec/collation/implicit_collation_elements_spec.rb +24 -0
  121. data/spec/collation/sort_key_spec.rb +56 -0
  122. data/spec/collation/trie_builder_spec.rb +114 -0
  123. data/spec/collation/trie_spec.rb +97 -0
  124. data/spec/core_ext/calendars/datetime_spec.rb +5 -0
  125. data/spec/core_ext/calendars_spec.rb +34 -0
  126. data/spec/core_ext/numbers_spec.rb +39 -0
  127. data/spec/core_ext/string_spec.rb +4 -4
  128. data/spec/formatters/calendars/datetime_formatter_spec.rb +92 -2
  129. data/spec/{normalizers → normalization}/NormalizationTestShort.txt +0 -0
  130. data/spec/{normalizers → normalization}/base_spec.rb +1 -1
  131. data/spec/normalization/hangul_spec.rb +42 -0
  132. data/spec/{normalizers → normalization}/normalization_spec.rb +15 -16
  133. data/spec/readme_spec.rb +2 -2
  134. data/spec/shared/code_point_spec.rb +42 -30
  135. data/spec/shared/resources_spec.rb +30 -6
  136. data/spec/tokenizers/base_spec.rb +17 -0
  137. data/spec/twitter_cldr_spec.rb +1 -1
  138. metadata +71 -83
  139. data/lib/twitter_cldr/normalizers/base.rb +0 -34
  140. data/lib/twitter_cldr/normalizers.rb +0 -14
  141. data/resources/locales/no/calendars.yml +0 -127
  142. data/resources/locales/no/numbers.yml +0 -29
  143. data/resources/locales/no/plurals.yml +0 -1
  144. data/resources/unicode_data/blocks_hangul.yml +0 -46
  145. data/spec/normalizers/NormalizationTest.txt +0 -18431
@@ -0,0 +1,56 @@
1
+ # encoding: UTF-8
2
+
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
6
module TwitterCldr
  module Collation

    # Builds a collation elements Trie from the file containing a fractional collation elements table.
    #
    # Only lines that match FRACTIONAL_UCA_REGEXP contribute to the trie, i.e. lines of the form
    #
    #   <HEX>[ <HEX> ...]; [<hex>, <hex>, ...][<hex>, <hex>, ...]...
    #
    # Every other line (comments, blank lines, anything that does not match) is ignored.
    #
    class TrieBuilder

      # Capture 1: whitespace-separated upper-case hexadecimal code points.
      # Capture 2: one or more bracketed collation elements.
      FRACTIONAL_UCA_REGEXP = /^((?:[0-9A-F]+)(?:\s[0-9A-F]+)*);\s((?:\[.*?\])(?:\[.*?\])*)/

      # Convenience entry point: builds the trie for a resource in one call.
      #
      # file_path - path of the table, relative to TwitterCldr::RESOURCES_DIR.
      #
      # Returns the populated TwitterCldr::Collation::Trie.
      def self.load_trie(file_path)
        new(file_path).build
      end

      # resource - path of the table, relative to TwitterCldr::RESOURCES_DIR.
      def initialize(resource)
        @file_path = File.join(TwitterCldr::RESOURCES_DIR, resource)
      end

      # Reads the table and returns a TwitterCldr::Collation::Trie filled with its entries.
      def build
        parse_trie(load_collation_elements_table)
      end

      private

      # Adds one trie entry per matching line of +table+.
      #
      # table - anything that responds to each_line (a String or an IO).
      #
      # Returns the populated trie.
      def parse_trie(table)
        trie = TwitterCldr::Collation::Trie.new

        table.each_line do |line|
          # Explicit MatchData instead of the $1/$2 globals, which any nested match would overwrite.
          match = FRACTIONAL_UCA_REGEXP.match(line)
          next unless match

          trie.add(parse_code_points(match[1]), parse_collation_element(match[2]))
        end

        trie
      end

      # Returns the whole table as a String. The block form of File.open closes the
      # file handle even when reading raises.
      def load_collation_elements_table
        File.open(@file_path, 'r') { |file| file.read }
      end

      # "0041 0300" => [0x41, 0x300]
      def parse_code_points(string)
        string.split.map { |cp| cp.to_i(16) }
      end

      # "[29, 05, 9C][, 8A, 05]" => [[0x29, 0x05, 0x9C], [0, 0x8A, 0x05]]
      #
      # split is called with -1 so that trailing empty fields are kept; an empty
      # field becomes 0 through String#to_i.
      def parse_collation_element(string)
        string.scan(/\[.*?\]/).map do |element|
          element[1..-2].gsub(/\s/, '').split(',', -1).map { |bytes| bytes.to_i(16) }
        end
      end

    end

  end
end
@@ -0,0 +1,14 @@
1
+ # encoding: UTF-8
2
+
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
6
module TwitterCldr
  # Namespace for the collation classes. Each constant is registered with
  # autoload, so its file is only required the first time the constant is used.
  module Collation
    autoload :Collator, 'twitter_cldr/collation/collator'
    autoload :ImplicitCollationElements, 'twitter_cldr/collation/implicit_collation_elements'
    autoload :SortKey, 'twitter_cldr/collation/sort_key'
    autoload :Trie, 'twitter_cldr/collation/trie'
    autoload :TrieBuilder, 'twitter_cldr/collation/trie_builder'
  end
end
@@ -9,10 +9,11 @@ module TwitterCldr
9
9
 
10
10
  def initialize(obj, locale, options = {})
11
11
  @base_obj = obj
12
- @locale = locale
12
+ @locale = TwitterCldr.convert_locale(locale)
13
+ @locale = TwitterCldr::DEFAULT_LOCALE unless TwitterCldr.supported_locale?(@locale)
13
14
 
14
15
  options = options.dup
15
- options[:locale] ||= @locale
16
+ options[:locale] = @locale
16
17
 
17
18
  @formatter = formatter_const.new(options) if formatter_const
18
19
  end
@@ -32,7 +32,7 @@ module TwitterCldr
32
32
  options[:using] ||= :NFD
33
33
 
34
34
  if VALID_NORMALIZERS.include?(options[:using])
35
- normalizer_const = TwitterCldr::Normalizers.const_get(options[:using])
35
+ normalizer_const = TwitterCldr::Normalization.const_get(options[:using])
36
36
  LocalizedString.new(normalizer_const.normalize(@base_obj), @locale)
37
37
  else
38
38
  raise ArgumentError.new("Invalid normalization form specified with :using option. Choices are [#{VALID_NORMALIZERS.map(&:to_s).join(", ")}]")
@@ -51,7 +51,13 @@ module TwitterCldr
51
51
  protected
52
52
 
53
53
  def era(date, pattern, length)
54
- raise NotImplementedError
54
+ choices = case length
55
+ when 1..3
56
+ @tokenizer.calendar[:eras][:abbr]
57
+ else
58
+ @tokenizer.calendar[:eras][:name]
59
+ end
60
+ choices[date.year < 0 ? 0 : 1]
55
61
  end
56
62
 
57
63
  def year(date, pattern, length)
@@ -72,100 +78,111 @@ module TwitterCldr
72
78
  def quarter(date, pattern, length)
73
79
  quarter = (date.month.to_i - 1) / 3 + 1
74
80
  case length
75
- when 1
76
- quarter.to_s
77
- when 2
78
- quarter.to_s.rjust(length, '0')
79
- when 3
80
- @tokenizer.calendar[:quarters][:format][:abbreviated][quarter]
81
- when 4
82
- @tokenizer.calendar[:quarters][:format][:wide][quarter]
81
+ when 1
82
+ quarter.to_s
83
+ when 2
84
+ quarter.to_s.rjust(length, '0')
85
+ when 3
86
+ @tokenizer.calendar[:quarters][:format][:abbreviated][quarter]
87
+ when 4
88
+ @tokenizer.calendar[:quarters][:format][:wide][quarter]
83
89
  end
84
90
  end
85
91
 
86
92
  def quarter_stand_alone(date, pattern, length)
87
93
  quarter = (date.month.to_i - 1) / 3 + 1
88
94
  case length
89
- when 1
90
- quarter.to_s
91
- when 2
92
- quarter.to_s.rjust(length, '0')
93
- when 3
94
- raise NotImplementedError, 'requires cldr\'s "multiple inheritance"'
95
- # @tokenizer.calendar[:quarters][:'stand-alone'][:abbreviated][key]
96
- when 4
97
- raise NotImplementedError, 'requires cldr\'s "multiple inheritance"'
98
- # @tokenizer.calendar[:quarters][:'stand-alone'][:wide][key]
99
- when 5
100
- @tokenizer.calendar[:quarters][:'stand-alone'][:narrow][quarter]
95
+ when 1
96
+ quarter.to_s
97
+ when 2
98
+ quarter.to_s.rjust(length, '0')
99
+ when 3
100
+ raise NotImplementedError, 'requires cldr\'s "multiple inheritance"'
101
+ # @tokenizer.calendar[:quarters][:'stand-alone'][:abbreviated][key]
102
+ when 4
103
+ raise NotImplementedError, 'requires cldr\'s "multiple inheritance"'
104
+ # @tokenizer.calendar[:quarters][:'stand-alone'][:wide][key]
105
+ when 5
106
+ @tokenizer.calendar[:quarters][:'stand-alone'][:narrow][quarter]
101
107
  end
102
108
  end
103
109
 
104
110
  def month(date, pattern, length)
105
111
  case length
106
- when 1
107
- date.month.to_s
108
- when 2
109
- date.month.to_s.rjust(length, '0')
110
- when 3
111
- @tokenizer.calendar[:months][:format][:abbreviated][date.month]
112
- when 4
113
- @tokenizer.calendar[:months][:format][:wide][date.month]
114
- when 5
115
- raise NotImplementedError, 'requires cldr\'s "multiple inheritance"'
116
- # @tokenizer.calendar[:months][:format][:narrow][date.month]
117
- else
118
- # raise unknown date format
112
+ when 1
113
+ date.month.to_s
114
+ when 2
115
+ date.month.to_s.rjust(length, '0')
116
+ when 3
117
+ @tokenizer.calendar[:months][:format][:abbreviated][date.month]
118
+ when 4
119
+ @tokenizer.calendar[:months][:format][:wide][date.month]
120
+ when 5
121
+ raise NotImplementedError, 'requires cldr\'s "multiple inheritance"'
122
+ # @tokenizer.calendar[:months][:format][:narrow][date.month]
123
+ else
124
+ # raise unknown date format
119
125
  end
120
126
  end
121
127
 
122
128
  def month_stand_alone(date, pattern, length)
123
129
  case length
124
- when 1
125
- date.month.to_s
126
- when 2
127
- date.month.to_s.rjust(length, '0')
128
- when 3
129
- raise NotImplementedError, 'requires cldr\'s "multiple inheritance"'
130
- @tokenizer.calendar[:months][:'stand-alone'][:abbreviated][date.month]
131
- when 4
132
- raise NotImplementedError, 'requires cldr\'s "multiple inheritance"'
133
- @tokenizer.calendar[:months][:'stand-alone'][:wide][date.month]
134
- when 5
135
- @tokenizer.calendar[:months][:'stand-alone'][:narrow][date.month]
136
- else
137
- # raise unknown date format
130
+ when 1
131
+ date.month.to_s
132
+ when 2
133
+ date.month.to_s.rjust(length, '0')
134
+ when 3
135
+ raise NotImplementedError, 'requires cldr\'s "multiple inheritance"'
136
+ @tokenizer.calendar[:months][:'stand-alone'][:abbreviated][date.month]
137
+ when 4
138
+ raise NotImplementedError, 'requires cldr\'s "multiple inheritance"'
139
+ @tokenizer.calendar[:months][:'stand-alone'][:wide][date.month]
140
+ when 5
141
+ @tokenizer.calendar[:months][:'stand-alone'][:narrow][date.month]
142
+ else
143
+ # raise unknown date format
138
144
  end
139
145
  end
140
146
 
141
147
  def day(date, pattern, length)
142
148
  case length
143
- when 1
144
- date.day.to_s
145
- when 2
146
- date.day.to_s.rjust(length, '0')
149
+ when 1
150
+ date.day.to_s
151
+ when 2
152
+ date.day.to_s.rjust(length, '0')
147
153
  end
148
154
  end
149
155
 
150
156
  def weekday(date, pattern, length)
151
157
  key = WEEKDAY_KEYS[date.wday]
152
158
  case length
153
- when 1..3
154
- @tokenizer.calendar[:days][:format][:abbreviated][key]
155
- when 4
156
- @tokenizer.calendar[:days][:format][:wide][key]
157
- when 5
158
- @tokenizer.calendar[:days][:'stand-alone'][:narrow][key]
159
+ when 1..3
160
+ @tokenizer.calendar[:days][:format][:abbreviated][key]
161
+ when 4
162
+ @tokenizer.calendar[:days][:format][:wide][key]
163
+ when 5
164
+ @tokenizer.calendar[:days][:'stand-alone'][:narrow][key]
159
165
  end
160
166
  end
161
167
 
162
168
  def weekday_local(date, pattern, length)
163
169
  # "Like E except adds a numeric value depending on the local starting day of the week"
164
- raise NotImplementedError, 'need to defer a country to lookup the local first day of week from weekdata'
170
+ # CLDR does not contain data as to which day is the first day of the week, so we will assume Monday (Ruby default)
171
+ case length
172
+ when 1..2
173
+ date.cwday.to_s
174
+ else
175
+ weekday(date, pattern, length)
176
+ end
165
177
  end
166
178
 
167
179
  def weekday_local_stand_alone(date, pattern, length)
168
- raise NotImplementedError, 'need to defer a country to lookup the local first day of week from weekdata'
180
+ case length
181
+ when 1
182
+ weekday_local(date, pattern, length)
183
+ else
184
+ weekday(date, pattern, length)
185
+ end
169
186
  end
170
187
 
171
188
  def period(time, pattern, length)
@@ -178,14 +195,14 @@ module TwitterCldr
178
195
  def hour(time, pattern, length)
179
196
  hour = time.hour
180
197
  hour = case pattern[0, 1]
181
- when 'h' # [1-12]
182
- hour > 12 ? (hour - 12) : (hour == 0 ? 12 : hour)
183
- when 'H' # [0-23]
184
- hour
185
- when 'K' # [0-11]
186
- hour > 11 ? hour - 12 : hour
187
- when 'k' # [1-24]
188
- hour == 0 ? 24 : hour
198
+ when 'h' # [1-12]
199
+ hour > 12 ? (hour - 12) : (hour == 0 ? 12 : hour)
200
+ when 'H' # [0-23]
201
+ hour
202
+ when 'K' # [0-11]
203
+ hour > 11 ? hour - 12 : hour
204
+ when 'k' # [1-24]
205
+ hour == 0 ? 24 : hour
189
206
  end
190
207
  length == 1 ? hour.to_s : hour.to_s.rjust(length, '0')
191
208
  end
@@ -205,10 +222,10 @@ module TwitterCldr
205
222
 
206
223
  def timezone(time, pattern, length)
207
224
  case length
208
- when 1..3
209
- time.zone
210
- else
211
- "UTC #{time.strftime("%z")}"
225
+ when 1..3
226
+ time.zone
227
+ else
228
+ "UTC #{time.strftime("%z")}"
212
229
  end
213
230
  end
214
231
 
@@ -0,0 +1,22 @@
1
+ # encoding: UTF-8
2
+
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
6
module TwitterCldr
  module Normalization

    # Base class of the Normalization namespace. In this file it only provides
    # the combining class lookup below.
    class Base

      class << self

        # Looks up the combining class of a code point.
        #
        # code_point - value passed straight to TwitterCldr::Shared::CodePoint.for_hex
        #              (presumably a hexadecimal code point string - confirm against for_hex).
        #
        # Returns the combining class converted with to_i, or 0 when the lookup
        # raises NoMethodError (for example when for_hex returns nil).
        #
        # NOTE(review): the rescue covers every NoMethodError raised anywhere in the
        # lookup chain, so an unrelated bug inside for_hex would also be reported as
        # combining class 0. Kept as is to preserve the existing best-effort contract.
        def combining_class_for(code_point)
          TwitterCldr::Shared::CodePoint.for_hex(code_point).combining_class.to_i
        rescue NoMethodError
          0
        end

      end

    end
  end
end
@@ -0,0 +1,68 @@
1
+ # encoding: UTF-8
2
+
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
6
module TwitterCldr
  module Normalization

    # Arithmetic composition and decomposition of Hangul syllables. Documented in Section 3.12 at
    # http://www.unicode.org/versions/Unicode6.1.0/ch03.pdf
    #
    # All methods work on Integer code points.
    module Hangul

      # The constants live on the module itself (not on its singleton class) so that
      # they can be referenced from outside as TwitterCldr::Normalization::Hangul::SBASE etc.
      # The methods below still resolve them through lexical scope.
      SBASE = 0xAC00
      LBASE = 0x1100
      VBASE = 0x1161
      TBASE = 0x11A7

      LCOUNT = 19
      VCOUNT = 21
      TCOUNT = 28

      NCOUNT = VCOUNT * TCOUNT # 588
      SCOUNT = LCOUNT * NCOUNT # 11172

      LLIMIT = LBASE + LCOUNT # 0x1113 = 4371
      VLIMIT = VBASE + VCOUNT # 0x1176 = 4470
      TLIMIT = TBASE + TCOUNT # 0x11C3 = 4547
      SLIMIT = SBASE + SCOUNT # 0xD7A4 = 55204

      class << self

        # Special composition for Hangul syllables.
        #
        # code_points - Array of Integers [L, V] or [L, V, T]. The values are not
        #               range-checked here.
        #
        # Returns the Integer code point of the composed syllable.
        def compose(code_points)
          l_index = code_points.first - LBASE
          v_index = code_points[1] - VBASE
          t_index = code_points[2] ? code_points[2] - TBASE : 0 # T part may be missing, that's ok

          SBASE + l_index * NCOUNT + v_index * TCOUNT + t_index
        end

        # Special decomposition for Hangul syllables.
        # Also see http://source.icu-project.org/repos/icu/icuhtml/trunk/design/collation/ICU_collation_design.htm#Hangul_Implicit_CEs
        #
        # code_point - Integer code point of a syllable. It is not range-checked
        #              here; see hangul_syllable?.
        #
        # Returns [L, V] or, when the syllable has a trailing consonant, [L, V, T].
        def decompose(code_point)
          lv_index, t_index = (code_point - SBASE).divmod(TCOUNT)
          l_index, v_index = lv_index.divmod(VCOUNT)

          result = [LBASE + l_index, VBASE + v_index]
          result << TBASE + t_index if t_index > 0 # index 0 means "no T part"
          result
        end

        # Returns true when code_point lies in SBASE...SLIMIT (upper bound excluded).
        def hangul_syllable?(code_point)
          (SBASE...SLIMIT).include?(code_point)
        end

      end

    end
  end
end
@@ -4,7 +4,7 @@
4
4
  # http://www.apache.org/licenses/LICENSE-2.0
5
5
 
6
6
  module TwitterCldr
7
- module Normalizers
7
+ module Normalization
8
8
 
9
9
  # Implements normalization of a Unicode string to Normalization Form C (NFC).
10
10
  # This normalization includes canonical decomposition followed by canonical composition.
@@ -14,7 +14,7 @@ module TwitterCldr
14
14
  class << self
15
15
 
16
16
  def normalize_code_points(code_points)
17
- compose(TwitterCldr::Normalizers::NFD.normalize_code_points(code_points))
17
+ compose(TwitterCldr::Normalization::NFD.normalize_code_points(code_points))
18
18
  end
19
19
 
20
20
  end
@@ -4,7 +4,7 @@
4
4
  # http://www.apache.org/licenses/LICENSE-2.0
5
5
 
6
6
  module TwitterCldr
7
- module Normalizers
7
+ module Normalization
8
8
 
9
9
  # Implements normalization of a Unicode string to Normalization Form D (NFD).
10
10
  # This normalization includes only canonical decomposition.
@@ -4,7 +4,7 @@
4
4
  # http://www.apache.org/licenses/LICENSE-2.0
5
5
 
6
6
  module TwitterCldr
7
- module Normalizers
7
+ module Normalization
8
8
 
9
9
  # Implements normalization of a Unicode string to Normalization Form KC (NFKC).
10
10
  # This normalization form includes compatibility decomposition followed by compatibility composition.
@@ -20,7 +20,7 @@ module TwitterCldr
20
20
  end
21
21
 
22
22
  def normalize_code_points(code_points)
23
- compose(TwitterCldr::Normalizers::NFKD.normalize_code_points(code_points))
23
+ compose(TwitterCldr::Normalization::NFKD.normalize_code_points(code_points))
24
24
  end
25
25
 
26
26
  protected
@@ -44,7 +44,7 @@ module TwitterCldr
44
44
  end
45
45
 
46
46
  if hangul_code_points.size > 1 && !next_hangul_type
47
- hangul_code_points.size.times { final.pop }
47
+ final.pop(hangul_code_points.size)
48
48
  final << compose_hangul(hangul_code_points)
49
49
  hangul_code_points.clear
50
50
  end
@@ -55,23 +55,11 @@ module TwitterCldr
55
55
  end
56
56
 
57
57
  def valid_hangul_sequence?(buffer_size, hangul_type)
58
- case [buffer_size, hangul_type]
59
- when [0, :lparts], [1, :vparts], [2, :tparts]
60
- true
61
- else
62
- false
63
- end
58
+ [[0, :lparts], [1, :vparts], [2, :tparts]].include?([buffer_size, hangul_type])
64
59
  end
65
60
 
66
- # Special composition for Hangul syllables. Documented in Section 3.12 at
67
- # http://www.unicode.org/versions/Unicode6.1.0/ch03.pdf
68
- #
69
61
  def compose_hangul(code_points)
70
- l_index = code_points.first.hex - HANGUL_DECOMPOSITION_CONSTANTS[:LBase]
71
- v_index = code_points[1].hex - HANGUL_DECOMPOSITION_CONSTANTS[:VBase]
72
- t_index = code_points[2] ? code_points[2].hex - HANGUL_DECOMPOSITION_CONSTANTS[:TBase] : 0 # tpart may be missing, that's ok
73
- lv_index = (l_index * HANGUL_DECOMPOSITION_CONSTANTS[:NCount]) + (v_index * HANGUL_DECOMPOSITION_CONSTANTS[:TCount])
74
- (HANGUL_DECOMPOSITION_CONSTANTS[:SBase] + lv_index + t_index).to_s(16).upcase.rjust(4, "0")
62
+ TwitterCldr::Normalization::Hangul.compose(code_points.map { |cp| cp.hex }).to_s(16).upcase.rjust(4, "0")
75
63
  end
76
64
 
77
65
  # Implements composition of Unicode code points following the guidelines here:
@@ -4,12 +4,12 @@
4
4
  # http://www.apache.org/licenses/LICENSE-2.0
5
5
 
6
6
  module TwitterCldr
7
- # Normalizers module includes algorithm for Unicode normalization. Basic information on this topic can be found in the
7
+ # Normalization module includes algorithm for Unicode normalization. Basic information on this topic can be found in the
8
8
  # Unicode Standard Annex #15 "Unicode Normalization Forms" at http://www.unicode.org/reports/tr15/. More detailed
9
9
  # description is given in the section "3.11 Normalization Forms" of the Unicode Standard core specification. The
10
10
  # latest version at the moment (for Unicode 6.1) is available at http://www.unicode.org/versions/Unicode6.1.0/ch03.pdf.
11
11
  #
12
- module Normalizers
12
+ module Normalization
13
13
 
14
14
  # Implements normalization of a Unicode string to Normalization Form KD (NFKD).
15
15
  # This normalization form includes only compatibility decomposition.
@@ -73,23 +73,8 @@ module TwitterCldr
73
73
  unicode_data.decomposition.split
74
74
  end
75
75
 
76
- # Special decomposition for Hangul syllables. Documented in Section 3.12 at
77
- # http://www.unicode.org/versions/Unicode6.1.0/ch03.pdf
78
- #
79
76
  def decompose_hangul(code_point)
80
- s_index = code_point.hex - HANGUL_DECOMPOSITION_CONSTANTS[:SBase]
81
-
82
- l_index = s_index / HANGUL_DECOMPOSITION_CONSTANTS[:NCount]
83
- v_index = (s_index % HANGUL_DECOMPOSITION_CONSTANTS[:NCount]) / HANGUL_DECOMPOSITION_CONSTANTS[:TCount]
84
- t_index = s_index % HANGUL_DECOMPOSITION_CONSTANTS[:TCount]
85
-
86
- result = []
87
-
88
- result << (HANGUL_DECOMPOSITION_CONSTANTS[:LBase] + l_index).to_s(16).upcase
89
- result << (HANGUL_DECOMPOSITION_CONSTANTS[:VBase] + v_index).to_s(16).upcase
90
- result << (HANGUL_DECOMPOSITION_CONSTANTS[:TBase] + t_index).to_s(16).upcase if t_index > 0
91
-
92
- result
77
+ TwitterCldr::Normalization::Hangul.decompose(code_point.hex).map { |e| e.to_s(16).upcase }
93
78
  end
94
79
 
95
80
  # Performs the Canonical Ordering Algorithm by stable sorting of every subsequence of combining code points
@@ -0,0 +1,15 @@
1
+ # encoding: UTF-8
2
+
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
6
module TwitterCldr
  # Namespace for the normalization classes. Each constant is registered with
  # autoload, so its file is only required the first time the constant is used.
  module Normalization
    autoload :Base, 'twitter_cldr/normalization/base'
    autoload :Hangul, 'twitter_cldr/normalization/hangul'
    autoload :NFC, 'twitter_cldr/normalization/nfc'
    autoload :NFD, 'twitter_cldr/normalization/nfd'
    autoload :NFKC, 'twitter_cldr/normalization/nfkc'
    autoload :NFKD, 'twitter_cldr/normalization/nfkd'
  end
end
@@ -38,7 +38,9 @@ module TwitterCldr
38
38
  class << self
39
39
 
40
40
  def for_hex(code_point)
41
- target = get_block(code_point.rjust(4, "0").upcase)
41
+ code_point = code_point.rjust(4, '0').upcase
42
+
43
+ target = get_block(code_point)
42
44
 
43
45
  if target && target.first
44
46
  block_data = TwitterCldr.get_resource(:unicode_data, target.first)
@@ -63,7 +65,7 @@ module TwitterCldr
63
65
  def hangul_type(code_point)
64
66
  if code_point
65
67
  code_point_int = code_point.hex
66
- [:lparts, :vparts, :tparts, :compositions, :decompositions].each do |type|
68
+ [:lparts, :vparts, :tparts, :compositions].each do |type|
67
69
  hangul_blocks[type].each do |range|
68
70
  return type if range.include?(code_point_int)
69
71
  end
@@ -80,7 +82,7 @@ module TwitterCldr
80
82
  protected
81
83
 
82
84
  def hangul_blocks
83
- @hangul_blocks ||= TwitterCldr.get_resource(:unicode_data, :blocks_hangul)
85
+ @hangul_blocks ||= TwitterCldr.get_resource(:unicode_data, :hangul_blocks)
84
86
  end
85
87
 
86
88
  def composition_exclusions
@@ -11,7 +11,7 @@ module TwitterCldr
11
11
  attr_accessor :type, :placeholders
12
12
 
13
13
  def initialize(options = {})
14
- @locale = (options[:locale] || TwitterCldr::DEFAULT_LOCALE).to_sym
14
+ @locale = TwitterCldr.convert_locale(options[:locale] || TwitterCldr::DEFAULT_LOCALE)
15
15
  self.init_resources
16
16
  self.init_placeholders
17
17
  end
@@ -100,6 +100,20 @@ module TwitterCldr
100
100
  end
101
101
  end
102
102
 
103
+ # expands all path symbols
104
+ def expand(current, haystack)
105
+ if current.is_a?(Symbol)
106
+ expand(traverse(current.to_s.split('.').map(&:to_sym), haystack), haystack)
107
+ elsif current.is_a?(Hash)
108
+ current.inject({}) do |ret, (key, val)|
109
+ ret[key] = expand(val, haystack)
110
+ ret
111
+ end
112
+ else
113
+ current
114
+ end
115
+ end
116
+
103
117
  def expand_pattern(format_str, type)
104
118
  if format_str.is_a?(Symbol)
105
119
  # symbols mean another path was given
@@ -45,7 +45,8 @@ module TwitterCldr
45
45
  end
46
46
 
47
47
  def init_resources
48
- @resource = TwitterCldr.get_locale_resource(@locale, :calendars)[TwitterCldr.convert_locale(@locale)]
48
+ @resource = TwitterCldr.get_locale_resource(@locale, :calendars)[@locale]
49
+ @resource = expand(@resource, @resource)
49
50
 
50
51
  @resource[:calendars].each_pair do |calendar_type, options|
51
52
  next if calendar_type == DEFAULT_CALENDAR_TYPE
@@ -78,6 +79,10 @@ module TwitterCldr
78
79
  def pattern_for(resource)
79
80
  resource.is_a?(Hash) ? resource[:pattern] : resource
80
81
  end
82
+
83
+ def path_map
84
+ PATH_MAP
85
+ end
81
86
  end
82
87
  end
83
88
  end
@@ -10,7 +10,7 @@ module TwitterCldr
10
10
  class << self
11
11
 
12
12
  def to_char(code_point)
13
- [code_point.upcase.hex].pack('U*')
13
+ [code_point.hex].pack('U*')
14
14
  end
15
15
 
16
16
  def from_char(char)
@@ -4,5 +4,5 @@
4
4
  # http://www.apache.org/licenses/LICENSE-2.0
5
5
 
6
6
  module TwitterCldr
7
- VERSION = "1.4.1"
8
- end
7
+ VERSION = "1.5.0"
8
+ end