twitter_cldr 1.4.1 → 1.5.0
Sign up to get free protection for your applications and to get access to all the features.
- data/NOTICE +36 -2
- data/README.md +2 -2
- data/lib/twitter_cldr/collation/collator.rb +143 -0
- data/lib/twitter_cldr/collation/implicit_collation_elements.rb +188 -0
- data/lib/twitter_cldr/collation/sort_key.rb +199 -0
- data/lib/twitter_cldr/collation/trie.rb +73 -0
- data/lib/twitter_cldr/collation/trie_builder.rb +56 -0
- data/lib/twitter_cldr/collation.rb +14 -0
- data/lib/twitter_cldr/core_ext/localized_object.rb +3 -2
- data/lib/twitter_cldr/core_ext/string.rb +1 -1
- data/lib/twitter_cldr/formatters/calendars/datetime_formatter.rb +89 -72
- data/lib/twitter_cldr/normalization/base.rb +22 -0
- data/lib/twitter_cldr/normalization/hangul.rb +68 -0
- data/lib/twitter_cldr/{normalizers → normalization}/nfc.rb +2 -2
- data/lib/twitter_cldr/{normalizers → normalization}/nfd.rb +1 -1
- data/lib/twitter_cldr/{normalizers → normalization}/nfkc.rb +5 -17
- data/lib/twitter_cldr/{normalizers → normalization}/nfkd.rb +3 -18
- data/lib/twitter_cldr/normalization.rb +15 -0
- data/lib/twitter_cldr/shared/code_point.rb +5 -3
- data/lib/twitter_cldr/tokenizers/base.rb +15 -1
- data/lib/twitter_cldr/tokenizers/calendars/datetime_tokenizer.rb +6 -1
- data/lib/twitter_cldr/utils/code_points.rb +1 -1
- data/lib/twitter_cldr/version.rb +2 -2
- data/lib/twitter_cldr.rb +9 -8
- data/resources/collation/FractionalUCA_SHORT.txt +41593 -0
- data/resources/locales/af/calendars.yml +164 -0
- data/resources/locales/af/languages.yml +173 -0
- data/resources/locales/af/numbers.yml +42 -0
- data/resources/locales/af/plurals.yml +2 -0
- data/resources/locales/af/units.yml +88 -0
- data/resources/locales/ar/calendars.yml +9 -0
- data/resources/locales/ar/numbers.yml +15 -2
- data/resources/locales/ca/calendars.yml +228 -0
- data/resources/locales/ca/languages.yml +510 -0
- data/resources/locales/ca/numbers.yml +43 -0
- data/resources/locales/ca/plurals.yml +2 -0
- data/resources/locales/ca/units.yml +93 -0
- data/resources/locales/cs/calendars.yml +229 -0
- data/resources/locales/cs/languages.yml +471 -0
- data/resources/locales/cs/numbers.yml +44 -0
- data/resources/locales/cs/plurals.yml +2 -0
- data/resources/locales/cs/units.yml +114 -0
- data/resources/locales/da/calendars.yml +10 -0
- data/resources/locales/da/numbers.yml +13 -0
- data/resources/locales/de/calendars.yml +9 -0
- data/resources/locales/de/numbers.yml +13 -0
- data/resources/locales/el/calendars.yml +227 -0
- data/resources/locales/el/languages.yml +519 -0
- data/resources/locales/el/numbers.yml +42 -0
- data/resources/locales/el/plurals.yml +2 -0
- data/resources/locales/el/units.yml +107 -0
- data/resources/locales/en/calendars.yml +10 -0
- data/resources/locales/en/numbers.yml +13 -0
- data/resources/locales/es/calendars.yml +9 -0
- data/resources/locales/es/numbers.yml +13 -0
- data/resources/locales/eu/calendars.yml +173 -0
- data/resources/locales/eu/languages.yml +161 -0
- data/resources/locales/eu/numbers.yml +43 -0
- data/resources/locales/eu/plurals.yml +2 -0
- data/resources/locales/eu/units.yml +91 -0
- data/resources/locales/fa/calendars.yml +10 -0
- data/resources/locales/fa/numbers.yml +13 -0
- data/resources/locales/fi/calendars.yml +10 -0
- data/resources/locales/fi/numbers.yml +14 -1
- data/resources/locales/fil/calendars.yml +8 -0
- data/resources/locales/fil/numbers.yml +13 -0
- data/resources/locales/fr/calendars.yml +9 -0
- data/resources/locales/fr/numbers.yml +14 -1
- data/resources/locales/he/calendars.yml +9 -0
- data/resources/locales/he/numbers.yml +13 -0
- data/resources/locales/hi/calendars.yml +8 -0
- data/resources/locales/hi/numbers.yml +13 -0
- data/resources/locales/hu/calendars.yml +10 -0
- data/resources/locales/hu/numbers.yml +15 -2
- data/resources/locales/id/calendars.yml +8 -0
- data/resources/locales/id/numbers.yml +16 -3
- data/resources/locales/it/calendars.yml +9 -0
- data/resources/locales/it/numbers.yml +13 -0
- data/resources/locales/ja/calendars.yml +9 -0
- data/resources/locales/ja/numbers.yml +13 -0
- data/resources/locales/ko/calendars.yml +9 -0
- data/resources/locales/ko/numbers.yml +13 -0
- data/resources/locales/ms/calendars.yml +8 -0
- data/resources/locales/ms/numbers.yml +16 -3
- data/resources/locales/nb/calendars.yml +234 -0
- data/resources/locales/{no → nb}/languages.yml +25 -4
- data/resources/locales/nb/numbers.yml +43 -0
- data/resources/locales/nb/plurals.yml +2 -0
- data/resources/locales/nb/units.yml +87 -0
- data/resources/locales/nl/calendars.yml +10 -0
- data/resources/locales/nl/numbers.yml +13 -0
- data/resources/locales/pl/calendars.yml +9 -0
- data/resources/locales/pl/numbers.yml +14 -1
- data/resources/locales/pt/calendars.yml +9 -0
- data/resources/locales/pt/numbers.yml +13 -0
- data/resources/locales/ru/calendars.yml +10 -0
- data/resources/locales/ru/numbers.yml +14 -1
- data/resources/locales/sv/calendars.yml +10 -0
- data/resources/locales/sv/numbers.yml +14 -1
- data/resources/locales/th/calendars.yml +67 -57
- data/resources/locales/th/numbers.yml +13 -0
- data/resources/locales/tr/calendars.yml +9 -0
- data/resources/locales/tr/numbers.yml +13 -0
- data/resources/locales/uk/calendars.yml +199 -0
- data/resources/locales/uk/languages.yml +519 -0
- data/resources/locales/uk/numbers.yml +45 -0
- data/resources/locales/uk/plurals.yml +2 -0
- data/resources/locales/uk/units.yml +135 -0
- data/resources/locales/ur/calendars.yml +9 -0
- data/resources/locales/ur/numbers.yml +13 -0
- data/resources/locales/zh/calendars.yml +8 -0
- data/resources/locales/zh/numbers.yml +13 -0
- data/resources/locales/zh-Hant/calendars.yml +8 -0
- data/resources/locales/zh-Hant/numbers.yml +16 -3
- data/resources/locales/zh-Hant/plurals.yml +2 -0
- data/resources/unicode_data/hangul_blocks.yml +21 -0
- data/spec/collation/CollationTest_CLDR_NON_IGNORABLE_Short.txt +714 -0
- data/spec/collation/collation_spec.rb +93 -0
- data/spec/collation/collator_spec.rb +117 -0
- data/spec/collation/implicit_collation_elements_spec.rb +24 -0
- data/spec/collation/sort_key_spec.rb +56 -0
- data/spec/collation/trie_builder_spec.rb +114 -0
- data/spec/collation/trie_spec.rb +97 -0
- data/spec/core_ext/calendars/datetime_spec.rb +5 -0
- data/spec/core_ext/calendars_spec.rb +34 -0
- data/spec/core_ext/numbers_spec.rb +39 -0
- data/spec/core_ext/string_spec.rb +4 -4
- data/spec/formatters/calendars/datetime_formatter_spec.rb +92 -2
- data/spec/{normalizers → normalization}/NormalizationTestShort.txt +0 -0
- data/spec/{normalizers → normalization}/base_spec.rb +1 -1
- data/spec/normalization/hangul_spec.rb +42 -0
- data/spec/{normalizers → normalization}/normalization_spec.rb +15 -16
- data/spec/readme_spec.rb +2 -2
- data/spec/shared/code_point_spec.rb +42 -30
- data/spec/shared/resources_spec.rb +30 -6
- data/spec/tokenizers/base_spec.rb +17 -0
- data/spec/twitter_cldr_spec.rb +1 -1
- metadata +71 -83
- data/lib/twitter_cldr/normalizers/base.rb +0 -34
- data/lib/twitter_cldr/normalizers.rb +0 -14
- data/resources/locales/no/calendars.yml +0 -127
- data/resources/locales/no/numbers.yml +0 -29
- data/resources/locales/no/plurals.yml +0 -1
- data/resources/unicode_data/blocks_hangul.yml +0 -46
- data/spec/normalizers/NormalizationTest.txt +0 -18431
@@ -0,0 +1,56 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
# Copyright 2012 Twitter, Inc
|
4
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
5
|
+
|
6
|
+
module TwitterCldr
|
7
|
+
module Collation
|
8
|
+
|
9
|
+
# Builds a collation elements Trie from the file containing a fractional collation elements table.
|
10
|
+
#
|
11
|
+
class TrieBuilder

  # Matches one data line of the fractional collation elements table:
  # capture 1 is the space-separated hex code points, capture 2 is the
  # run of bracketed collation elements that follows the "; " separator.
  FRACTIONAL_UCA_REGEXP = /^((?:[0-9A-F]+)(?:\s[0-9A-F]+)*);\s((?:\[.*?\])(?:\[.*?\])*)/

  # Convenience entry point: builds a trie from the given resource path
  # (resolved relative to TwitterCldr::RESOURCES_DIR by #initialize).
  def self.load_trie(file_path)
    new(file_path).build
  end

  # resource - path of the table file, relative to TwitterCldr::RESOURCES_DIR.
  def initialize(resource)
    @file_path = File.join(TwitterCldr::RESOURCES_DIR, resource)
  end

  # Reads the table and returns a populated TwitterCldr::Collation::Trie.
  def build
    parse_trie(load_collation_elements_table)
  end

  private

  # Adds one trie entry per line of +table+ that matches FRACTIONAL_UCA_REGEXP;
  # every other line is ignored. +table+ only needs to respond to #each_line,
  # so a String, StringIO or IO all work.
  def parse_trie(table)
    trie = TwitterCldr::Collation::Trie.new

    table.each_line do |line|
      match = FRACTIONAL_UCA_REGEXP.match(line)
      next unless match

      trie.add(parse_code_points(match[1]), parse_collation_element(match[2]))
    end

    trie
  end

  # Returns the whole table as a String. The block form of File.open
  # guarantees the handle is closed, and File.open (unlike Kernel#open)
  # never interprets the path as a command to spawn.
  def load_collation_elements_table
    File.open(@file_path, 'r') { |file| file.read }
  end

  # "0061 0301" => [0x61, 0x301]
  def parse_code_points(string)
    string.split.map { |cp| cp.to_i(16) }
  end

  # "[29, 05][, E8, 05]" => [[0x29, 0x05], [0, 0xE8, 0x05]]
  # The -1 limit keeps empty fields, which then convert to 0.
  def parse_collation_element(string)
    string.scan(/\[.*?\]/).map do |match|
      match[1..-2].gsub(/\s/, '').split(',', -1).map { |bytes| bytes.to_i(16) }
    end
  end

end
|
54
|
+
|
55
|
+
end
|
56
|
+
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
# Copyright 2012 Twitter, Inc
|
4
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
5
|
+
|
6
|
+
# Collation namespace. Each component is registered for autoloading, so its
# file is required only when the constant is first referenced.
module TwitterCldr
  module Collation
    [
      [:Collator,                  'twitter_cldr/collation/collator'],
      [:ImplicitCollationElements, 'twitter_cldr/collation/implicit_collation_elements'],
      [:SortKey,                   'twitter_cldr/collation/sort_key'],
      [:Trie,                      'twitter_cldr/collation/trie'],
      [:TrieBuilder,               'twitter_cldr/collation/trie_builder']
    ].each { |const_name, path| autoload const_name, path }
  end
end
|
@@ -9,10 +9,11 @@ module TwitterCldr
|
|
9
9
|
|
10
10
|
# Wraps +obj+ for localization. The requested locale is passed through
# TwitterCldr.convert_locale and replaced with TwitterCldr::DEFAULT_LOCALE
# when TwitterCldr.supported_locale? rejects it. The resolved locale is
# injected into a copy of +options+ (the caller's hash is not modified)
# before the formatter, if any, is instantiated.
def initialize(obj, locale, options = {})
  @base_obj = obj

  converted = TwitterCldr.convert_locale(locale)
  @locale = TwitterCldr.supported_locale?(converted) ? converted : TwitterCldr::DEFAULT_LOCALE

  formatter_options = options.dup
  formatter_options[:locale] = @locale

  @formatter = formatter_const.new(formatter_options) if formatter_const
end
|
@@ -32,7 +32,7 @@ module TwitterCldr
|
|
32
32
|
options[:using] ||= :NFD
|
33
33
|
|
34
34
|
if VALID_NORMALIZERS.include?(options[:using])
|
35
|
-
normalizer_const = TwitterCldr::
|
35
|
+
normalizer_const = TwitterCldr::Normalization.const_get(options[:using])
|
36
36
|
LocalizedString.new(normalizer_const.normalize(@base_obj), @locale)
|
37
37
|
else
|
38
38
|
raise ArgumentError.new("Invalid normalization form specified with :using option. Choices are [#{VALID_NORMALIZERS.map(&:to_s).join(", ")}]")
|
@@ -51,7 +51,13 @@ module TwitterCldr
|
|
51
51
|
protected
|
52
52
|
|
53
53
|
# Returns the era name for +date+ from the tokenizer's calendar data:
# the abbreviated form for pattern lengths 1-3, the full name otherwise.
# Index 0 is used for negative years, index 1 for all others.
def era(date, pattern, length)
  width = (1..3).include?(length) ? :abbr : :name
  choices = @tokenizer.calendar[:eras][width]
  era_index = date.year < 0 ? 0 : 1
  choices[era_index]
end
|
56
62
|
|
57
63
|
def year(date, pattern, length)
|
@@ -72,100 +78,111 @@ module TwitterCldr
|
|
72
78
|
# Formats the quarter (1-4) of +date+: a bare number for length 1,
# zero-padded for length 2, and the abbreviated / wide name from the
# calendar data for lengths 3 / 4. Any other length yields nil.
def quarter(date, pattern, length)
  number = (date.month.to_i - 1) / 3 + 1

  if length == 1
    number.to_s
  elsif length == 2
    number.to_s.rjust(length, '0')
  elsif length == 3
    @tokenizer.calendar[:quarters][:format][:abbreviated][number]
  elsif length == 4
    @tokenizer.calendar[:quarters][:format][:wide][number]
  end
end
|
85
91
|
|
86
92
|
# Stand-alone variant of the quarter field. Lengths 1 and 2 are numeric,
# lengths 3 and 4 are not implemented yet (they raise NotImplementedError),
# length 5 returns the narrow stand-alone name. Other lengths yield nil.
def quarter_stand_alone(date, pattern, length)
  number = (date.month.to_i - 1) / 3 + 1

  return number.to_s if length == 1
  return number.to_s.rjust(length, '0') if length == 2

  if length == 3 || length == 4
    # would read @tokenizer.calendar[:quarters][:'stand-alone'][:abbreviated / :wide]
    raise NotImplementedError, 'requires cldr\'s "multiple inheritance"'
  end

  @tokenizer.calendar[:quarters][:'stand-alone'][:narrow][number] if length == 5
end
|
103
109
|
|
104
110
|
# Formats the month of +date+: numeric for lengths 1-2 (zero-padded for 2),
# abbreviated / wide name from the calendar data for lengths 3 / 4.
# Length 5 (narrow) is not implemented and raises NotImplementedError.
# Unknown lengths currently yield nil.
def month(date, pattern, length)
  if length == 1
    date.month.to_s
  elsif length == 2
    date.month.to_s.rjust(length, '0')
  elsif length == 3
    @tokenizer.calendar[:months][:format][:abbreviated][date.month]
  elsif length == 4
    @tokenizer.calendar[:months][:format][:wide][date.month]
  elsif length == 5
    # would read @tokenizer.calendar[:months][:format][:narrow][date.month]
    raise NotImplementedError, 'requires cldr\'s "multiple inheritance"'
  end
end
|
121
127
|
|
122
128
|
# Stand-alone variant of the month field. Lengths 1 and 2 are numeric
# (zero-padded for 2); lengths 3 and 4 are not implemented yet and raise
# NotImplementedError; length 5 returns the narrow stand-alone name.
# Unknown lengths currently yield nil.
def month_stand_alone(date, pattern, length)
  case length
    when 1
      date.month.to_s
    when 2
      date.month.to_s.rjust(length, '0')
    when 3
      raise NotImplementedError, 'requires cldr\'s "multiple inheritance"'
      # Intended lookup, kept as a comment because nothing after the raise can run:
      # @tokenizer.calendar[:months][:'stand-alone'][:abbreviated][date.month]
    when 4
      raise NotImplementedError, 'requires cldr\'s "multiple inheritance"'
      # @tokenizer.calendar[:months][:'stand-alone'][:wide][date.month]
    when 5
      @tokenizer.calendar[:months][:'stand-alone'][:narrow][date.month]
    else
      # raise unknown date format
  end
end
|
140
146
|
|
141
147
|
# Formats the day of the month: as-is for length 1, left-padded with
# zeros for length 2. Any other length yields nil.
def day(date, pattern, length)
  if length == 1
    date.day.to_s
  elsif length == 2
    date.day.to_s.rjust(length, '0')
  end
end
|
149
155
|
|
150
156
|
# Formats the day of the week. The key is looked up in WEEKDAY_KEYS by
# date.wday; lengths 1-3 return the abbreviated name, 4 the wide name and
# 5 the narrow name (taken from the stand-alone data). Other lengths
# yield nil.
def weekday(date, pattern, length)
  key = WEEKDAY_KEYS[date.wday]
  days = @tokenizer.calendar[:days]

  if (1..3).include?(length)
    days[:format][:abbreviated][key]
  elsif length == 4
    days[:format][:wide][key]
  elsif length == 5
    days[:'stand-alone'][:narrow][key]
  end
end
|
161
167
|
|
162
168
|
# "Like E except adds a numeric value depending on the local starting day of the week"
# CLDR does not contain data as to which day is the first day of the week,
# so Monday is assumed: lengths 1-2 return date.cwday (Monday = 1) as a
# string, every other length is delegated to #weekday.
def weekday_local(date, pattern, length)
  return date.cwday.to_s if (1..2).include?(length)

  weekday(date, pattern, length)
end
|
166
178
|
|
167
179
|
# Stand-alone local weekday: length 1 is handled by #weekday_local,
# every other length by #weekday.
def weekday_local_stand_alone(date, pattern, length)
  if length == 1
    weekday_local(date, pattern, length)
  else
    weekday(date, pattern, length)
  end
end
|
170
187
|
|
171
188
|
def period(time, pattern, length)
|
@@ -178,14 +195,14 @@ module TwitterCldr
|
|
178
195
|
# Formats the hour of +time+ according to the first character of +pattern+:
#   'h' => 1-12, 'H' => 0-23, 'K' => 0-11, 'k' => 1-24.
# The result is zero-padded to +length+ unless length is 1. An unrecognised
# pattern character produces an empty value (nil.to_s) before padding.
def hour(time, pattern, length)
  raw = time.hour
  symbol = pattern[0, 1]

  value =
    if symbol == 'h' # [1-12]
      if raw > 12
        raw - 12
      elsif raw == 0
        12
      else
        raw
      end
    elsif symbol == 'H' # [0-23]
      raw
    elsif symbol == 'K' # [0-11]
      raw > 11 ? raw - 12 : raw
    elsif symbol == 'k' # [1-24]
      raw == 0 ? 24 : raw
    end

  text = value.to_s
  length == 1 ? text : text.rjust(length, '0')
end
|
@@ -205,10 +222,10 @@ module TwitterCldr
|
|
205
222
|
|
206
223
|
# Formats the time zone: time.zone for lengths 1-3, otherwise the literal
# "UTC " followed by the numeric offset from strftime("%z").
def timezone(time, pattern, length)
  return time.zone if (1..3).include?(length)

  "UTC #{time.strftime("%z")}"
end
|
214
231
|
|
@@ -0,0 +1,22 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
# Copyright 2012 Twitter, Inc
|
4
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
5
|
+
|
6
|
+
module TwitterCldr
|
7
|
+
module Normalization
|
8
|
+
class Base

  # Returns the combining class of the given hex code point as an Integer,
  # as reported by TwitterCldr::Shared::CodePoint.for_hex. Any NoMethodError
  # raised along the way (e.g. when the lookup returns nil) results in 0.
  def self.combining_class_for(code_point)
    begin
      code_point_data = TwitterCldr::Shared::CodePoint.for_hex(code_point)
      code_point_data.combining_class.to_i
    rescue NoMethodError
      0
    end
  end

end
|
21
|
+
end
|
22
|
+
end
|
@@ -0,0 +1,68 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
# Copyright 2012 Twitter, Inc
|
4
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
5
|
+
|
6
|
+
module TwitterCldr
|
7
|
+
module Normalization
|
8
|
+
module Hangul

  class << self

    # Constants of the arithmetic Hangul syllable (de)composition algorithm.
    SBASE = 0xAC00
    LBASE = 0x1100
    VBASE = 0x1161
    TBASE = 0x11A7

    LCOUNT = 19
    VCOUNT = 21
    TCOUNT = 28

    NCOUNT = VCOUNT * TCOUNT  # 588
    SCOUNT = LCOUNT * NCOUNT  # 11172

    LLIMIT = LBASE + LCOUNT   # 0x1113 = 4371
    VLIMIT = VBASE + VCOUNT   # 0x1176 = 4470
    TLIMIT = TBASE + TCOUNT   # 0x11C3 = 4547
    SLIMIT = SBASE + SCOUNT   # 0xD7A4 = 55204

    # Special composition for Hangul syllables. Documented in Section 3.12 at
    # http://www.unicode.org/versions/Unicode6.1.0/ch03.pdf
    #
    # Takes integer code points [L, V] or [L, V, T] and returns the integer
    # code point of the composed syllable. No range validation is performed.
    def compose(code_points)
      lead, vowel, trail = code_points[0], code_points[1], code_points[2]

      l_index = lead - LBASE
      v_index = vowel - VBASE
      t_index = trail ? trail - TBASE : 0 # T part may be missing, that's ok

      SBASE + l_index * NCOUNT + v_index * TCOUNT + t_index
    end

    # Special decomposition for Hangul syllables. Documented in Section 3.12 at
    # http://www.unicode.org/versions/Unicode6.1.0/ch03.pdf
    # Also see http://source.icu-project.org/repos/icu/icuhtml/trunk/design/collation/ICU_collation_design.htm#Hangul_Implicit_CEs
    #
    # Takes the integer code point of a syllable and returns [L, V] or
    # [L, V, T]; the T part is omitted when its index is 0.
    def decompose(code_point)
      lv_index, t_index = (code_point - SBASE).divmod(TCOUNT)
      l_index, v_index = lv_index.divmod(VCOUNT)

      parts = [LBASE + l_index, VBASE + v_index]
      parts << TBASE + t_index if t_index > 0
      parts
    end

    # True when +code_point+ lies in SBASE...SLIMIT (end exclusive).
    def hangul_syllable?(code_point)
      code_point_range = SBASE...SLIMIT
      code_point_range.include?(code_point)
    end

  end

end
|
67
|
+
end
|
68
|
+
end
|
@@ -4,7 +4,7 @@
|
|
4
4
|
# http://www.apache.org/licenses/LICENSE-2.0
|
5
5
|
|
6
6
|
module TwitterCldr
|
7
|
-
module
|
7
|
+
module Normalization
|
8
8
|
|
9
9
|
# Implements normalization of a Unicode string to Normalization Form C (NFC).
|
10
10
|
# This normalization includes canonical decomposition followed by canonical composition.
|
@@ -14,7 +14,7 @@ module TwitterCldr
|
|
14
14
|
class << self
|
15
15
|
|
16
16
|
# NFC = canonical decomposition (delegated to NFD) followed by composition.
def normalize_code_points(code_points)
  decomposed = TwitterCldr::Normalization::NFD.normalize_code_points(code_points)
  compose(decomposed)
end
|
19
19
|
|
20
20
|
end
|
@@ -4,7 +4,7 @@
|
|
4
4
|
# http://www.apache.org/licenses/LICENSE-2.0
|
5
5
|
|
6
6
|
module TwitterCldr
|
7
|
-
module
|
7
|
+
module Normalization
|
8
8
|
|
9
9
|
# Implements normalization of a Unicode string to Normalization Form KC (NFKC).
|
10
10
|
# This normalization form includes compatibility decomposition followed by compatibility composition.
|
@@ -20,7 +20,7 @@ module TwitterCldr
|
|
20
20
|
end
|
21
21
|
|
22
22
|
# NFKC = compatibility decomposition (delegated to NFKD) followed by composition.
def normalize_code_points(code_points)
  decomposed = TwitterCldr::Normalization::NFKD.normalize_code_points(code_points)
  compose(decomposed)
end
|
25
25
|
|
26
26
|
protected
|
@@ -44,7 +44,7 @@ module TwitterCldr
|
|
44
44
|
end
|
45
45
|
|
46
46
|
if hangul_code_points.size > 1 && !next_hangul_type
|
47
|
-
hangul_code_points.size
|
47
|
+
final.pop(hangul_code_points.size)
|
48
48
|
final << compose_hangul(hangul_code_points)
|
49
49
|
hangul_code_points.clear
|
50
50
|
end
|
@@ -55,23 +55,11 @@ module TwitterCldr
|
|
55
55
|
end
|
56
56
|
|
57
57
|
# True when +hangul_type+ is the part expected at position +buffer_size+:
# an L part at position 0, a V part at 1, a T part at 2.
def valid_hangul_sequence?(buffer_size, hangul_type)
  allowed = [[0, :lparts], [1, :vparts], [2, :tparts]]
  allowed.any? { |pair| pair == [buffer_size, hangul_type] }
end
|
65
60
|
|
66
|
-
# Special composition for Hangul syllables. Documented in Section 3.12 at
|
67
|
-
# http://www.unicode.org/versions/Unicode6.1.0/ch03.pdf
|
68
|
-
#
|
69
61
|
# Composes hex-string jamo code points into a syllable via
# TwitterCldr::Normalization::Hangul.compose and returns it as an
# upper-case hex string left-padded with zeros to at least 4 characters.
def compose_hangul(code_points)
  numeric_parts = code_points.map(&:hex)
  syllable = TwitterCldr::Normalization::Hangul.compose(numeric_parts)
  syllable.to_s(16).upcase.rjust(4, "0")
end
|
76
64
|
|
77
65
|
# Implements composition of Unicode code points following the guidelines here:
|
@@ -4,12 +4,12 @@
|
|
4
4
|
# http://www.apache.org/licenses/LICENSE-2.0
|
5
5
|
|
6
6
|
module TwitterCldr
|
7
|
-
#
|
7
|
+
# Normalization module includes algorithm for Unicode normalization. Basic information on this topic can be found in the
|
8
8
|
# Unicode Standard Annex #15 "Unicode Normalization Forms" at http://www.unicode.org/reports/tr15/. More detailed
|
9
9
|
# description is given in the section "3.11 Normalization Forms" of the Unicode Standard core specification. The
|
10
10
|
# latest version at the moment (for Unicode 6.1) is available at http://www.unicode.org/versions/Unicode6.1.0/ch03.pdf.
|
11
11
|
#
|
12
|
-
module
|
12
|
+
module Normalization
|
13
13
|
|
14
14
|
# Implements normalization of a Unicode string to Normalization Form KD (NFKD).
|
15
15
|
# This normalization form includes only compatibility decomposition.
|
@@ -73,23 +73,8 @@ module TwitterCldr
|
|
73
73
|
unicode_data.decomposition.split
|
74
74
|
end
|
75
75
|
|
76
|
-
# Special decomposition for Hangul syllables. Documented in Section 3.12 at
|
77
|
-
# http://www.unicode.org/versions/Unicode6.1.0/ch03.pdf
|
78
|
-
#
|
79
76
|
# Decomposes a hex-string syllable code point via
# TwitterCldr::Normalization::Hangul.decompose and returns the parts as
# upper-case hex strings (no zero padding is applied).
def decompose_hangul(code_point)
  parts = TwitterCldr::Normalization::Hangul.decompose(code_point.hex)
  parts.map { |part| part.to_s(16).upcase }
end
|
94
79
|
|
95
80
|
# Performs the Canonical Ordering Algorithm by stable sorting of every subsequence of combining code points
|
@@ -0,0 +1,15 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
# Copyright 2012 Twitter, Inc
|
4
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
5
|
+
|
6
|
+
# Normalization namespace. Each component is registered for autoloading, so
# its file is required only when the constant is first referenced.
module TwitterCldr
  module Normalization
    [
      [:Base,   'twitter_cldr/normalization/base'],
      [:Hangul, 'twitter_cldr/normalization/hangul'],
      [:NFC,    'twitter_cldr/normalization/nfc'],
      [:NFD,    'twitter_cldr/normalization/nfd'],
      [:NFKC,   'twitter_cldr/normalization/nfkc'],
      [:NFKD,   'twitter_cldr/normalization/nfkd']
    ].each { |const_name, path| autoload const_name, path }
  end
end
|
@@ -38,7 +38,9 @@ module TwitterCldr
|
|
38
38
|
class << self
|
39
39
|
|
40
40
|
def for_hex(code_point)
|
41
|
-
|
41
|
+
code_point = code_point.rjust(4, '0').upcase
|
42
|
+
|
43
|
+
target = get_block(code_point)
|
42
44
|
|
43
45
|
if target && target.first
|
44
46
|
block_data = TwitterCldr.get_resource(:unicode_data, target.first)
|
@@ -63,7 +65,7 @@ module TwitterCldr
|
|
63
65
|
def hangul_type(code_point)
|
64
66
|
if code_point
|
65
67
|
code_point_int = code_point.hex
|
66
|
-
[:lparts, :vparts, :tparts, :compositions
|
68
|
+
[:lparts, :vparts, :tparts, :compositions].each do |type|
|
67
69
|
hangul_blocks[type].each do |range|
|
68
70
|
return type if range.include?(code_point_int)
|
69
71
|
end
|
@@ -80,7 +82,7 @@ module TwitterCldr
|
|
80
82
|
protected
|
81
83
|
|
82
84
|
# Returns the :hangul_blocks resource from the :unicode_data group,
# fetching it through TwitterCldr.get_resource on first use and caching
# it in @hangul_blocks afterwards.
def hangul_blocks
  unless @hangul_blocks
    @hangul_blocks = TwitterCldr.get_resource(:unicode_data, :hangul_blocks)
  end

  @hangul_blocks
end
|
85
87
|
|
86
88
|
def composition_exclusions
|
@@ -11,7 +11,7 @@ module TwitterCldr
|
|
11
11
|
attr_accessor :type, :placeholders
|
12
12
|
|
13
13
|
# Resolves the locale (options[:locale], or TwitterCldr::DEFAULT_LOCALE
# when it is missing or nil) through TwitterCldr.convert_locale, then
# initializes resources and placeholders.
def initialize(options = {})
  requested_locale = options[:locale] || TwitterCldr::DEFAULT_LOCALE
  @locale = TwitterCldr.convert_locale(requested_locale)

  init_resources
  init_placeholders
end
|
@@ -100,6 +100,20 @@ module TwitterCldr
|
|
100
100
|
end
|
101
101
|
end
|
102
102
|
|
103
|
+
# expands all path symbols
|
104
|
+
# expands all path symbols
#
# A Symbol is treated as a dot-separated path: it is split into symbol
# segments, resolved against +haystack+ with #traverse, and the result is
# expanded again. A Hash is rebuilt with every value expanded. Anything
# else is returned unchanged.
def expand(current, haystack)
  case current
  when Symbol
    path = current.to_s.split('.').map(&:to_sym)
    expand(traverse(path, haystack), haystack)
  when Hash
    expanded = {}
    current.each_pair { |key, val| expanded[key] = expand(val, haystack) }
    expanded
  else
    current
  end
end
|
116
|
+
|
103
117
|
def expand_pattern(format_str, type)
|
104
118
|
if format_str.is_a?(Symbol)
|
105
119
|
# symbols mean another path was given
|
@@ -45,7 +45,8 @@ module TwitterCldr
|
|
45
45
|
end
|
46
46
|
|
47
47
|
def init_resources
|
48
|
-
@resource = TwitterCldr.get_locale_resource(@locale, :calendars)[
|
48
|
+
@resource = TwitterCldr.get_locale_resource(@locale, :calendars)[@locale]
|
49
|
+
@resource = expand(@resource, @resource)
|
49
50
|
|
50
51
|
@resource[:calendars].each_pair do |calendar_type, options|
|
51
52
|
next if calendar_type == DEFAULT_CALENDAR_TYPE
|
@@ -78,6 +79,10 @@ module TwitterCldr
|
|
78
79
|
# Returns resource[:pattern] when +resource+ is a Hash; otherwise returns
# +resource+ itself.
def pattern_for(resource)
  return resource unless resource.is_a?(Hash)

  resource[:pattern]
end
|
82
|
+
|
83
|
+
# Returns the PATH_MAP constant (defined outside this hunk).
def path_map
|
84
|
+
PATH_MAP
|
85
|
+
end
|
81
86
|
end
|
82
87
|
end
|
83
88
|
end
|
data/lib/twitter_cldr/version.rb
CHANGED