twitter_cldr 2.4.3 → 3.0.0.beta1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/History.txt +3 -2
- data/README.md +6 -6
- data/Rakefile +8 -0
- data/lib/twitter_cldr/collation/collator.rb +4 -2
- data/lib/twitter_cldr/collation/sort_key_builder.rb +20 -8
- data/lib/twitter_cldr/normalization.rb +7 -6
- data/lib/twitter_cldr/normalization/base.rb +10 -1
- data/lib/twitter_cldr/normalization/hangul.rb +36 -25
- data/lib/twitter_cldr/normalization/nfkc.rb +7 -1
- data/lib/twitter_cldr/normalization/nfkd.rb +8 -5
- data/lib/twitter_cldr/normalization/quick_check.rb +41 -0
- data/lib/twitter_cldr/resources.rb +1 -0
- data/lib/twitter_cldr/resources/normalization_quick_check_importer.rb +86 -0
- data/lib/twitter_cldr/shared/code_point.rb +41 -15
- data/lib/twitter_cldr/version.rb +1 -1
- data/resources/custom/locales/en-GB/units.yml +1 -1
- data/resources/unicode_data/nfc_quick_check.yml +293 -0
- data/resources/unicode_data/nfd_quick_check.yml +909 -0
- data/resources/unicode_data/nfkc_quick_check.yml +989 -0
- data/resources/unicode_data/nfkd_quick_check.yml +1537 -0
- data/spec/collation/collator_spec.rb +19 -5
- data/spec/collation/sort_key_builder_spec.rb +31 -9
- data/spec/normalization/normalization_spec.rb +4 -0
- data/spec/shared/code_point_spec.rb +9 -4
- data/spec/utils/yaml/yaml_spec.rb +52 -63
- data/twitter_cldr.gemspec +1 -0
- metadata +171 -151
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 937a3b56e72068594f42c3c09d38da78c33e2a2c
+  data.tar.gz: d2f86005a0d9c50ba0ac90f4553e779544bd4ddf
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: eab5fae457ea2a42ac4a9943763eadda90c9505874f454a6fcf35b94af94f11aa09197d087965e3b06f1e1dffe7d389a59fb3c09c9792369d004ac35590d132d
+  data.tar.gz: 803107aec40257f79a901a02e1252e58b80f539590053641b579442d142d82a5ab78b4a0f6a0e298d4c2cf3512c29ac2dced848410b5c4ae0d6ef4a33d4ec0eb
data/History.txt
CHANGED
@@ -1,6 +1,7 @@
-==
+== 3.0.0

-*
+* Adding maximum_level option to SortKeyBuilder to limit the size of collation sort keys (@jrochkind).
+* Significant performance enhancements for normalization, estimated ~70% speed improvement.

 == 2.4.2

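The second changelog item refers to the code-point normalizers diffed further below. A minimal sketch of exercising one of them, assuming code points are passed as integers (which is how the Hangul and quick-check code in this release treats them); U+212B (ANGSTROM SIGN) fully decomposes to U+0041 followed by U+030A:

```ruby
require 'twitter_cldr'

# NFKD-normalize a single code point; the result follows from the Unicode
# decomposition mapping for U+212B (ANGSTROM SIGN).
TwitterCldr::Normalization::NFKD.normalize_code_points([0x212B])
# expected: [0x0041, 0x030A]  (LATIN CAPITAL LETTER A + COMBINING RING ABOVE)
```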
data/README.md
CHANGED
@@ -86,7 +86,7 @@ In addition to formatting regular decimals, TwitterCLDR supports short and long

 ### Dates and Times

-`
+`Time`, and `DateTime` objects are supported. `Date` objects are supported transiently:

 ```ruby
 DateTime.now.localize(:es).to_full_s      # "lunes, 12 de diciembre de 2011 21:44:57 UTC -08:00"
@@ -94,15 +94,15 @@ DateTime.now.localize(:es).to_long_s     # "12 de diciembre de 2011 21
 DateTime.now.localize(:es).to_medium_s    # "12/12/2011 21:44:57"
 DateTime.now.localize(:es).to_short_s     # "12/12/11 21:44"

-Date.today.localize(:es).to_full_s        # "lunes 12 de diciembre de 2011"
-Date.today.localize(:es).to_long_s        # "12 de diciembre de 2011"
-Date.today.localize(:es).to_medium_s      # "12/12/2011"
-Date.today.localize(:es).to_short_s       # "12/12/11"
-
 Time.now.localize(:es).to_full_s          # "21:44:57 UTC -0800"
 Time.now.localize(:es).to_long_s          # "21:44:57 UTC"
 Time.now.localize(:es).to_medium_s        # "21:44:57"
 Time.now.localize(:es).to_short_s         # "21:44"
+
+DateTime.now.localize(:es).to_date.to_full_s    # "lunes 12 de diciembre de 2011"
+DateTime.now.localize(:es).to_date.to_long_s    # "12 de diciembre de 2011"
+DateTime.now.localize(:es).to_date.to_medium_s  # "12/12/2011"
+DateTime.now.localize(:es).to_date.to_short_s   # "12/12/11"
 ```

 The default CLDR data set only includes 4 date formats, full, long, medium, and short. See below for a list of additional formats.
data/Rakefile
CHANGED
@@ -133,4 +133,12 @@ namespace :update do
   task :canonical_compositions do
     TwitterCldr::Resources::CanonicalCompositionsUpdater.new('./resources/unicode_data').update
   end
+
+  desc 'Import normalization quick check data'
+  task :normalization_quick_check do
+    TwitterCldr::Resources::NormalizationQuickCheckImporter.new(
+      './vendor',
+      './resources/unicode_data'
+    ).import
+  end
 end
data/lib/twitter_cldr/collation/collator.rb
CHANGED
@@ -32,8 +32,10 @@ module TwitterCldr
       string_a == string_b ? 0 : get_sort_key(string_a) <=> get_sort_key(string_b)
     end

-
-
+    # Second arg options, supports an option :maximum_level, to
+    # pass on to SortKeyBuilder :maximum_level.
+    def get_sort_key(string_or_code_points, method_options = {})
+      TwitterCldr::Collation::SortKeyBuilder.build(get_collation_elements(string_or_code_points), :case_first => @options[:case_first], :maximum_level => method_options[:maximum_level])
     end

     def get_collation_elements(string_or_code_points)
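A usage sketch of the new options argument, assuming a collator can be constructed with no arguments (default locale). Restricting the key to the primary level drops accent and case distinctions, which is the searching/matching use case described in the SortKeyBuilder comments below:

```ruby
require 'twitter_cldr'

collator = TwitterCldr::Collation::Collator.new

full_key    = collator.get_sort_key("résumé")
primary_key = collator.get_sort_key("résumé", :maximum_level => 1)

# Accents are secondary-level differences, so the primary-only keys of
# "résumé" and "resume" should match while the full keys differ.
primary_key == collator.get_sort_key("resume", :maximum_level => 1)
```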
data/lib/twitter_cldr/collation/sort_key_builder.rb
CHANGED
@@ -17,7 +17,8 @@ module TwitterCldr

       LEVEL_SEPARATOR = 1 # separate levels in a sort key '01' bytes

-      VALID_CASE_FIRST_OPTIONS
+      VALID_CASE_FIRST_OPTIONS = [nil, :lower, :upper]
+      VALID_MAXIMUM_LEVEL_OPTIONS = [nil, 1, 2, 3]

       attr_reader :collation_elements, :case_first

@@ -26,25 +27,36 @@ module TwitterCldr
       # Arguments:
       #
       #   collation_elements - an array of collation elements, represented as arrays of integer weights.
-      #
+      #   options - hash of options:
+      #     case_first - optional case-first sorting order setting: :upper, :lower, nil (discard case bits).
+      #     maximum_level - only append weights to maximum level specified (1 or 2), can be useful for searching/matching applications
       #
       # An instance of the class is created only to prevent passing of @collation_elements and @bytes_array from one
       # method into another while forming the sort key.
       #
-      def self.build(collation_elements,
-        new(collation_elements,
+      def self.build(collation_elements, options = nil)
+        new(collation_elements, options).bytes_array
       end

       # Arguments:
       #
       #   collation_elements - an array of collation elements, represented as arrays of integer weights.
-      #
+      #   options - hash of options:
+      #     case_first - optional case-first sorting order setting: :upper, :lower, nil (discard case bits).
+      #     maximum_level - only append weights to maximum level specified (1 or 2), can be useful for searching/matching applications
       #
-      def initialize(collation_elements,
+      def initialize(collation_elements, options = {})
+        raise ArgumentError, "second argument should be an options hash, not `#{options}`. Do you mean `:case_first => #{options}`?" unless options.kind_of? Hash
+
+        case_first = options[:case_first]
         raise ArgumentError, "invalid case-first options '#{case_first.inspect}'" unless VALID_CASE_FIRST_OPTIONS.include?(case_first)

+        maximum_level = options[:maximum_level]
+        raise ArgumentError, "invalid maximum_level option 'options[:maximum_level]'" unless VALID_MAXIMUM_LEVEL_OPTIONS.include?(maximum_level)
+
         @collation_elements = collation_elements
         @case_first = case_first
+        @maximum_level = maximum_level

         init_tertiary_constants
       end
@@ -59,8 +71,8 @@ module TwitterCldr
         @bytes_array = []

         append_primary_bytes
-        append_secondary_bytes
-        append_tertiary_bytes
+        append_secondary_bytes unless @maximum_level && (@maximum_level < 2)
+        append_tertiary_bytes unless @maximum_level && (@maximum_level < 3)

         @bytes_array
       end
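A sketch of the new options-hash interface, again assuming a default collator to produce the collation elements; the calls mirror the signatures in the diff above, while the string is arbitrary:

```ruby
require 'twitter_cldr'

collator = TwitterCldr::Collation::Collator.new
elements = collator.get_collation_elements("coffee")  # arrays of integer weights

TwitterCldr::Collation::SortKeyBuilder.build(elements, :case_first => :upper)  # full three-level key
TwitterCldr::Collation::SortKeyBuilder.build(elements, :maximum_level => 1)    # primary weights only

# Passing a bare value such as :upper instead of a hash now raises
# ArgumentError, whose message suggests :case_first => :upper.
```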
data/lib/twitter_cldr/normalization.rb
CHANGED
@@ -5,12 +5,13 @@

 module TwitterCldr
   module Normalization
-    autoload :Base,
-    autoload :Hangul,
-    autoload :
-    autoload :
-    autoload :
-    autoload :
+    autoload :Base, 'twitter_cldr/normalization/base'
+    autoload :Hangul, 'twitter_cldr/normalization/hangul'
+    autoload :QuickCheck, 'twitter_cldr/normalization/quick_check'
+    autoload :NFC, 'twitter_cldr/normalization/nfc'
+    autoload :NFD, 'twitter_cldr/normalization/nfd'
+    autoload :NFKC, 'twitter_cldr/normalization/nfkc'
+    autoload :NFKD, 'twitter_cldr/normalization/nfkd'

     VALID_NORMALIZERS = [:NFD, :NFKD, :NFC, :NFKC]
     DEFAULT_NORMALIZER = :NFD
data/lib/twitter_cldr/normalization/base.rb
CHANGED
@@ -3,6 +3,8 @@
 # Copyright 2012 Twitter, Inc
 # http://www.apache.org/licenses/LICENSE-2.0

+require 'hamster'
+
 module TwitterCldr
   module Normalization
     class Base
@@ -16,11 +18,18 @@ module TwitterCldr
        end

        def combining_class_for(code_point)
-
+          combining_class_cache[code_point] ||=
+            TwitterCldr::Shared::CodePoint.find(code_point).combining_class.to_i
        rescue NoMethodError
          0
        end

+        protected
+
+        def combining_class_cache
+          @combining_class_cache ||= {}
+        end
+
      end

    end
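The cached lookup above trades a small per-process hash for repeated trips through `CodePoint.find`, which is part of the normalization speed-up noted in the changelog. A standalone sketch of the same memoization pattern, with hypothetical names that are not part of the gem:

```ruby
class CombiningClassLookup
  def combining_class_for(code_point)
    # A miss computes and stores the value; later calls for the same
    # code point are a single hash read.
    cache[code_point] ||= slow_lookup(code_point)
  end

  private

  def cache
    @cache ||= {}
  end

  def slow_lookup(code_point)
    # stand-in for the real lookup:
    # TwitterCldr::Shared::CodePoint.find(code_point).combining_class.to_i
    0
  end
end
```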
data/lib/twitter_cldr/normalization/hangul.rb
CHANGED
@@ -9,6 +9,23 @@ module TwitterCldr

      class << self

+        SBASE = 0xAC00
+        LBASE = 0x1100
+        VBASE = 0x1161
+        TBASE = 0x11A7
+
+        LCOUNT = 19
+        VCOUNT = 21
+        TCOUNT = 28
+
+        NCOUNT = VCOUNT * TCOUNT # 588
+        SCOUNT = LCOUNT * NCOUNT # 11172
+
+        LLIMIT = LBASE + LCOUNT # 0x1113 = 4371
+        VLIMIT = VBASE + VCOUNT # 0x1176 = 4470
+        TLIMIT = TBASE + TCOUNT # 0x11C3 = 4547
+        SLIMIT = SBASE + SCOUNT # 0xD7A4 = 55204
+
        # Special composition for Hangul syllables. Documented in Section 3.12 at
        # http://www.unicode.org/versions/Unicode6.1.0/ch03.pdf
        #
@@ -24,45 +41,39 @@ module TwitterCldr
        # Also see http://source.icu-project.org/repos/icu/icuhtml/trunk/design/collation/ICU_collation_design.htm#Hangul_Implicit_CEs
        #
        def decompose(code_point)
-
+          decomposition_cache[code_point] ||= begin
+            l = code_point - SBASE

-
-
-
-
+            t = l % TCOUNT
+            l /= TCOUNT
+            v = l % VCOUNT
+            l /= VCOUNT

-
+            result = []

-
-
-
+            result << LBASE + l
+            result << VBASE + v
+            result << TBASE + t if t > 0

-
+            result
+          end
        end

        def hangul_syllable?(code_point)
          (SBASE...SLIMIT).include?(code_point)
        end

-
-        LBASE = 0x1100
-        VBASE = 0x1161
-        TBASE = 0x11A7
+        private

-
-
-
-
-        NCOUNT = VCOUNT * TCOUNT # 588
-        SCOUNT = LCOUNT * NCOUNT # 11172
+        def syllable_cache
+          @syllable_cache ||= {}
+        end

-
-
-
-        SLIMIT = SBASE + SCOUNT # 0xD7A4 = 55204
+        def decomposition_cache
+          @decomposition_cache ||= {}
+        end

        end
-
      end
    end
  end
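Worked through for a concrete syllable, the arithmetic above maps U+D55C (한) to its conjoining jamo. A sketch, assuming `decompose` is called with an integer code point as the surrounding code does:

```ruby
require 'twitter_cldr'

# index = 0xD55C - SBASE = 10588
#   t = 10588 % 28 = 4    (trailing consonant NIEUN)
#   l = 10588 / 28 = 378
#   v = 378 % 21   = 0    (vowel A)
#   l = 378 / 21   = 18   (leading consonant HIEUH)
TwitterCldr::Normalization::Hangul.decompose(0xD55C)
# expected: [0x1112, 0x1161, 0x11AB]  (ᄒ + ᅡ + ᆫ)
```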
data/lib/twitter_cldr/normalization/nfkc.rb
CHANGED
@@ -13,6 +13,12 @@

      class << self

+        VALID_HANGUL_SEQUENCES = [
+          [0, :lparts],
+          [1, :vparts],
+          [2, :tparts]
+        ]
+
        def normalize_code_points(code_points)
          compose(TwitterCldr::Normalization::NFKD.normalize_code_points(code_points))
        end
@@ -49,7 +55,7 @@ module TwitterCldr
        end

        def valid_hangul_sequence?(buffer_size, hangul_type)
-
+          VALID_HANGUL_SEQUENCES.include?([buffer_size, hangul_type])
        end

        def compose_hangul(code_points)
data/lib/twitter_cldr/normalization/nfkd.rb
CHANGED
@@ -16,7 +16,6 @@ module TwitterCldr
    #
    class NFKD < Base

-
      class << self

        def normalize_code_points(code_points)
@@ -26,14 +25,19 @@ module TwitterCldr
        protected

        def decompose(code_points)
-          code_points.
+          code_points.inject(Hamster.list) do |ret, code_point|
+            decompose_recursively(code_point).each do |decomp_cp|
+              ret = ret.cons(decomp_cp)
+            end
+            ret
+          end.reverse.to_a
        end

        # Recursively decomposes a given code point with the values in its Decomposition Mapping property.
        #
        def decompose_recursively(code_point)
          unicode_data = TwitterCldr::Shared::CodePoint.find(code_point)
-          return code_point unless unicode_data
+          return [code_point] unless unicode_data

          if unicode_data.hangul_type == :compositions
            decompose_hangul(code_point)
@@ -48,7 +52,7 @@ module TwitterCldr
          if decompose?(unicode_data)
            unicode_data.decomposition.map { |code_point| decompose_recursively(code_point) }.flatten
          else
-            unicode_data.code_point
+            [unicode_data.code_point]
          end
        end

@@ -82,7 +86,6 @@ module TwitterCldr
          end

          result.concat(stable_sort(accum)) unless accum.empty?
-
          result.map { |cp_with_cc| cp_with_cc[0] }
        end

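The rewritten `decompose` accumulates decomposed code points by consing onto an immutable Hamster list (an O(1) prepend) and reversing once at the end. A small self-contained illustration of that accumulation pattern, using the same Hamster calls as the diff and arbitrary values:

```ruby
require 'hamster'

# Prepend each element to an immutable list, then reverse once:
list = [1, 2, 3].inject(Hamster.list) { |ret, n| ret.cons(n) }
list.reverse.to_a  # => [1, 2, 3]
```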
data/lib/twitter_cldr/normalization/quick_check.rb
ADDED
@@ -0,0 +1,41 @@
+# encoding: UTF-8
+
+# Copyright 2012 Twitter, Inc
+# http://www.apache.org/licenses/LICENSE-2.0
+
+module TwitterCldr
+  module Normalization
+
+    # This class isn't used anywhere because it was found that it negatively
+    # affects normalization performance.
+    module QuickCheck
+
+      class << self
+
+        def requires_normalization?(code_point, algorithm)
+          key = TwitterCldr::Utils.compute_cache_key(code_point, algorithm)
+          requires_cache[key] = if requires_cache[key].nil?
+            resource_for(algorithm).any? do |range|
+              range.include?(code_point)
+            end
+          else
+            requires_cache[key]
+          end
+        end
+
+        protected
+
+        def requires_cache
+          @requires_cache ||= {}
+        end
+
+        def resource_for(algorithm)
+          @resources ||= {}
+          @resources[algorithm] ||= TwitterCldr.get_resource("unicode_data", "#{algorithm.to_s.downcase}_quick_check")
+        end
+
+      end
+
+    end
+  end
+end
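A quick-check table answers "might this code point change under normalization form X?" without running the full algorithm. A hedged sketch of calling the (unused) module, assuming integer code points and one of the four form names behind the new YAML resources; the commented results follow from U+00C5 having a canonical decomposition while U+0041 does not:

```ruby
require 'twitter_cldr'

TwitterCldr::Normalization::QuickCheck.requires_normalization?(0x00C5, :nfd)  # expected: true
TwitterCldr::Normalization::QuickCheck.requires_normalization?(0x0041, :nfd)  # expected: false
```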
data/lib/twitter_cldr/resources.rb
CHANGED
@@ -18,5 +18,6 @@ module TwitterCldr
    autoload :TailoringImporter, 'twitter_cldr/resources/tailoring_importer'
    autoload :UnicodeDataImporter, 'twitter_cldr/resources/unicode_data_importer'
    autoload :BidiTestImporter, 'twitter_cldr/resources/bidi_test_importer'
+    autoload :NormalizationQuickCheckImporter, 'twitter_cldr/resources/normalization_quick_check_importer'
  end
end
data/lib/twitter_cldr/resources/normalization_quick_check_importer.rb
ADDED
@@ -0,0 +1,86 @@
+# encoding: UTF-8
+
+# Copyright 2012 Twitter, Inc
+# http://www.apache.org/licenses/LICENSE-2.0
+
+require 'twitter_cldr/resources/download'
+
+module TwitterCldr
+  module Resources
+
+    class NormalizationQuickCheckImporter
+
+      PROPS_FILE_URL = "ftp://ftp.unicode.org/Public/UNIDATA/DerivedNormalizationProps.txt"
+
+      # Arguments:
+      #
+      #   input_path - path to a directory containing DerivedNormalizationProps.txt
+      #   output_path - output directory for imported YAML files
+      #
+      def initialize(input_path, output_path)
+        @input_path = input_path
+        @output_path = output_path
+      end
+
+      def import
+        parse_props_file.each_pair do |algorithm, code_point_list|
+          File.open(File.join(@output_path, "#{algorithm.downcase}_quick_check.yml"), "w+") do |f|
+            f.write(YAML.dump(rangify(partition_prop_list(code_point_list))))
+          end
+        end
+      end
+
+      private
+
+      def rangify(lists)
+        lists.map { |list| (list.first..list.last) }
+      end
+
+      def partition_prop_list(list)
+        last_item = 0
+        list.inject([]) do |ret, item|
+          (item - last_item == 1) ? ret[-1] << item : ret << [item]
+          last_item = item
+          ret
+        end
+      end
+
+      def parse_props_file
+        check_table = {}
+        cur_type = nil
+
+        File.open(props_file) do |input|
+          input.each_line do |line|
+            cur_type = nil if line =~ /=Maybe/
+            type = line.scan(/#\s*Property:\s*(NF[KDC]+)_Quick_Check/).flatten
+
+            if type.size > 0
+              cur_type = type.first
+              check_table[cur_type] = []
+            end
+
+            if check_table.size > 0 && line[0...1] != "#" && !line.strip.empty? && cur_type
+              start, finish = line.scan(/(\h+(\.\.\h+)?)/).first.first.split("..").map { |num| num.to_i(16) }
+
+              if finish
+                check_table[cur_type] += (start..finish).to_a
+              else
+                check_table[cur_type] << start
+              end
+            end
+
+            break if line =~ /={5,}/ && check_table.size >= 4 && check_table.all? { |key, val| val.size > 0 }
+          end
+        end
+
+        check_table
+      end
+
+      def props_file
+        TwitterCldr::Resources.download_if_necessary(File.join(@input_path, 'DerivedNormalizationProps.txt'), PROPS_FILE_URL)
+      end
+
+    end
+
+  end
+end
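Tying this together with the Rakefile change above: the importer is exposed as the `update:normalization_quick_check` rake task, which downloads DerivedNormalizationProps.txt into ./vendor if needed and writes the four *_quick_check.yml resources. The partition/rangify step collapses each property's code-point list into contiguous ranges; a worked sketch with made-up values, calling the private helpers via `send` purely for illustration:

```ruby
require 'twitter_cldr'

importer = TwitterCldr::Resources::NormalizationQuickCheckImporter.new(
  './vendor',
  './resources/unicode_data'
)

# Consecutive code points are grouped, then each group becomes a Range:
groups = importer.send(:partition_prop_list, [0x0340, 0x0341, 0x0343, 0x0344])
# => [[0x0340, 0x0341], [0x0343, 0x0344]]
importer.send(:rangify, groups)
# => [832..833, 835..836]   (these ranges are what gets YAML-dumped)
```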