twitter_cldr 1.6.1 → 1.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +4 -0
- data/Rakefile +8 -3
- data/lib/twitter_cldr/collation/collator.rb +9 -11
- data/lib/twitter_cldr/collation/trie.rb +8 -0
- data/lib/twitter_cldr/collation/trie_builder.rb +12 -15
- data/lib/twitter_cldr/collation/trie_loader.rb +50 -0
- data/lib/twitter_cldr/collation/trie_with_fallback.rb +6 -4
- data/lib/twitter_cldr/collation.rb +1 -0
- data/lib/twitter_cldr/resources/tailoring_importer.rb +203 -0
- data/lib/twitter_cldr/resources/tries_dumper.rb +43 -0
- data/lib/twitter_cldr/resources.rb +3 -2
- data/lib/twitter_cldr/version.rb +1 -1
- data/resources/collation/tries/af.dump +0 -0
- data/resources/collation/tries/ar.dump +0 -0
- data/resources/collation/tries/ca.dump +0 -0
- data/resources/collation/tries/cs.dump +0 -0
- data/resources/collation/tries/da.dump +0 -0
- data/resources/collation/tries/de.dump +0 -0
- data/resources/collation/tries/default.dump +0 -0
- data/resources/collation/tries/el.dump +0 -0
- data/resources/collation/tries/en.dump +0 -0
- data/resources/collation/tries/es.dump +0 -0
- data/resources/collation/tries/eu.dump +0 -0
- data/resources/collation/tries/fa.dump +0 -0
- data/resources/collation/tries/fi.dump +0 -0
- data/resources/collation/tries/fil.dump +0 -0
- data/resources/collation/tries/fr.dump +0 -0
- data/resources/collation/tries/he.dump +0 -0
- data/resources/collation/tries/hi.dump +0 -0
- data/resources/collation/tries/hu.dump +0 -0
- data/resources/collation/tries/id.dump +0 -0
- data/resources/collation/tries/it.dump +0 -0
- data/resources/collation/tries/ja.dump +0 -0
- data/resources/collation/tries/ko.dump +0 -0
- data/resources/collation/tries/ms.dump +0 -0
- data/resources/collation/tries/nb.dump +0 -0
- data/resources/collation/tries/nl.dump +0 -0
- data/resources/collation/tries/pl.dump +0 -0
- data/resources/collation/tries/pt.dump +0 -0
- data/resources/collation/tries/ru.dump +0 -0
- data/resources/collation/tries/sv.dump +0 -0
- data/resources/collation/tries/th.dump +0 -0
- data/resources/collation/tries/tr.dump +0 -0
- data/resources/collation/tries/uk.dump +0 -0
- data/resources/collation/tries/ur.dump +0 -0
- data/resources/collation/tries/zh-Hant.dump +0 -0
- data/resources/collation/tries/zh.dump +0 -0
- data/spec/collation/collation_spec.rb +4 -2
- data/spec/collation/collator_spec.rb +36 -30
- data/spec/collation/tailoring_spec.rb +3 -1
- data/spec/collation/tailoring_tests/ja.txt +6 -5
- data/spec/collation/trie_builder_spec.rb +21 -26
- data/spec/collation/trie_dumps_spec.rb +26 -0
- data/spec/collation/trie_loader_spec.rb +72 -0
- data/spec/collation/trie_spec.rb +14 -0
- data/spec/collation/trie_with_fallback_spec.rb +6 -0
- data/spec/normalization/normalization_spec.rb +2 -2
- metadata +43 -21
- data/lib/twitter_cldr/resources/import/tailoring.rb +0 -202
- data/lib/twitter_cldr/resources/import.rb +0 -12
data/History.txt
CHANGED
data/Rakefile
CHANGED
@@ -11,6 +11,8 @@ require 'digest'
|
|
11
11
|
require 'rspec/core/rake_task'
|
12
12
|
require 'rubygems/package_task'
|
13
13
|
|
14
|
+
require './lib/twitter_cldr'
|
15
|
+
|
14
16
|
Bundler::GemHelper.install_tasks
|
15
17
|
|
16
18
|
task :default => :spec
|
@@ -42,9 +44,7 @@ namespace :resources do
|
|
42
44
|
namespace :update do
|
43
45
|
desc 'Import tailoring resources from CLDR data (should be executed using JRuby 1.7 in 1.9 mode)'
|
44
46
|
task :tailoring do
|
45
|
-
|
46
|
-
|
47
|
-
importer = TwitterCldr::Resources::Import::Tailoring.new(
|
47
|
+
importer = TwitterCldr::Resources::TailoringImporter.new(
|
48
48
|
ENV.fetch('CLDR_DATA_PATH', '../cldr-tailoring/'),
|
49
49
|
'./resources/collation/tailoring',
|
50
50
|
ENV.fetch('ICU4J_JAR_PATH', '../icu4j-49_1.jar')
|
@@ -52,6 +52,11 @@ namespace :resources do
|
|
52
52
|
|
53
53
|
TwitterCldr.supported_locales.each { |locale| importer.import(locale) }
|
54
54
|
end
|
55
|
+
|
56
|
+
desc 'Updates default and tailoring tries dumps'
|
57
|
+
task :tries do
|
58
|
+
TwitterCldr::Resources::TriesDumper.update_dumps
|
59
|
+
end
|
55
60
|
end
|
56
61
|
end
|
57
62
|
|
@@ -11,8 +11,6 @@ module TwitterCldr
|
|
11
11
|
#
|
12
12
|
class Collator
|
13
13
|
|
14
|
-
FRACTIONAL_UCA_SHORT_RESOURCE = 'collation/FractionalUCA_SHORT.txt'
|
15
|
-
|
16
14
|
attr_accessor :locale
|
17
15
|
|
18
16
|
def initialize(locale = nil)
|
@@ -53,7 +51,7 @@ module TwitterCldr
|
|
53
51
|
end
|
54
52
|
|
55
53
|
def load_trie
|
56
|
-
@locale ? self.class.
|
54
|
+
@locale ? self.class.tailored_trie(@locale) : self.class.default_trie
|
57
55
|
end
|
58
56
|
|
59
57
|
def get_integer_code_points(code_points)
|
@@ -138,24 +136,24 @@ module TwitterCldr
|
|
138
136
|
|
139
137
|
class << self
|
140
138
|
|
141
|
-
# Loads and memoizes the default
|
139
|
+
# Loads and memoizes the default fractional collation elements trie.
|
142
140
|
#
|
143
|
-
def
|
144
|
-
@
|
141
|
+
def default_trie
|
142
|
+
@default_trie ||= TwitterCldr::Collation::TrieLoader.load_default_trie.lock
|
145
143
|
end
|
146
144
|
|
147
|
-
def
|
148
|
-
|
145
|
+
def tailored_trie(locale)
|
146
|
+
tailored_tries_cache[locale]
|
149
147
|
end
|
150
148
|
|
151
149
|
private
|
152
150
|
|
153
|
-
def
|
154
|
-
@
|
151
|
+
def tailored_tries_cache
|
152
|
+
@tailored_tries_cache ||= Hash.new { |hash, locale| hash[locale] = load_tailored_trie(locale) }
|
155
153
|
end
|
156
154
|
|
157
155
|
def load_tailored_trie(locale)
|
158
|
-
TwitterCldr::Collation::
|
156
|
+
TwitterCldr::Collation::TrieLoader.load_tailored_trie(locale, default_trie).lock
|
159
157
|
end
|
160
158
|
|
161
159
|
end
|
@@ -10,35 +10,32 @@ module TwitterCldr
|
|
10
10
|
#
|
11
11
|
module TrieBuilder
|
12
12
|
|
13
|
-
|
14
|
-
|
13
|
+
COLLATION_ELEMENT_REGEXP = /^((?:[0-9A-F]+)(?:\s[0-9A-F]+)*);\s((?:\[.*?\])(?:\[.*?\])*)/
|
14
|
+
|
15
|
+
FRACTIONAL_UCA_SHORT_PATH = File.join(TwitterCldr::RESOURCES_DIR, 'collation', 'FractionalUCA_SHORT.txt')
|
15
16
|
|
16
17
|
class << self
|
17
18
|
|
18
|
-
def
|
19
|
-
|
19
|
+
def load_default_trie
|
20
|
+
File.open(FRACTIONAL_UCA_SHORT_PATH, 'r') { |table| parse_collation_elements_table(table) }
|
20
21
|
end
|
21
22
|
|
22
23
|
def load_tailored_trie(locale, fallback)
|
23
24
|
build_tailored_trie(tailoring_data(locale), fallback)
|
24
25
|
end
|
25
26
|
|
26
|
-
def parse_trie(table, trie = TwitterCldr::Collation::Trie.new)
|
27
|
-
table.lines.each do |line|
|
28
|
-
trie.set(parse_code_points($1), parse_collation_element($2)) if FCE_REGEXP =~ line
|
29
|
-
end
|
30
|
-
|
31
|
-
trie
|
32
|
-
end
|
33
|
-
|
34
27
|
def tailoring_data(locale)
|
35
28
|
TwitterCldr.get_resource(:collation, :tailoring, locale)
|
36
29
|
end
|
37
30
|
|
38
31
|
private
|
39
32
|
|
40
|
-
def
|
41
|
-
|
33
|
+
def parse_collation_elements_table(table, trie = TwitterCldr::Collation::Trie.new)
|
34
|
+
table.lines.each do |line|
|
35
|
+
trie.set(parse_code_points($1), parse_collation_element($2)) if COLLATION_ELEMENT_REGEXP =~ line
|
36
|
+
end
|
37
|
+
|
38
|
+
trie
|
42
39
|
end
|
43
40
|
|
44
41
|
def parse_code_points(string)
|
@@ -54,7 +51,7 @@ module TwitterCldr
|
|
54
51
|
def build_tailored_trie(tailoring_data, fallback)
|
55
52
|
trie = TwitterCldr::Collation::TrieWithFallback.new(fallback)
|
56
53
|
|
57
|
-
|
54
|
+
parse_collation_elements_table(tailoring_data[:tailored_table], trie)
|
58
55
|
copy_expansions(trie, fallback, parse_suppressed_starters(tailoring_data[:suppressed_contractions]))
|
59
56
|
|
60
57
|
trie
|
@@ -0,0 +1,50 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
# Copyright 2012 Twitter, Inc
|
4
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
5
|
+
|
6
|
+
module TwitterCldr
|
7
|
+
module Collation
|
8
|
+
|
9
|
+
# Builds a fractional collation elements Trie from the file containing a fractional collation elements table.
|
10
|
+
#
|
11
|
+
module TrieLoader
|
12
|
+
|
13
|
+
DUMPS_DIR = File.join(TwitterCldr::RESOURCES_DIR, 'collation', 'tries')
|
14
|
+
|
15
|
+
DEFAULT_TRIE_LOCALE = :default
|
16
|
+
|
17
|
+
class << self
|
18
|
+
|
19
|
+
def load_default_trie
|
20
|
+
load_trie
|
21
|
+
end
|
22
|
+
|
23
|
+
def load_tailored_trie(locale, fallback)
|
24
|
+
trie = load_trie(locale)
|
25
|
+
trie.fallback = fallback
|
26
|
+
trie
|
27
|
+
end
|
28
|
+
|
29
|
+
def dump_path(locale)
|
30
|
+
File.join(DUMPS_DIR, "#{locale}.dump")
|
31
|
+
end
|
32
|
+
|
33
|
+
private
|
34
|
+
|
35
|
+
def load_trie(locale = DEFAULT_TRIE_LOCALE)
|
36
|
+
load_dump(locale) do |dump|
|
37
|
+
Marshal.load(dump)
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
def load_dump(locale, &block)
|
42
|
+
File.open(dump_path(locale), 'r', &block)
|
43
|
+
end
|
44
|
+
|
45
|
+
end
|
46
|
+
|
47
|
+
end
|
48
|
+
|
49
|
+
end
|
50
|
+
end
|
@@ -26,17 +26,19 @@ module TwitterCldr
|
|
26
26
|
#
|
27
27
|
# This special behavior of the #find_prefix method allows 'hiding' fallback keys that contain more than one element
|
28
28
|
# by adding their one element prefixes to the trie itself. This feature is useful for some applications, e.g., for
|
29
|
-
# suppressing contractions in a tailored
|
29
|
+
# suppressing contractions in a tailored fractional collation elements trie.
|
30
30
|
#
|
31
31
|
class TrieWithFallback < TwitterCldr::Collation::Trie
|
32
32
|
|
33
|
+
attr_accessor :fallback
|
34
|
+
|
33
35
|
def initialize(fallback)
|
34
36
|
super()
|
35
|
-
|
37
|
+
self.fallback = fallback
|
36
38
|
end
|
37
39
|
|
38
40
|
def get(key)
|
39
|
-
super ||
|
41
|
+
super || fallback.get(key)
|
40
42
|
end
|
41
43
|
|
42
44
|
def find_prefix(key)
|
@@ -45,7 +47,7 @@ module TwitterCldr
|
|
45
47
|
if prefix_size > 0
|
46
48
|
[value, prefix_size, suffixes]
|
47
49
|
else
|
48
|
-
|
50
|
+
fallback.find_prefix(key)
|
49
51
|
end
|
50
52
|
end
|
51
53
|
|
@@ -10,6 +10,7 @@ module TwitterCldr
|
|
10
10
|
autoload :SortKeyBuilder, 'twitter_cldr/collation/sort_key_builder'
|
11
11
|
autoload :Trie, 'twitter_cldr/collation/trie'
|
12
12
|
autoload :TrieBuilder, 'twitter_cldr/collation/trie_builder'
|
13
|
+
autoload :TrieLoader, 'twitter_cldr/collation/trie_loader'
|
13
14
|
autoload :TrieWithFallback, 'twitter_cldr/collation/trie_with_fallback'
|
14
15
|
end
|
15
16
|
end
|
@@ -0,0 +1,203 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
# Copyright 2012 Twitter, Inc
|
4
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
5
|
+
|
6
|
+
require 'nokogiri'
|
7
|
+
require 'yaml'
|
8
|
+
require 'java'
|
9
|
+
|
10
|
+
module TwitterCldr
|
11
|
+
module Resources
|
12
|
+
# This class should be used with JRuby 1.7 in 1.9 mode and ICU4J version 49.1 (available at
|
13
|
+
# http://download.icu-project.org/files/icu4j/49.1/icu4j-49_1.jar).
|
14
|
+
#
|
15
|
+
class TailoringImporter
|
16
|
+
|
17
|
+
SUPPORTED_RULES = %w[p s t i pc sc tc ic x]
|
18
|
+
SIMPLE_RULES = %w[p s t i]
|
19
|
+
LEVEL_RULE_REGEXP = /^(p|s|t|i)(c?)$/
|
20
|
+
|
21
|
+
IGNORED_TAGS = %w[reset text #comment]
|
22
|
+
|
23
|
+
LAST_BYTE_MASK = 0xFF
|
24
|
+
|
25
|
+
LOCALES_MAP = {
|
26
|
+
:'zh-Hant' => :'zh_Hant',
|
27
|
+
:id => :root,
|
28
|
+
:it => :root,
|
29
|
+
:ms => :root,
|
30
|
+
:nl => :root,
|
31
|
+
:pt => :root
|
32
|
+
}
|
33
|
+
|
34
|
+
EMPTY_TAILORING_DATA = { 'collator_options' => {}, 'tailored_table' => '', 'suppressed_contractions' => '' }
|
35
|
+
|
36
|
+
class ImportError < RuntimeError; end
|
37
|
+
|
38
|
+
# Arguments:
|
39
|
+
#
|
40
|
+
# input_path - path to a directory containing CLDR tailoring data (available at
|
41
|
+
# http://unicode.org/cldr/trac/browser/tags/release-21/common/collation/
|
42
|
+
# or as a part of CLDR release at http://cldr.unicode.org/index/downloads)
|
43
|
+
#
|
44
|
+
# output_path - output directory for imported YAML files
|
45
|
+
#
|
46
|
+
# icu4j_path - path to ICU4J jar file
|
47
|
+
#
|
48
|
+
def initialize(input_path, output_path, icu4j_path)
|
49
|
+
require icu4j_path
|
50
|
+
|
51
|
+
@input_path = input_path
|
52
|
+
@output_path = output_path
|
53
|
+
end
|
54
|
+
|
55
|
+
def import(locale)
|
56
|
+
print "Importing %8s\t--\t" % locale
|
57
|
+
|
58
|
+
if tailoring_present?(locale)
|
59
|
+
dump(locale, tailoring_data(locale))
|
60
|
+
puts "Done."
|
61
|
+
else
|
62
|
+
dump(locale, EMPTY_TAILORING_DATA)
|
63
|
+
puts "Missing (generated empty tailoring resource)."
|
64
|
+
end
|
65
|
+
rescue ImportError => e
|
66
|
+
puts "Error: #{e.message}"
|
67
|
+
end
|
68
|
+
|
69
|
+
private
|
70
|
+
|
71
|
+
def dump(locale, data)
|
72
|
+
File.open(resource_file_path(locale), 'w') { |file| YAML.dump(data, file) }
|
73
|
+
end
|
74
|
+
|
75
|
+
def tailoring_present?(locale)
|
76
|
+
File.file?(locale_file_path(locale))
|
77
|
+
end
|
78
|
+
|
79
|
+
def translated_locale(locale)
|
80
|
+
LOCALES_MAP.fetch(locale, locale)
|
81
|
+
end
|
82
|
+
|
83
|
+
def locale_file_path(locale)
|
84
|
+
File.join(@input_path, "#{translated_locale(locale)}.xml")
|
85
|
+
end
|
86
|
+
|
87
|
+
def resource_file_path(locale)
|
88
|
+
File.join(@output_path, "#{locale}.yml")
|
89
|
+
end
|
90
|
+
|
91
|
+
def tailoring_data(locale)
|
92
|
+
doc = File.open(locale_file_path(locale)) { |file| Nokogiri::XML(file) }
|
93
|
+
collations = doc.at_xpath('//collations')
|
94
|
+
|
95
|
+
collation_alias = collations.at_xpath('alias[@path="//ldml/collations"]')
|
96
|
+
aliased_locale = collation_alias && collation_alias.attr('source')
|
97
|
+
|
98
|
+
return tailoring_data(aliased_locale) if aliased_locale
|
99
|
+
|
100
|
+
standard_tailoring = collations.at_xpath('collation[@type="standard"]')
|
101
|
+
|
102
|
+
{
|
103
|
+
'collator_options' => parse_collator_options(standard_tailoring),
|
104
|
+
'tailored_table' => parse_tailorings(standard_tailoring, locale),
|
105
|
+
'suppressed_contractions' => parse_suppressed_contractions(standard_tailoring)
|
106
|
+
}
|
107
|
+
end
|
108
|
+
|
109
|
+
def parse_tailorings(data, locale)
|
110
|
+
rules = data && data.at_xpath('rules')
|
111
|
+
|
112
|
+
return '' unless rules
|
113
|
+
|
114
|
+
collator = Java::ComIbmIcuText::Collator.get_instance(Java::JavaUtil::Locale.new(locale.to_s))
|
115
|
+
|
116
|
+
rules.children.map do |child|
|
117
|
+
validate_tailoring_rule(child)
|
118
|
+
|
119
|
+
if child.name =~ LEVEL_RULE_REGEXP
|
120
|
+
if $2.empty?
|
121
|
+
table_entry_for_rule(collator, child.text)
|
122
|
+
else
|
123
|
+
child.text.chars.map { |char| table_entry_for_rule(collator, char) }
|
124
|
+
end
|
125
|
+
elsif child.name == 'x'
|
126
|
+
context = ''
|
127
|
+
child.children.each_with_object([]) do |c, memo|
|
128
|
+
if SIMPLE_RULES.include?(c.name)
|
129
|
+
memo << table_entry_for_rule(collator, context + c.text)
|
130
|
+
elsif c.name == 'context'
|
131
|
+
context = c.text
|
132
|
+
elsif c.name != 'extend'
|
133
|
+
raise ImportError, "Rule '#{c.name}' inside <x></x> is not supported."
|
134
|
+
end
|
135
|
+
end
|
136
|
+
else
|
137
|
+
raise ImportError, "Tag '#{child.name}' is not supported." unless IGNORED_TAGS.include?(child.name)
|
138
|
+
end
|
139
|
+
end.flatten.compact.join("\n")
|
140
|
+
end
|
141
|
+
|
142
|
+
def table_entry_for_rule(collator, tailored_value)
|
143
|
+
code_points = get_code_points(tailored_value)
|
144
|
+
|
145
|
+
collation_elements = get_collation_elements(collator, tailored_value).map do |ce|
|
146
|
+
ce.map { |l| l.to_s(16).upcase }.join(', ')
|
147
|
+
end
|
148
|
+
|
149
|
+
"#{code_points.join(' ')}; [#{collation_elements.join('][')}]"
|
150
|
+
end
|
151
|
+
|
152
|
+
def parse_suppressed_contractions(data)
|
153
|
+
node = data && data.at_xpath('suppress_contractions')
|
154
|
+
node ? Java::ComIbmIcuText::UnicodeSet.to_array(Java::ComIbmIcuText::UnicodeSet.new(node.text)).to_a.join : ''
|
155
|
+
end
|
156
|
+
|
157
|
+
def parse_collator_options(data)
|
158
|
+
options = {}
|
159
|
+
|
160
|
+
if data
|
161
|
+
case_first_setting = data.at_xpath('settings[@caseFirst]')
|
162
|
+
options['case_first'] = case_first_setting.attr('caseFirst').to_sym if case_first_setting
|
163
|
+
end
|
164
|
+
|
165
|
+
options
|
166
|
+
end
|
167
|
+
|
168
|
+
def validate_tailoring_rule(rule)
|
169
|
+
return if IGNORED_TAGS.include?(rule.name)
|
170
|
+
|
171
|
+
raise ImportError, "Rule '#{rule.name}' is not supported." unless SUPPORTED_RULES.include?(rule.name)
|
172
|
+
end
|
173
|
+
|
174
|
+
def get_collation_elements(collator, string)
|
175
|
+
iter = collator.get_collation_element_iterator(string)
|
176
|
+
|
177
|
+
collation_elements = []
|
178
|
+
ce = iter.next
|
179
|
+
|
180
|
+
while ce != Java::ComIbmIcuText::CollationElementIterator::NULLORDER
|
181
|
+
p1 = (ce >> 24) & LAST_BYTE_MASK
|
182
|
+
p2 = (ce >> 16) & LAST_BYTE_MASK
|
183
|
+
|
184
|
+
primary = p2.zero? ? p1 : (p1 << 8) + p2
|
185
|
+
secondary = (ce >> 8) & LAST_BYTE_MASK
|
186
|
+
tertiarly = ce & LAST_BYTE_MASK
|
187
|
+
|
188
|
+
collation_elements << [primary, secondary, tertiarly]
|
189
|
+
|
190
|
+
ce = iter.next
|
191
|
+
end
|
192
|
+
|
193
|
+
collation_elements
|
194
|
+
end
|
195
|
+
|
196
|
+
def get_code_points(string)
|
197
|
+
TwitterCldr::Normalization::NFD.normalize_code_points(TwitterCldr::Utils::CodePoints.from_string(string))
|
198
|
+
end
|
199
|
+
|
200
|
+
end
|
201
|
+
|
202
|
+
end
|
203
|
+
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
# Copyright 2012 Twitter, Inc
|
4
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
5
|
+
|
6
|
+
module TwitterCldr
|
7
|
+
module Resources
|
8
|
+
|
9
|
+
module TriesDumper
|
10
|
+
|
11
|
+
class << self
|
12
|
+
|
13
|
+
def update_dumps
|
14
|
+
update_default_trie_dump
|
15
|
+
TwitterCldr.supported_locales.each { |locale| update_tailoring_trie_dump(locale) }
|
16
|
+
end
|
17
|
+
|
18
|
+
private
|
19
|
+
|
20
|
+
def update_default_trie_dump
|
21
|
+
save_trie_dump(TwitterCldr::Collation::TrieLoader::DEFAULT_TRIE_LOCALE, default_trie)
|
22
|
+
end
|
23
|
+
|
24
|
+
def update_tailoring_trie_dump(locale)
|
25
|
+
save_trie_dump(locale, TwitterCldr::Collation::TrieBuilder.load_tailored_trie(locale, @default_trie))
|
26
|
+
end
|
27
|
+
|
28
|
+
def save_trie_dump(locale, trie)
|
29
|
+
path = TwitterCldr::Collation::TrieLoader.dump_path(locale)
|
30
|
+
FileUtils.mkdir_p(File.dirname(path))
|
31
|
+
|
32
|
+
File.open(path, 'w') { |f| f.write(Marshal.dump(trie)) }
|
33
|
+
end
|
34
|
+
|
35
|
+
def default_trie
|
36
|
+
@default_trie ||= TwitterCldr::Collation::TrieBuilder.load_default_trie
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
end
|
41
|
+
|
42
|
+
end
|
43
|
+
end
|
@@ -5,7 +5,8 @@
|
|
5
5
|
|
6
6
|
module TwitterCldr
|
7
7
|
module Resources
|
8
|
-
autoload :Loader,
|
9
|
-
autoload :
|
8
|
+
autoload :Loader, 'twitter_cldr/resources/loader'
|
9
|
+
autoload :TailoringImporter, 'twitter_cldr/resources/tailoring_importer'
|
10
|
+
autoload :TriesDumper, 'twitter_cldr/resources/tries_dumper'
|
10
11
|
end
|
11
12
|
end
|
data/lib/twitter_cldr/version.rb
CHANGED
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
@@ -31,7 +31,7 @@ describe 'Unicode Collation Algorithm' do
|
|
31
31
|
|
32
32
|
previous_sort_key = previous_code_points = previous_hex_code_points = nil
|
33
33
|
|
34
|
-
open(file_path, 'r:utf-8') do |file|
|
34
|
+
File.open(file_path, 'r:utf-8') do |file|
|
35
35
|
file.each do |line|
|
36
36
|
next unless /^([0-9A-F ]+);/ =~ line
|
37
37
|
|
@@ -79,7 +79,9 @@ END
|
|
79
79
|
|
80
80
|
print 'extracting CollationTest_CLDR_NON_IGNORABLE.txt ... '
|
81
81
|
Zip::ZipFile.open(zip_file.path) do |zip|
|
82
|
-
open(FULL_COLLATION_TEST_PATH, 'w')
|
82
|
+
File.open(FULL_COLLATION_TEST_PATH, 'w') do |file|
|
83
|
+
file.write(zip.read('CollationAuxiliary/CollationTest_CLDR_NON_IGNORABLE.txt'))
|
84
|
+
end
|
83
85
|
end
|
84
86
|
zip_file.unlink
|
85
87
|
|