twitter_cldr 1.6.1 → 1.6.2
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +4 -0
- data/Rakefile +8 -3
- data/lib/twitter_cldr/collation/collator.rb +9 -11
- data/lib/twitter_cldr/collation/trie.rb +8 -0
- data/lib/twitter_cldr/collation/trie_builder.rb +12 -15
- data/lib/twitter_cldr/collation/trie_loader.rb +50 -0
- data/lib/twitter_cldr/collation/trie_with_fallback.rb +6 -4
- data/lib/twitter_cldr/collation.rb +1 -0
- data/lib/twitter_cldr/resources/tailoring_importer.rb +203 -0
- data/lib/twitter_cldr/resources/tries_dumper.rb +43 -0
- data/lib/twitter_cldr/resources.rb +3 -2
- data/lib/twitter_cldr/version.rb +1 -1
- data/resources/collation/tries/af.dump +0 -0
- data/resources/collation/tries/ar.dump +0 -0
- data/resources/collation/tries/ca.dump +0 -0
- data/resources/collation/tries/cs.dump +0 -0
- data/resources/collation/tries/da.dump +0 -0
- data/resources/collation/tries/de.dump +0 -0
- data/resources/collation/tries/default.dump +0 -0
- data/resources/collation/tries/el.dump +0 -0
- data/resources/collation/tries/en.dump +0 -0
- data/resources/collation/tries/es.dump +0 -0
- data/resources/collation/tries/eu.dump +0 -0
- data/resources/collation/tries/fa.dump +0 -0
- data/resources/collation/tries/fi.dump +0 -0
- data/resources/collation/tries/fil.dump +0 -0
- data/resources/collation/tries/fr.dump +0 -0
- data/resources/collation/tries/he.dump +0 -0
- data/resources/collation/tries/hi.dump +0 -0
- data/resources/collation/tries/hu.dump +0 -0
- data/resources/collation/tries/id.dump +0 -0
- data/resources/collation/tries/it.dump +0 -0
- data/resources/collation/tries/ja.dump +0 -0
- data/resources/collation/tries/ko.dump +0 -0
- data/resources/collation/tries/ms.dump +0 -0
- data/resources/collation/tries/nb.dump +0 -0
- data/resources/collation/tries/nl.dump +0 -0
- data/resources/collation/tries/pl.dump +0 -0
- data/resources/collation/tries/pt.dump +0 -0
- data/resources/collation/tries/ru.dump +0 -0
- data/resources/collation/tries/sv.dump +0 -0
- data/resources/collation/tries/th.dump +0 -0
- data/resources/collation/tries/tr.dump +0 -0
- data/resources/collation/tries/uk.dump +0 -0
- data/resources/collation/tries/ur.dump +0 -0
- data/resources/collation/tries/zh-Hant.dump +0 -0
- data/resources/collation/tries/zh.dump +0 -0
- data/spec/collation/collation_spec.rb +4 -2
- data/spec/collation/collator_spec.rb +36 -30
- data/spec/collation/tailoring_spec.rb +3 -1
- data/spec/collation/tailoring_tests/ja.txt +6 -5
- data/spec/collation/trie_builder_spec.rb +21 -26
- data/spec/collation/trie_dumps_spec.rb +26 -0
- data/spec/collation/trie_loader_spec.rb +72 -0
- data/spec/collation/trie_spec.rb +14 -0
- data/spec/collation/trie_with_fallback_spec.rb +6 -0
- data/spec/normalization/normalization_spec.rb +2 -2
- metadata +43 -21
- data/lib/twitter_cldr/resources/import/tailoring.rb +0 -202
- data/lib/twitter_cldr/resources/import.rb +0 -12
data/History.txt
CHANGED
data/Rakefile
CHANGED
@@ -11,6 +11,8 @@ require 'digest'
|
|
11
11
|
require 'rspec/core/rake_task'
|
12
12
|
require 'rubygems/package_task'
|
13
13
|
|
14
|
+
require './lib/twitter_cldr'
|
15
|
+
|
14
16
|
Bundler::GemHelper.install_tasks
|
15
17
|
|
16
18
|
task :default => :spec
|
@@ -42,9 +44,7 @@ namespace :resources do
|
|
42
44
|
namespace :update do
|
43
45
|
desc 'Import tailoring resources from CLDR data (should be executed using JRuby 1.7 in 1.9 mode)'
|
44
46
|
task :tailoring do
|
45
|
-
|
46
|
-
|
47
|
-
importer = TwitterCldr::Resources::Import::Tailoring.new(
|
47
|
+
importer = TwitterCldr::Resources::TailoringImporter.new(
|
48
48
|
ENV.fetch('CLDR_DATA_PATH', '../cldr-tailoring/'),
|
49
49
|
'./resources/collation/tailoring',
|
50
50
|
ENV.fetch('ICU4J_JAR_PATH', '../icu4j-49_1.jar')
|
@@ -52,6 +52,11 @@ namespace :resources do
|
|
52
52
|
|
53
53
|
TwitterCldr.supported_locales.each { |locale| importer.import(locale) }
|
54
54
|
end
|
55
|
+
|
56
|
+
desc 'Updates default and tailoring tries dumps'
|
57
|
+
task :tries do
|
58
|
+
TwitterCldr::Resources::TriesDumper.update_dumps
|
59
|
+
end
|
55
60
|
end
|
56
61
|
end
|
57
62
|
|
@@ -11,8 +11,6 @@ module TwitterCldr
|
|
11
11
|
#
|
12
12
|
class Collator
|
13
13
|
|
14
|
-
FRACTIONAL_UCA_SHORT_RESOURCE = 'collation/FractionalUCA_SHORT.txt'
|
15
|
-
|
16
14
|
attr_accessor :locale
|
17
15
|
|
18
16
|
def initialize(locale = nil)
|
@@ -53,7 +51,7 @@ module TwitterCldr
|
|
53
51
|
end
|
54
52
|
|
55
53
|
def load_trie
|
56
|
-
@locale ? self.class.
|
54
|
+
@locale ? self.class.tailored_trie(@locale) : self.class.default_trie
|
57
55
|
end
|
58
56
|
|
59
57
|
def get_integer_code_points(code_points)
|
@@ -138,24 +136,24 @@ module TwitterCldr
|
|
138
136
|
|
139
137
|
class << self
|
140
138
|
|
141
|
-
# Loads and memoizes the default
|
139
|
+
# Loads and memoizes the default fractional collation elements trie.
|
142
140
|
#
|
143
|
-
def
|
144
|
-
@
|
141
|
+
def default_trie
|
142
|
+
@default_trie ||= TwitterCldr::Collation::TrieLoader.load_default_trie.lock
|
145
143
|
end
|
146
144
|
|
147
|
-
def
|
148
|
-
|
145
|
+
def tailored_trie(locale)
|
146
|
+
tailored_tries_cache[locale]
|
149
147
|
end
|
150
148
|
|
151
149
|
private
|
152
150
|
|
153
|
-
def
|
154
|
-
@
|
151
|
+
def tailored_tries_cache
|
152
|
+
@tailored_tries_cache ||= Hash.new { |hash, locale| hash[locale] = load_tailored_trie(locale) }
|
155
153
|
end
|
156
154
|
|
157
155
|
def load_tailored_trie(locale)
|
158
|
-
TwitterCldr::Collation::
|
156
|
+
TwitterCldr::Collation::TrieLoader.load_tailored_trie(locale, default_trie).lock
|
159
157
|
end
|
160
158
|
|
161
159
|
end
|
@@ -10,35 +10,32 @@ module TwitterCldr
|
|
10
10
|
#
|
11
11
|
module TrieBuilder
|
12
12
|
|
13
|
-
|
14
|
-
|
13
|
+
COLLATION_ELEMENT_REGEXP = /^((?:[0-9A-F]+)(?:\s[0-9A-F]+)*);\s((?:\[.*?\])(?:\[.*?\])*)/
|
14
|
+
|
15
|
+
FRACTIONAL_UCA_SHORT_PATH = File.join(TwitterCldr::RESOURCES_DIR, 'collation', 'FractionalUCA_SHORT.txt')
|
15
16
|
|
16
17
|
class << self
|
17
18
|
|
18
|
-
def
|
19
|
-
|
19
|
+
def load_default_trie
|
20
|
+
File.open(FRACTIONAL_UCA_SHORT_PATH, 'r') { |table| parse_collation_elements_table(table) }
|
20
21
|
end
|
21
22
|
|
22
23
|
def load_tailored_trie(locale, fallback)
|
23
24
|
build_tailored_trie(tailoring_data(locale), fallback)
|
24
25
|
end
|
25
26
|
|
26
|
-
def parse_trie(table, trie = TwitterCldr::Collation::Trie.new)
|
27
|
-
table.lines.each do |line|
|
28
|
-
trie.set(parse_code_points($1), parse_collation_element($2)) if FCE_REGEXP =~ line
|
29
|
-
end
|
30
|
-
|
31
|
-
trie
|
32
|
-
end
|
33
|
-
|
34
27
|
def tailoring_data(locale)
|
35
28
|
TwitterCldr.get_resource(:collation, :tailoring, locale)
|
36
29
|
end
|
37
30
|
|
38
31
|
private
|
39
32
|
|
40
|
-
def
|
41
|
-
|
33
|
+
def parse_collation_elements_table(table, trie = TwitterCldr::Collation::Trie.new)
|
34
|
+
table.lines.each do |line|
|
35
|
+
trie.set(parse_code_points($1), parse_collation_element($2)) if COLLATION_ELEMENT_REGEXP =~ line
|
36
|
+
end
|
37
|
+
|
38
|
+
trie
|
42
39
|
end
|
43
40
|
|
44
41
|
def parse_code_points(string)
|
@@ -54,7 +51,7 @@ module TwitterCldr
|
|
54
51
|
def build_tailored_trie(tailoring_data, fallback)
|
55
52
|
trie = TwitterCldr::Collation::TrieWithFallback.new(fallback)
|
56
53
|
|
57
|
-
|
54
|
+
parse_collation_elements_table(tailoring_data[:tailored_table], trie)
|
58
55
|
copy_expansions(trie, fallback, parse_suppressed_starters(tailoring_data[:suppressed_contractions]))
|
59
56
|
|
60
57
|
trie
|
@@ -0,0 +1,50 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
# Copyright 2012 Twitter, Inc
|
4
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
5
|
+
|
6
|
+
module TwitterCldr
|
7
|
+
module Collation
|
8
|
+
|
9
|
+
# Loads fractional collation elements tries from pre-built Marshal dump files (see DUMPS_DIR).
|
10
|
+
#
|
11
|
+
module TrieLoader
|
12
|
+
|
13
|
+
DUMPS_DIR = File.join(TwitterCldr::RESOURCES_DIR, 'collation', 'tries')
|
14
|
+
|
15
|
+
DEFAULT_TRIE_LOCALE = :default
|
16
|
+
|
17
|
+
class << self
|
18
|
+
|
19
|
+
def load_default_trie
|
20
|
+
load_trie
|
21
|
+
end
|
22
|
+
|
23
|
+
def load_tailored_trie(locale, fallback)
|
24
|
+
trie = load_trie(locale)
|
25
|
+
trie.fallback = fallback
|
26
|
+
trie
|
27
|
+
end
|
28
|
+
|
29
|
+
def dump_path(locale)
|
30
|
+
File.join(DUMPS_DIR, "#{locale}.dump")
|
31
|
+
end
|
32
|
+
|
33
|
+
private
|
34
|
+
|
35
|
+
def load_trie(locale = DEFAULT_TRIE_LOCALE)
|
36
|
+
load_dump(locale) do |dump|
|
37
|
+
Marshal.load(dump)
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
def load_dump(locale, &block)
|
42
|
+
File.open(dump_path(locale), 'r', &block)
|
43
|
+
end
|
44
|
+
|
45
|
+
end
|
46
|
+
|
47
|
+
end
|
48
|
+
|
49
|
+
end
|
50
|
+
end
|
@@ -26,17 +26,19 @@ module TwitterCldr
|
|
26
26
|
#
|
27
27
|
# This special behavior of the #find_prefix method allows 'hiding' fallback keys that contain more than one element
|
28
28
|
# by adding their one element prefixes to the trie itself. This feature is useful for some applications, e.g., for
|
29
|
-
# suppressing contractions in a tailored
|
29
|
+
# suppressing contractions in a tailored fractional collation elements trie.
|
30
30
|
#
|
31
31
|
class TrieWithFallback < TwitterCldr::Collation::Trie
|
32
32
|
|
33
|
+
attr_accessor :fallback
|
34
|
+
|
33
35
|
def initialize(fallback)
|
34
36
|
super()
|
35
|
-
|
37
|
+
self.fallback = fallback
|
36
38
|
end
|
37
39
|
|
38
40
|
def get(key)
|
39
|
-
super ||
|
41
|
+
super || fallback.get(key)
|
40
42
|
end
|
41
43
|
|
42
44
|
def find_prefix(key)
|
@@ -45,7 +47,7 @@ module TwitterCldr
|
|
45
47
|
if prefix_size > 0
|
46
48
|
[value, prefix_size, suffixes]
|
47
49
|
else
|
48
|
-
|
50
|
+
fallback.find_prefix(key)
|
49
51
|
end
|
50
52
|
end
|
51
53
|
|
@@ -10,6 +10,7 @@ module TwitterCldr
|
|
10
10
|
autoload :SortKeyBuilder, 'twitter_cldr/collation/sort_key_builder'
|
11
11
|
autoload :Trie, 'twitter_cldr/collation/trie'
|
12
12
|
autoload :TrieBuilder, 'twitter_cldr/collation/trie_builder'
|
13
|
+
autoload :TrieLoader, 'twitter_cldr/collation/trie_loader'
|
13
14
|
autoload :TrieWithFallback, 'twitter_cldr/collation/trie_with_fallback'
|
14
15
|
end
|
15
16
|
end
|
@@ -0,0 +1,203 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
# Copyright 2012 Twitter, Inc
|
4
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
5
|
+
|
6
|
+
require 'nokogiri'
|
7
|
+
require 'yaml'
|
8
|
+
require 'java'
|
9
|
+
|
10
|
+
module TwitterCldr
|
11
|
+
module Resources
|
12
|
+
# This class should be used with JRuby 1.7 in 1.9 mode and ICU4J version 49.1 (available at
|
13
|
+
# http://download.icu-project.org/files/icu4j/49.1/icu4j-49_1.jar).
|
14
|
+
#
|
15
|
+
class TailoringImporter
|
16
|
+
|
17
|
+
SUPPORTED_RULES = %w[p s t i pc sc tc ic x]
|
18
|
+
SIMPLE_RULES = %w[p s t i]
|
19
|
+
LEVEL_RULE_REGEXP = /^(p|s|t|i)(c?)$/
|
20
|
+
|
21
|
+
IGNORED_TAGS = %w[reset text #comment]
|
22
|
+
|
23
|
+
LAST_BYTE_MASK = 0xFF
|
24
|
+
|
25
|
+
LOCALES_MAP = {
|
26
|
+
:'zh-Hant' => :'zh_Hant',
|
27
|
+
:id => :root,
|
28
|
+
:it => :root,
|
29
|
+
:ms => :root,
|
30
|
+
:nl => :root,
|
31
|
+
:pt => :root
|
32
|
+
}
|
33
|
+
|
34
|
+
EMPTY_TAILORING_DATA = { 'collator_options' => {}, 'tailored_table' => '', 'suppressed_contractions' => '' }
|
35
|
+
|
36
|
+
class ImportError < RuntimeError; end
|
37
|
+
|
38
|
+
# Arguments:
|
39
|
+
#
|
40
|
+
# input_path - path to a directory containing CLDR tailoring data (available at
|
41
|
+
# http://unicode.org/cldr/trac/browser/tags/release-21/common/collation/
|
42
|
+
# or as a part of CLDR release at http://cldr.unicode.org/index/downloads)
|
43
|
+
#
|
44
|
+
# output_path - output directory for imported YAML files
|
45
|
+
#
|
46
|
+
# icu4j_path - path to ICU4J jar file
|
47
|
+
#
|
48
|
+
def initialize(input_path, output_path, icu4j_path)
|
49
|
+
require icu4j_path
|
50
|
+
|
51
|
+
@input_path = input_path
|
52
|
+
@output_path = output_path
|
53
|
+
end
|
54
|
+
|
55
|
+
def import(locale)
|
56
|
+
print "Importing %8s\t--\t" % locale
|
57
|
+
|
58
|
+
if tailoring_present?(locale)
|
59
|
+
dump(locale, tailoring_data(locale))
|
60
|
+
puts "Done."
|
61
|
+
else
|
62
|
+
dump(locale, EMPTY_TAILORING_DATA)
|
63
|
+
puts "Missing (generated empty tailoring resource)."
|
64
|
+
end
|
65
|
+
rescue ImportError => e
|
66
|
+
puts "Error: #{e.message}"
|
67
|
+
end
|
68
|
+
|
69
|
+
private
|
70
|
+
|
71
|
+
def dump(locale, data)
|
72
|
+
File.open(resource_file_path(locale), 'w') { |file| YAML.dump(data, file) }
|
73
|
+
end
|
74
|
+
|
75
|
+
def tailoring_present?(locale)
|
76
|
+
File.file?(locale_file_path(locale))
|
77
|
+
end
|
78
|
+
|
79
|
+
def translated_locale(locale)
|
80
|
+
LOCALES_MAP.fetch(locale, locale)
|
81
|
+
end
|
82
|
+
|
83
|
+
def locale_file_path(locale)
|
84
|
+
File.join(@input_path, "#{translated_locale(locale)}.xml")
|
85
|
+
end
|
86
|
+
|
87
|
+
def resource_file_path(locale)
|
88
|
+
File.join(@output_path, "#{locale}.yml")
|
89
|
+
end
|
90
|
+
|
91
|
+
def tailoring_data(locale)
|
92
|
+
doc = File.open(locale_file_path(locale)) { |file| Nokogiri::XML(file) }
|
93
|
+
collations = doc.at_xpath('//collations')
|
94
|
+
|
95
|
+
collation_alias = collations.at_xpath('alias[@path="//ldml/collations"]')
|
96
|
+
aliased_locale = collation_alias && collation_alias.attr('source')
|
97
|
+
|
98
|
+
return tailoring_data(aliased_locale) if aliased_locale
|
99
|
+
|
100
|
+
standard_tailoring = collations.at_xpath('collation[@type="standard"]')
|
101
|
+
|
102
|
+
{
|
103
|
+
'collator_options' => parse_collator_options(standard_tailoring),
|
104
|
+
'tailored_table' => parse_tailorings(standard_tailoring, locale),
|
105
|
+
'suppressed_contractions' => parse_suppressed_contractions(standard_tailoring)
|
106
|
+
}
|
107
|
+
end
|
108
|
+
|
109
|
+
def parse_tailorings(data, locale)
|
110
|
+
rules = data && data.at_xpath('rules')
|
111
|
+
|
112
|
+
return '' unless rules
|
113
|
+
|
114
|
+
collator = Java::ComIbmIcuText::Collator.get_instance(Java::JavaUtil::Locale.new(locale.to_s))
|
115
|
+
|
116
|
+
rules.children.map do |child|
|
117
|
+
validate_tailoring_rule(child)
|
118
|
+
|
119
|
+
if child.name =~ LEVEL_RULE_REGEXP
|
120
|
+
if $2.empty?
|
121
|
+
table_entry_for_rule(collator, child.text)
|
122
|
+
else
|
123
|
+
child.text.chars.map { |char| table_entry_for_rule(collator, char) }
|
124
|
+
end
|
125
|
+
elsif child.name == 'x'
|
126
|
+
context = ''
|
127
|
+
child.children.each_with_object([]) do |c, memo|
|
128
|
+
if SIMPLE_RULES.include?(c.name)
|
129
|
+
memo << table_entry_for_rule(collator, context + c.text)
|
130
|
+
elsif c.name == 'context'
|
131
|
+
context = c.text
|
132
|
+
elsif c.name != 'extend'
|
133
|
+
raise ImportError, "Rule '#{c.name}' inside <x></x> is not supported."
|
134
|
+
end
|
135
|
+
end
|
136
|
+
else
|
137
|
+
raise ImportError, "Tag '#{child.name}' is not supported." unless IGNORED_TAGS.include?(child.name)
|
138
|
+
end
|
139
|
+
end.flatten.compact.join("\n")
|
140
|
+
end
|
141
|
+
|
142
|
+
def table_entry_for_rule(collator, tailored_value)
|
143
|
+
code_points = get_code_points(tailored_value)
|
144
|
+
|
145
|
+
collation_elements = get_collation_elements(collator, tailored_value).map do |ce|
|
146
|
+
ce.map { |l| l.to_s(16).upcase }.join(', ')
|
147
|
+
end
|
148
|
+
|
149
|
+
"#{code_points.join(' ')}; [#{collation_elements.join('][')}]"
|
150
|
+
end
|
151
|
+
|
152
|
+
def parse_suppressed_contractions(data)
|
153
|
+
node = data && data.at_xpath('suppress_contractions')
|
154
|
+
node ? Java::ComIbmIcuText::UnicodeSet.to_array(Java::ComIbmIcuText::UnicodeSet.new(node.text)).to_a.join : ''
|
155
|
+
end
|
156
|
+
|
157
|
+
def parse_collator_options(data)
|
158
|
+
options = {}
|
159
|
+
|
160
|
+
if data
|
161
|
+
case_first_setting = data.at_xpath('settings[@caseFirst]')
|
162
|
+
options['case_first'] = case_first_setting.attr('caseFirst').to_sym if case_first_setting
|
163
|
+
end
|
164
|
+
|
165
|
+
options
|
166
|
+
end
|
167
|
+
|
168
|
+
def validate_tailoring_rule(rule)
|
169
|
+
return if IGNORED_TAGS.include?(rule.name)
|
170
|
+
|
171
|
+
raise ImportError, "Rule '#{rule.name}' is not supported." unless SUPPORTED_RULES.include?(rule.name)
|
172
|
+
end
|
173
|
+
|
174
|
+
def get_collation_elements(collator, string)
|
175
|
+
iter = collator.get_collation_element_iterator(string)
|
176
|
+
|
177
|
+
collation_elements = []
|
178
|
+
ce = iter.next
|
179
|
+
|
180
|
+
while ce != Java::ComIbmIcuText::CollationElementIterator::NULLORDER
|
181
|
+
p1 = (ce >> 24) & LAST_BYTE_MASK
|
182
|
+
p2 = (ce >> 16) & LAST_BYTE_MASK
|
183
|
+
|
184
|
+
primary = p2.zero? ? p1 : (p1 << 8) + p2
|
185
|
+
secondary = (ce >> 8) & LAST_BYTE_MASK
|
186
|
+
tertiarly = ce & LAST_BYTE_MASK
|
187
|
+
|
188
|
+
collation_elements << [primary, secondary, tertiarly]
|
189
|
+
|
190
|
+
ce = iter.next
|
191
|
+
end
|
192
|
+
|
193
|
+
collation_elements
|
194
|
+
end
|
195
|
+
|
196
|
+
def get_code_points(string)
|
197
|
+
TwitterCldr::Normalization::NFD.normalize_code_points(TwitterCldr::Utils::CodePoints.from_string(string))
|
198
|
+
end
|
199
|
+
|
200
|
+
end
|
201
|
+
|
202
|
+
end
|
203
|
+
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
# Copyright 2012 Twitter, Inc
|
4
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
5
|
+
|
6
|
+
module TwitterCldr
|
7
|
+
module Resources
|
8
|
+
|
9
|
+
module TriesDumper
|
10
|
+
|
11
|
+
class << self
|
12
|
+
|
13
|
+
def update_dumps
|
14
|
+
update_default_trie_dump
|
15
|
+
TwitterCldr.supported_locales.each { |locale| update_tailoring_trie_dump(locale) }
|
16
|
+
end
|
17
|
+
|
18
|
+
private
|
19
|
+
|
20
|
+
def update_default_trie_dump
|
21
|
+
save_trie_dump(TwitterCldr::Collation::TrieLoader::DEFAULT_TRIE_LOCALE, default_trie)
|
22
|
+
end
|
23
|
+
|
24
|
+
def update_tailoring_trie_dump(locale)
|
25
|
+
save_trie_dump(locale, TwitterCldr::Collation::TrieBuilder.load_tailored_trie(locale, @default_trie))
|
26
|
+
end
|
27
|
+
|
28
|
+
def save_trie_dump(locale, trie)
|
29
|
+
path = TwitterCldr::Collation::TrieLoader.dump_path(locale)
|
30
|
+
FileUtils.mkdir_p(File.dirname(path))
|
31
|
+
|
32
|
+
File.open(path, 'w') { |f| f.write(Marshal.dump(trie)) }
|
33
|
+
end
|
34
|
+
|
35
|
+
def default_trie
|
36
|
+
@default_trie ||= TwitterCldr::Collation::TrieBuilder.load_default_trie
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
end
|
41
|
+
|
42
|
+
end
|
43
|
+
end
|
@@ -5,7 +5,8 @@
|
|
5
5
|
|
6
6
|
module TwitterCldr
|
7
7
|
module Resources
|
8
|
-
autoload :Loader,
|
9
|
-
autoload :
|
8
|
+
autoload :Loader, 'twitter_cldr/resources/loader'
|
9
|
+
autoload :TailoringImporter, 'twitter_cldr/resources/tailoring_importer'
|
10
|
+
autoload :TriesDumper, 'twitter_cldr/resources/tries_dumper'
|
10
11
|
end
|
11
12
|
end
|
data/lib/twitter_cldr/version.rb
CHANGED
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
@@ -31,7 +31,7 @@ describe 'Unicode Collation Algorithm' do
|
|
31
31
|
|
32
32
|
previous_sort_key = previous_code_points = previous_hex_code_points = nil
|
33
33
|
|
34
|
-
open(file_path, 'r:utf-8') do |file|
|
34
|
+
File.open(file_path, 'r:utf-8') do |file|
|
35
35
|
file.each do |line|
|
36
36
|
next unless /^([0-9A-F ]+);/ =~ line
|
37
37
|
|
@@ -79,7 +79,9 @@ END
|
|
79
79
|
|
80
80
|
print 'extracting CollationTest_CLDR_NON_IGNORABLE.txt ... '
|
81
81
|
Zip::ZipFile.open(zip_file.path) do |zip|
|
82
|
-
open(FULL_COLLATION_TEST_PATH, 'w')
|
82
|
+
File.open(FULL_COLLATION_TEST_PATH, 'w') do |file|
|
83
|
+
file.write(zip.read('CollationAuxiliary/CollationTest_CLDR_NON_IGNORABLE.txt'))
|
84
|
+
end
|
83
85
|
end
|
84
86
|
zip_file.unlink
|
85
87
|
|