twitter_cldr 1.6.1 → 1.6.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (60) hide show
  1. data/History.txt +4 -0
  2. data/Rakefile +8 -3
  3. data/lib/twitter_cldr/collation/collator.rb +9 -11
  4. data/lib/twitter_cldr/collation/trie.rb +8 -0
  5. data/lib/twitter_cldr/collation/trie_builder.rb +12 -15
  6. data/lib/twitter_cldr/collation/trie_loader.rb +50 -0
  7. data/lib/twitter_cldr/collation/trie_with_fallback.rb +6 -4
  8. data/lib/twitter_cldr/collation.rb +1 -0
  9. data/lib/twitter_cldr/resources/tailoring_importer.rb +203 -0
  10. data/lib/twitter_cldr/resources/tries_dumper.rb +43 -0
  11. data/lib/twitter_cldr/resources.rb +3 -2
  12. data/lib/twitter_cldr/version.rb +1 -1
  13. data/resources/collation/tries/af.dump +0 -0
  14. data/resources/collation/tries/ar.dump +0 -0
  15. data/resources/collation/tries/ca.dump +0 -0
  16. data/resources/collation/tries/cs.dump +0 -0
  17. data/resources/collation/tries/da.dump +0 -0
  18. data/resources/collation/tries/de.dump +0 -0
  19. data/resources/collation/tries/default.dump +0 -0
  20. data/resources/collation/tries/el.dump +0 -0
  21. data/resources/collation/tries/en.dump +0 -0
  22. data/resources/collation/tries/es.dump +0 -0
  23. data/resources/collation/tries/eu.dump +0 -0
  24. data/resources/collation/tries/fa.dump +0 -0
  25. data/resources/collation/tries/fi.dump +0 -0
  26. data/resources/collation/tries/fil.dump +0 -0
  27. data/resources/collation/tries/fr.dump +0 -0
  28. data/resources/collation/tries/he.dump +0 -0
  29. data/resources/collation/tries/hi.dump +0 -0
  30. data/resources/collation/tries/hu.dump +0 -0
  31. data/resources/collation/tries/id.dump +0 -0
  32. data/resources/collation/tries/it.dump +0 -0
  33. data/resources/collation/tries/ja.dump +0 -0
  34. data/resources/collation/tries/ko.dump +0 -0
  35. data/resources/collation/tries/ms.dump +0 -0
  36. data/resources/collation/tries/nb.dump +0 -0
  37. data/resources/collation/tries/nl.dump +0 -0
  38. data/resources/collation/tries/pl.dump +0 -0
  39. data/resources/collation/tries/pt.dump +0 -0
  40. data/resources/collation/tries/ru.dump +0 -0
  41. data/resources/collation/tries/sv.dump +0 -0
  42. data/resources/collation/tries/th.dump +0 -0
  43. data/resources/collation/tries/tr.dump +0 -0
  44. data/resources/collation/tries/uk.dump +0 -0
  45. data/resources/collation/tries/ur.dump +0 -0
  46. data/resources/collation/tries/zh-Hant.dump +0 -0
  47. data/resources/collation/tries/zh.dump +0 -0
  48. data/spec/collation/collation_spec.rb +4 -2
  49. data/spec/collation/collator_spec.rb +36 -30
  50. data/spec/collation/tailoring_spec.rb +3 -1
  51. data/spec/collation/tailoring_tests/ja.txt +6 -5
  52. data/spec/collation/trie_builder_spec.rb +21 -26
  53. data/spec/collation/trie_dumps_spec.rb +26 -0
  54. data/spec/collation/trie_loader_spec.rb +72 -0
  55. data/spec/collation/trie_spec.rb +14 -0
  56. data/spec/collation/trie_with_fallback_spec.rb +6 -0
  57. data/spec/normalization/normalization_spec.rb +2 -2
  58. metadata +43 -21
  59. data/lib/twitter_cldr/resources/import/tailoring.rb +0 -202
  60. data/lib/twitter_cldr/resources/import.rb +0 -12
data/History.txt CHANGED
@@ -1,3 +1,7 @@
1
+ === 1.6.2
2
+
3
+ * Collation tries are now loaded from marshal dumps; collation running time improved by ~80%.
4
+
1
5
  === 1.6.1
2
6
 
3
7
  * Added case-first collation element tailoring support for languages like Danish.
data/Rakefile CHANGED
@@ -11,6 +11,8 @@ require 'digest'
11
11
  require 'rspec/core/rake_task'
12
12
  require 'rubygems/package_task'
13
13
 
14
+ require './lib/twitter_cldr'
15
+
14
16
  Bundler::GemHelper.install_tasks
15
17
 
16
18
  task :default => :spec
@@ -42,9 +44,7 @@ namespace :resources do
42
44
  namespace :update do
43
45
  desc 'Import tailoring resources from CLDR data (should be executed using JRuby 1.7 in 1.9 mode)'
44
46
  task :tailoring do
45
- require './lib/twitter_cldr'
46
-
47
- importer = TwitterCldr::Resources::Import::Tailoring.new(
47
+ importer = TwitterCldr::Resources::TailoringImporter.new(
48
48
  ENV.fetch('CLDR_DATA_PATH', '../cldr-tailoring/'),
49
49
  './resources/collation/tailoring',
50
50
  ENV.fetch('ICU4J_JAR_PATH', '../icu4j-49_1.jar')
@@ -52,6 +52,11 @@ namespace :resources do
52
52
 
53
53
  TwitterCldr.supported_locales.each { |locale| importer.import(locale) }
54
54
  end
55
+
56
+ desc 'Updates default and tailoring tries dumps'
57
+ task :tries do
58
+ TwitterCldr::Resources::TriesDumper.update_dumps
59
+ end
55
60
  end
56
61
  end
57
62
 
@@ -11,8 +11,6 @@ module TwitterCldr
11
11
  #
12
12
  class Collator
13
13
 
14
- FRACTIONAL_UCA_SHORT_RESOURCE = 'collation/FractionalUCA_SHORT.txt'
15
-
16
14
  attr_accessor :locale
17
15
 
18
16
  def initialize(locale = nil)
@@ -53,7 +51,7 @@ module TwitterCldr
53
51
  end
54
52
 
55
53
  def load_trie
56
- @locale ? self.class.tailored_fce_trie(@locale) : self.class.default_fce_trie
54
+ @locale ? self.class.tailored_trie(@locale) : self.class.default_trie
57
55
  end
58
56
 
59
57
  def get_integer_code_points(code_points)
@@ -138,24 +136,24 @@ module TwitterCldr
138
136
 
139
137
  class << self
140
138
 
141
- # Loads and memoizes the default Fractional Collation Elements trie.
139
+ # Loads and memoizes the default fractional collation elements trie.
142
140
  #
143
- def default_fce_trie
144
- @default_fce_trie ||= TwitterCldr::Collation::TrieBuilder.load_trie(FRACTIONAL_UCA_SHORT_RESOURCE).lock
141
+ def default_trie
142
+ @default_trie ||= TwitterCldr::Collation::TrieLoader.load_default_trie.lock
145
143
  end
146
144
 
147
- def tailored_fce_trie(locale)
148
- tailored_fce_tries_cache[locale]
145
+ def tailored_trie(locale)
146
+ tailored_tries_cache[locale]
149
147
  end
150
148
 
151
149
  private
152
150
 
153
- def tailored_fce_tries_cache
154
- @tailored_fce_tries_cache ||= Hash.new { |hash, locale| hash[locale] = load_tailored_trie(locale) }
151
+ def tailored_tries_cache
152
+ @tailored_tries_cache ||= Hash.new { |hash, locale| hash[locale] = load_tailored_trie(locale) }
155
153
  end
156
154
 
157
155
  def load_tailored_trie(locale)
158
- TwitterCldr::Collation::TrieBuilder.load_tailored_trie(locale, default_fce_trie).lock
156
+ TwitterCldr::Collation::TrieLoader.load_tailored_trie(locale, default_trie).lock
159
157
  end
160
158
 
161
159
  end
@@ -91,6 +91,14 @@ module TwitterCldr
91
91
  [last_with_value.value, last_prefix_size, last_with_value.to_trie]
92
92
  end
93
93
 
94
+ def marshal_dump
95
+ @root
96
+ end
97
+
98
+ def marshal_load(root)
99
+ @root = root
100
+ end
101
+
94
102
  def to_hash
95
103
  @root.subtrie_hash
96
104
  end
@@ -10,35 +10,32 @@ module TwitterCldr
10
10
  #
11
11
  module TrieBuilder
12
12
 
13
- # Fractional collation element regexp
14
- FCE_REGEXP = /^((?:[0-9A-F]+)(?:\s[0-9A-F]+)*);\s((?:\[.*?\])(?:\[.*?\])*)/
13
+ COLLATION_ELEMENT_REGEXP = /^((?:[0-9A-F]+)(?:\s[0-9A-F]+)*);\s((?:\[.*?\])(?:\[.*?\])*)/
14
+
15
+ FRACTIONAL_UCA_SHORT_PATH = File.join(TwitterCldr::RESOURCES_DIR, 'collation', 'FractionalUCA_SHORT.txt')
15
16
 
16
17
  class << self
17
18
 
18
- def load_trie(resource)
19
- parse_trie(load_resource(resource))
19
+ def load_default_trie
20
+ File.open(FRACTIONAL_UCA_SHORT_PATH, 'r') { |table| parse_collation_elements_table(table) }
20
21
  end
21
22
 
22
23
  def load_tailored_trie(locale, fallback)
23
24
  build_tailored_trie(tailoring_data(locale), fallback)
24
25
  end
25
26
 
26
- def parse_trie(table, trie = TwitterCldr::Collation::Trie.new)
27
- table.lines.each do |line|
28
- trie.set(parse_code_points($1), parse_collation_element($2)) if FCE_REGEXP =~ line
29
- end
30
-
31
- trie
32
- end
33
-
34
27
  def tailoring_data(locale)
35
28
  TwitterCldr.get_resource(:collation, :tailoring, locale)
36
29
  end
37
30
 
38
31
  private
39
32
 
40
- def load_resource(resource)
41
- open(File.join(TwitterCldr::RESOURCES_DIR, resource), 'r')
33
+ def parse_collation_elements_table(table, trie = TwitterCldr::Collation::Trie.new)
34
+ table.lines.each do |line|
35
+ trie.set(parse_code_points($1), parse_collation_element($2)) if COLLATION_ELEMENT_REGEXP =~ line
36
+ end
37
+
38
+ trie
42
39
  end
43
40
 
44
41
  def parse_code_points(string)
@@ -54,7 +51,7 @@ module TwitterCldr
54
51
  def build_tailored_trie(tailoring_data, fallback)
55
52
  trie = TwitterCldr::Collation::TrieWithFallback.new(fallback)
56
53
 
57
- parse_trie(tailoring_data[:tailored_table], trie)
54
+ parse_collation_elements_table(tailoring_data[:tailored_table], trie)
58
55
  copy_expansions(trie, fallback, parse_suppressed_starters(tailoring_data[:suppressed_contractions]))
59
56
 
60
57
  trie
@@ -0,0 +1,50 @@
1
+ # encoding: UTF-8
2
+
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
6
+ module TwitterCldr
7
+ module Collation
8
+
9
+ # Loads pre-built fractional collation elements tries (default and tailored) from marshal dump files.
10
+ #
11
+ module TrieLoader
12
+
13
+ DUMPS_DIR = File.join(TwitterCldr::RESOURCES_DIR, 'collation', 'tries')
14
+
15
+ DEFAULT_TRIE_LOCALE = :default
16
+
17
+ class << self
18
+
19
+ def load_default_trie
20
+ load_trie
21
+ end
22
+
23
+ def load_tailored_trie(locale, fallback)
24
+ trie = load_trie(locale)
25
+ trie.fallback = fallback
26
+ trie
27
+ end
28
+
29
+ def dump_path(locale)
30
+ File.join(DUMPS_DIR, "#{locale}.dump")
31
+ end
32
+
33
+ private
34
+
35
+ def load_trie(locale = DEFAULT_TRIE_LOCALE)
36
+ load_dump(locale) do |dump|
37
+ Marshal.load(dump)
38
+ end
39
+ end
40
+
41
+ def load_dump(locale, &block)
42
+ File.open(dump_path(locale), 'r', &block)
43
+ end
44
+
45
+ end
46
+
47
+ end
48
+
49
+ end
50
+ end
@@ -26,17 +26,19 @@ module TwitterCldr
26
26
  #
27
27
  # This special behavior of the #find_prefix method allows 'hiding' fallback keys that contain more than one element
28
28
  # by adding their one element prefixes to the trie itself. This feature is useful for some applications, e.g., for
29
- # suppressing contractions in a tailored FCE trie.
29
+ # suppressing contractions in a tailored fractional collation elements trie.
30
30
  #
31
31
  class TrieWithFallback < TwitterCldr::Collation::Trie
32
32
 
33
+ attr_accessor :fallback
34
+
33
35
  def initialize(fallback)
34
36
  super()
35
- @fallback = fallback
37
+ self.fallback = fallback
36
38
  end
37
39
 
38
40
  def get(key)
39
- super || @fallback.get(key)
41
+ super || fallback.get(key)
40
42
  end
41
43
 
42
44
  def find_prefix(key)
@@ -45,7 +47,7 @@ module TwitterCldr
45
47
  if prefix_size > 0
46
48
  [value, prefix_size, suffixes]
47
49
  else
48
- @fallback.find_prefix(key)
50
+ fallback.find_prefix(key)
49
51
  end
50
52
  end
51
53
 
@@ -10,6 +10,7 @@ module TwitterCldr
10
10
  autoload :SortKeyBuilder, 'twitter_cldr/collation/sort_key_builder'
11
11
  autoload :Trie, 'twitter_cldr/collation/trie'
12
12
  autoload :TrieBuilder, 'twitter_cldr/collation/trie_builder'
13
+ autoload :TrieLoader, 'twitter_cldr/collation/trie_loader'
13
14
  autoload :TrieWithFallback, 'twitter_cldr/collation/trie_with_fallback'
14
15
  end
15
16
  end
@@ -0,0 +1,203 @@
1
+ # encoding: UTF-8
2
+
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
6
+ require 'nokogiri'
7
+ require 'yaml'
8
+ require 'java'
9
+
10
+ module TwitterCldr
11
+ module Resources
12
+ # This class should be used with JRuby 1.7 in 1.9 mode and ICU4J version 49.1 (available at
13
+ # http://download.icu-project.org/files/icu4j/49.1/icu4j-49_1.jar).
14
+ #
15
+ class TailoringImporter
16
+
17
+ SUPPORTED_RULES = %w[p s t i pc sc tc ic x]
18
+ SIMPLE_RULES = %w[p s t i]
19
+ LEVEL_RULE_REGEXP = /^(p|s|t|i)(c?)$/
20
+
21
+ IGNORED_TAGS = %w[reset text #comment]
22
+
23
+ LAST_BYTE_MASK = 0xFF
24
+
25
+ LOCALES_MAP = {
26
+ :'zh-Hant' => :'zh_Hant',
27
+ :id => :root,
28
+ :it => :root,
29
+ :ms => :root,
30
+ :nl => :root,
31
+ :pt => :root
32
+ }
33
+
34
+ EMPTY_TAILORING_DATA = { 'collator_options' => {}, 'tailored_table' => '', 'suppressed_contractions' => '' }
35
+
36
+ class ImportError < RuntimeError; end
37
+
38
+ # Arguments:
39
+ #
40
+ # input_path - path to a directory containing CLDR tailoring data (available at
41
+ # http://unicode.org/cldr/trac/browser/tags/release-21/common/collation/
42
+ # or as a part of CLDR release at http://cldr.unicode.org/index/downloads)
43
+ #
44
+ # output_path - output directory for imported YAML files
45
+ #
46
+ # icu4j_path - path to ICU4J jar file
47
+ #
48
+ def initialize(input_path, output_path, icu4j_path)
49
+ require icu4j_path
50
+
51
+ @input_path = input_path
52
+ @output_path = output_path
53
+ end
54
+
55
+ def import(locale)
56
+ print "Importing %8s\t--\t" % locale
57
+
58
+ if tailoring_present?(locale)
59
+ dump(locale, tailoring_data(locale))
60
+ puts "Done."
61
+ else
62
+ dump(locale, EMPTY_TAILORING_DATA)
63
+ puts "Missing (generated empty tailoring resource)."
64
+ end
65
+ rescue ImportError => e
66
+ puts "Error: #{e.message}"
67
+ end
68
+
69
+ private
70
+
71
+ def dump(locale, data)
72
+ File.open(resource_file_path(locale), 'w') { |file| YAML.dump(data, file) }
73
+ end
74
+
75
+ def tailoring_present?(locale)
76
+ File.file?(locale_file_path(locale))
77
+ end
78
+
79
+ def translated_locale(locale)
80
+ LOCALES_MAP.fetch(locale, locale)
81
+ end
82
+
83
+ def locale_file_path(locale)
84
+ File.join(@input_path, "#{translated_locale(locale)}.xml")
85
+ end
86
+
87
+ def resource_file_path(locale)
88
+ File.join(@output_path, "#{locale}.yml")
89
+ end
90
+
91
+ def tailoring_data(locale)
92
+ doc = File.open(locale_file_path(locale)) { |file| Nokogiri::XML(file) }
93
+ collations = doc.at_xpath('//collations')
94
+
95
+ collation_alias = collations.at_xpath('alias[@path="//ldml/collations"]')
96
+ aliased_locale = collation_alias && collation_alias.attr('source')
97
+
98
+ return tailoring_data(aliased_locale) if aliased_locale
99
+
100
+ standard_tailoring = collations.at_xpath('collation[@type="standard"]')
101
+
102
+ {
103
+ 'collator_options' => parse_collator_options(standard_tailoring),
104
+ 'tailored_table' => parse_tailorings(standard_tailoring, locale),
105
+ 'suppressed_contractions' => parse_suppressed_contractions(standard_tailoring)
106
+ }
107
+ end
108
+
109
+ def parse_tailorings(data, locale)
110
+ rules = data && data.at_xpath('rules')
111
+
112
+ return '' unless rules
113
+
114
+ collator = Java::ComIbmIcuText::Collator.get_instance(Java::JavaUtil::Locale.new(locale.to_s))
115
+
116
+ rules.children.map do |child|
117
+ validate_tailoring_rule(child)
118
+
119
+ if child.name =~ LEVEL_RULE_REGEXP
120
+ if $2.empty?
121
+ table_entry_for_rule(collator, child.text)
122
+ else
123
+ child.text.chars.map { |char| table_entry_for_rule(collator, char) }
124
+ end
125
+ elsif child.name == 'x'
126
+ context = ''
127
+ child.children.each_with_object([]) do |c, memo|
128
+ if SIMPLE_RULES.include?(c.name)
129
+ memo << table_entry_for_rule(collator, context + c.text)
130
+ elsif c.name == 'context'
131
+ context = c.text
132
+ elsif c.name != 'extend'
133
+ raise ImportError, "Rule '#{c.name}' inside <x></x> is not supported."
134
+ end
135
+ end
136
+ else
137
+ raise ImportError, "Tag '#{child.name}' is not supported." unless IGNORED_TAGS.include?(child.name)
138
+ end
139
+ end.flatten.compact.join("\n")
140
+ end
141
+
142
+ def table_entry_for_rule(collator, tailored_value)
143
+ code_points = get_code_points(tailored_value)
144
+
145
+ collation_elements = get_collation_elements(collator, tailored_value).map do |ce|
146
+ ce.map { |l| l.to_s(16).upcase }.join(', ')
147
+ end
148
+
149
+ "#{code_points.join(' ')}; [#{collation_elements.join('][')}]"
150
+ end
151
+
152
+ def parse_suppressed_contractions(data)
153
+ node = data && data.at_xpath('suppress_contractions')
154
+ node ? Java::ComIbmIcuText::UnicodeSet.to_array(Java::ComIbmIcuText::UnicodeSet.new(node.text)).to_a.join : ''
155
+ end
156
+
157
+ def parse_collator_options(data)
158
+ options = {}
159
+
160
+ if data
161
+ case_first_setting = data.at_xpath('settings[@caseFirst]')
162
+ options['case_first'] = case_first_setting.attr('caseFirst').to_sym if case_first_setting
163
+ end
164
+
165
+ options
166
+ end
167
+
168
+ def validate_tailoring_rule(rule)
169
+ return if IGNORED_TAGS.include?(rule.name)
170
+
171
+ raise ImportError, "Rule '#{rule.name}' is not supported." unless SUPPORTED_RULES.include?(rule.name)
172
+ end
173
+
174
+ def get_collation_elements(collator, string)
175
+ iter = collator.get_collation_element_iterator(string)
176
+
177
+ collation_elements = []
178
+ ce = iter.next
179
+
180
+ while ce != Java::ComIbmIcuText::CollationElementIterator::NULLORDER
181
+ p1 = (ce >> 24) & LAST_BYTE_MASK
182
+ p2 = (ce >> 16) & LAST_BYTE_MASK
183
+
184
+ primary = p2.zero? ? p1 : (p1 << 8) + p2
185
+ secondary = (ce >> 8) & LAST_BYTE_MASK
186
+ tertiarly = ce & LAST_BYTE_MASK
187
+
188
+ collation_elements << [primary, secondary, tertiarly]
189
+
190
+ ce = iter.next
191
+ end
192
+
193
+ collation_elements
194
+ end
195
+
196
+ def get_code_points(string)
197
+ TwitterCldr::Normalization::NFD.normalize_code_points(TwitterCldr::Utils::CodePoints.from_string(string))
198
+ end
199
+
200
+ end
201
+
202
+ end
203
+ end
@@ -0,0 +1,43 @@
1
+ # encoding: UTF-8
2
+
3
+ # Copyright 2012 Twitter, Inc
4
+ # http://www.apache.org/licenses/LICENSE-2.0
5
+
6
+ module TwitterCldr
7
+ module Resources
8
+
9
+ module TriesDumper
10
+
11
+ class << self
12
+
13
+ def update_dumps
14
+ update_default_trie_dump
15
+ TwitterCldr.supported_locales.each { |locale| update_tailoring_trie_dump(locale) }
16
+ end
17
+
18
+ private
19
+
20
+ def update_default_trie_dump
21
+ save_trie_dump(TwitterCldr::Collation::TrieLoader::DEFAULT_TRIE_LOCALE, default_trie)
22
+ end
23
+
24
+ def update_tailoring_trie_dump(locale)
25
+ save_trie_dump(locale, TwitterCldr::Collation::TrieBuilder.load_tailored_trie(locale, @default_trie))
26
+ end
27
+
28
+ def save_trie_dump(locale, trie)
29
+ path = TwitterCldr::Collation::TrieLoader.dump_path(locale)
30
+ FileUtils.mkdir_p(File.dirname(path))
31
+
32
+ File.open(path, 'w') { |f| f.write(Marshal.dump(trie)) }
33
+ end
34
+
35
+ def default_trie
36
+ @default_trie ||= TwitterCldr::Collation::TrieBuilder.load_default_trie
37
+ end
38
+ end
39
+
40
+ end
41
+
42
+ end
43
+ end
@@ -5,7 +5,8 @@
5
5
 
6
6
  module TwitterCldr
7
7
  module Resources
8
- autoload :Loader, 'twitter_cldr/resources/loader'
9
- autoload :Import, 'twitter_cldr/resources/import'
8
+ autoload :Loader, 'twitter_cldr/resources/loader'
9
+ autoload :TailoringImporter, 'twitter_cldr/resources/tailoring_importer'
10
+ autoload :TriesDumper, 'twitter_cldr/resources/tries_dumper'
10
11
  end
11
12
  end
@@ -4,5 +4,5 @@
4
4
  # http://www.apache.org/licenses/LICENSE-2.0
5
5
 
6
6
  module TwitterCldr
7
- VERSION = "1.6.1"
7
+ VERSION = "1.6.2"
8
8
  end
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
@@ -31,7 +31,7 @@ describe 'Unicode Collation Algorithm' do
31
31
 
32
32
  previous_sort_key = previous_code_points = previous_hex_code_points = nil
33
33
 
34
- open(file_path, 'r:utf-8') do |file|
34
+ File.open(file_path, 'r:utf-8') do |file|
35
35
  file.each do |line|
36
36
  next unless /^([0-9A-F ]+);/ =~ line
37
37
 
@@ -79,7 +79,9 @@ END
79
79
 
80
80
  print 'extracting CollationTest_CLDR_NON_IGNORABLE.txt ... '
81
81
  Zip::ZipFile.open(zip_file.path) do |zip|
82
- open(FULL_COLLATION_TEST_PATH, 'w') { |file| file.write(zip.read('CollationAuxiliary/CollationTest_CLDR_NON_IGNORABLE.txt')) }
82
+ File.open(FULL_COLLATION_TEST_PATH, 'w') do |file|
83
+ file.write(zip.read('CollationAuxiliary/CollationTest_CLDR_NON_IGNORABLE.txt'))
84
+ end
83
85
  end
84
86
  zip_file.unlink
85
87