text-hyphen 1.4 → 1.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.standard.yml +5 -0
- data/Code-of-Conduct.md +73 -0
- data/Contributing.md +68 -0
- data/History.md +139 -0
- data/Licence.md +159 -0
- data/Manifest.txt +15 -5
- data/README.md +81 -0
- data/Rakefile +68 -19
- data/bin/ruby-hyphen +0 -0
- data/lib/text/hyphen/language/1.8/de1.rb +1307 -571
- data/lib/text/hyphen/language/1.8/en_us.rb +412 -453
- data/lib/text/hyphen/language/1.8/fr.rb +128 -334
- data/lib/text/hyphen/language/1.8/la.rb +1 -0
- data/lib/text/hyphen/language/1.8/lt.rb +115 -0
- data/lib/text/hyphen/language/1.8/pt.rb +2 -1
- data/lib/text/hyphen/language/1.8/ru.rb +434 -0
- data/lib/text/hyphen/language/1.8/sk.rb +277 -0
- data/lib/text/hyphen/language/1.9/ca.rb +2 -1
- data/lib/text/hyphen/language/1.9/cs.rb +2 -1
- data/lib/text/hyphen/language/1.9/da.rb +2 -1
- data/lib/text/hyphen/language/1.9/de1.rb +1382 -646
- data/lib/text/hyphen/language/1.9/de2.rb +110 -109
- data/lib/text/hyphen/language/1.9/en_uk.rb +2 -1
- data/lib/text/hyphen/language/1.9/en_us.rb +412 -454
- data/lib/text/hyphen/language/1.9/es.rb +2 -1
- data/lib/text/hyphen/language/1.9/et.rb +6 -5
- data/lib/text/hyphen/language/1.9/eu.rb +4 -3
- data/lib/text/hyphen/language/1.9/fi.rb +3 -2
- data/lib/text/hyphen/language/1.9/fr.rb +136 -343
- data/lib/text/hyphen/language/1.9/ga.rb +27 -26
- data/lib/text/hyphen/language/1.9/hr.rb +6 -5
- data/lib/text/hyphen/language/1.9/hsb.rb +3 -2
- data/lib/text/hyphen/language/1.9/hu1.rb +3 -2
- data/lib/text/hyphen/language/1.9/hu2.rb +5 -4
- data/lib/text/hyphen/language/1.9/ia.rb +2 -1
- data/lib/text/hyphen/language/1.9/id.rb +8 -7
- data/lib/text/hyphen/language/1.9/is.rb +2 -1
- data/lib/text/hyphen/language/1.9/it.rb +74 -74
- data/lib/text/hyphen/language/1.9/la.rb +54 -53
- data/lib/text/hyphen/language/1.9/lt.rb +116 -0
- data/lib/text/hyphen/language/1.9/mn.rb +7 -6
- data/lib/text/hyphen/language/1.9/nl.rb +2 -1
- data/lib/text/hyphen/language/1.9/no1.rb +3 -2
- data/lib/text/hyphen/language/1.9/no2.rb +3 -2
- data/lib/text/hyphen/language/1.9/pl.rb +2 -1
- data/lib/text/hyphen/language/1.9/pt.rb +3 -2
- data/lib/text/hyphen/language/1.9/ru.rb +437 -0
- data/lib/text/hyphen/language/1.9/sk.rb +280 -0
- data/lib/text/hyphen/language/1.9/sv.rb +4 -3
- data/lib/text/hyphen/language/cs.rb +1 -1
- data/lib/text/hyphen/language/de.rb +2 -1
- data/lib/text/hyphen/language/de1.rb +1 -1
- data/lib/text/hyphen/language/de2.rb +1 -1
- data/lib/text/hyphen/language/en_us.rb +1 -1
- data/lib/text/hyphen/language/eu.rb +1 -1
- data/lib/text/hyphen/language/fr.rb +1 -1
- data/lib/text/hyphen/language/hu.rb +1 -1
- data/lib/text/hyphen/language/hu1.rb +1 -1
- data/lib/text/hyphen/language/hu2.rb +1 -1
- data/lib/text/hyphen/language/is.rb +1 -1
- data/lib/text/hyphen/language/lt.rb +4 -0
- data/lib/text/hyphen/language/ms.rb +3 -3
- data/lib/text/hyphen/language/nl.rb +1 -1
- data/lib/text/hyphen/language/no.rb +1 -1
- data/lib/text/hyphen/language/ru.rb +4 -0
- data/lib/text/hyphen/language/sk.rb +4 -0
- data/lib/text/hyphen/language.rb +45 -45
- data/lib/text/hyphen.rb +139 -97
- data/lib/text-hyphen.rb +1 -1
- data/test/data/bug_9807_latin1.rb +2 -2
- data/test/data/bug_9807_utf-8.rb +1 -1
- data/test/test_bugs.rb +14 -13
- data/test/test_text_hyphen.rb +31 -21
- metadata +143 -106
- data/.autotest +0 -23
- data/.gemtest +0 -0
- data/History.rdoc +0 -92
- data/License.rdoc +0 -159
- data/README.rdoc +0 -95
- data/text-hyphen.gemspec +0 -51
data/lib/text/hyphen.rb
CHANGED
@@ -7,10 +7,21 @@ end
|
|
7
7
|
# hyphenation algorithm with pattern files. Each object is constructed with
|
8
8
|
# a specific language's hyphenation patterns.
|
9
9
|
class Text::Hyphen
|
10
|
-
|
11
|
-
|
10
|
+
# Resolves a file for cleaner loading from a hyphenation loader file.
|
11
|
+
def self.require_real_hyphenation_file(loader) # :nodoc:
|
12
|
+
p = File.dirname(loader)
|
13
|
+
f = File.basename(loader)
|
14
|
+
v = if RUBY_VERSION < "1.9.1"
|
15
|
+
"1.8"
|
16
|
+
else
|
17
|
+
"1.9"
|
18
|
+
end
|
19
|
+
require File.join(p, v, f)
|
20
|
+
end
|
12
21
|
|
13
|
-
|
22
|
+
VERSION = "1.5.0"
|
23
|
+
|
24
|
+
DEFAULT_MIN_LEFT = 2
|
14
25
|
DEFAULT_MIN_RIGHT = 2
|
15
26
|
|
16
27
|
# No fewer than this number of letters will show up to the left of the
|
@@ -26,31 +37,31 @@ class Text::Hyphen
|
|
26
37
|
# two or three character ISO 639 code, with the two character form being
|
27
38
|
# the canonical resource name. This will load the language hyphenation
|
28
39
|
# definitions from text/hyphen/language/<code> as a Ruby class. The
|
29
|
-
# resource
|
40
|
+
# resource "text/hyphen/language/en_us" defines the language class
|
30
41
|
# Text::Hyphen::Language::EN_US. It also defines the secondary forms
|
31
42
|
# Text::Hyphen::Language::EN and Text::Hyphen::Language::ENG_US.
|
32
43
|
#
|
33
44
|
# Minimal transformations will be performed on the language code provided,
|
34
|
-
# such that any dashes are converted to underscores (e.g.,
|
35
|
-
#
|
36
|
-
# downcased and class names will be converted to uppercase (e.g.,
|
37
|
-
# the Portuguese language becomes
|
45
|
+
# such that any dashes are converted to underscores (e.g., "en-us" becomes
|
46
|
+
# "en_us") and all characters are regularised. Resource names will be
|
47
|
+
# downcased and class names will be converted to uppercase (e.g., "Pt" for
|
48
|
+
# the Portuguese language becomes "pt" and "PT", respectively).
|
38
49
|
#
|
39
50
|
# The language may also be specified as an instance of
|
40
51
|
# Text::Hyphen::Language.
|
41
|
-
|
52
|
+
#
|
53
|
+
# :attr_accessor: language
|
54
|
+
attr_reader :language
|
42
55
|
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
if lang.kind_of? Text::Hyphen::Language
|
56
|
+
def language=(lang) # :nodoc:
|
57
|
+
require "text/hyphen/language" unless defined?(Text::Hyphen::Language)
|
58
|
+
if lang.is_a? Text::Hyphen::Language
|
47
59
|
@iso_language = lang.to_s.split(%r{::}o)[-1].downcase
|
48
|
-
@language
|
60
|
+
@language = lang
|
49
61
|
else
|
50
62
|
@iso_language = lang.downcase
|
51
63
|
load_language
|
52
64
|
end
|
53
|
-
@iso_language
|
54
65
|
end
|
55
66
|
|
56
67
|
# Returns the language's ISO 639 ID, e.g., "en_us" or "pt".
|
@@ -70,23 +81,22 @@ class Text::Hyphen
|
|
70
81
|
# methods in an initialization block. The following initializations are
|
71
82
|
# all equivalent:
|
72
83
|
#
|
73
|
-
# hyp = Text::Hyphenate.new(:
|
74
|
-
# hyp = Text::Hyphenate.new
|
75
|
-
# hyp = Text::Hyphenate.new { |h| h.language = 'en_us' }
|
84
|
+
# hyp = Text::Hyphen.new(language: "en_us")
|
85
|
+
# hyp = Text::Hyphen.new { |h| h.language = "en_us" }
|
76
86
|
def initialize(options = {}) # :yields self:
|
77
87
|
@iso_language = options[:language]
|
78
|
-
@left
|
79
|
-
@right
|
80
|
-
@language
|
88
|
+
@left = options[:left]
|
89
|
+
@right = options[:right]
|
90
|
+
@language = nil
|
81
91
|
|
82
|
-
@cache
|
83
|
-
@vcache
|
92
|
+
@cache = {}
|
93
|
+
@vcache = {}
|
84
94
|
|
85
|
-
@hyphen
|
95
|
+
@hyphen = {}
|
86
96
|
@begin_hyphen = {}
|
87
|
-
@end_hyphen
|
88
|
-
@both_hyphen
|
89
|
-
@exception
|
97
|
+
@end_hyphen = {}
|
98
|
+
@both_hyphen = {}
|
99
|
+
@exception = {}
|
90
100
|
|
91
101
|
@first_load = true
|
92
102
|
yield self if block_given?
|
@@ -94,57 +104,87 @@ class Text::Hyphen
|
|
94
104
|
|
95
105
|
load_language
|
96
106
|
|
97
|
-
@left
|
107
|
+
@left ||= DEFAULT_MIN_LEFT
|
98
108
|
@right ||= DEFAULT_MIN_RIGHT
|
99
109
|
end
|
100
110
|
|
101
111
|
# Returns an array of character positions where a word can be hyphenated.
|
102
112
|
#
|
103
|
-
# hyp.hyphenate(
|
113
|
+
# hyp.hyphenate("representation") #=> [3, 5, 8, 10]
|
104
114
|
#
|
105
115
|
# Because hyphenation can be expensive, if the word has been hyphenated
|
106
116
|
# previously, it will be returned from a per-instance cache.
|
117
|
+
#
|
118
|
+
# #hyphenate supports phrase hyphenation:
|
119
|
+
#
|
120
|
+
# hyp.hyphenate("This useful library supports phrases and sentences.")
|
121
|
+
# #=> [8, 14, 23, 27, 34, 44]
|
122
|
+
#
|
123
|
+
# When phrases are hyphenated, each word is processed individually and the
|
124
|
+
# result is returned as a single continuous list of hyphenation points.
|
107
125
|
def hyphenate(word)
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
126
|
+
words = if phrase?(word)
|
127
|
+
word.downcase.split(/[[:space:]]/)
|
128
|
+
else
|
129
|
+
[word.downcase]
|
130
|
+
end
|
131
|
+
|
132
|
+
points = words.map do |word|
|
133
|
+
next @cache[word] if @cache.has_key?(word)
|
134
|
+
|
135
|
+
if (exception = @language.exceptions[word])
|
136
|
+
next @cache[word] = make_result_list(exception)
|
137
|
+
end
|
138
|
+
|
139
|
+
letters = word.scan(@language.scan_re)
|
140
|
+
word_size = letters.size
|
141
|
+
|
142
|
+
result = [0] * (word_size + 1)
|
143
|
+
right_stop = word_size - @right
|
144
|
+
|
145
|
+
updater = proc do |hash, str, pos|
|
146
|
+
if hash.has_key?(str)
|
147
|
+
hash[str].scan(@language.scan_re).each_with_index do |cc, ii|
|
148
|
+
cc = cc.to_i
|
149
|
+
result[ii + pos] = cc if cc > result[ii + pos]
|
150
|
+
end
|
127
151
|
end
|
128
|
-
$stderr.print ": #{result.inspect}\n" if DEBUG
|
129
152
|
end
|
130
|
-
end
|
131
153
|
|
132
154
|
# Walk the word
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
155
|
+
(0..right_stop).each do |pos|
|
156
|
+
rest_length = word_size - pos
|
157
|
+
(1..rest_length).each do |length|
|
158
|
+
substr = letters[pos, length].join("")
|
159
|
+
updater[@language.hyphen, substr, pos]
|
160
|
+
updater[@language.start, substr, pos] if pos.zero?
|
161
|
+
updater[@language.stop, substr, pos] if length == rest_length
|
162
|
+
end
|
140
163
|
end
|
164
|
+
|
165
|
+
updater[@language.both, word, 0] if @language.both[word]
|
166
|
+
|
167
|
+
(0..@left).each { |i| result[i] = 0 }
|
168
|
+
((-1 - @right)..-1).each { |i| result[i] = 0 }
|
169
|
+
@cache[word] = make_result_list(result)
|
141
170
|
end
|
142
171
|
|
143
|
-
|
172
|
+
if points.length > 1
|
173
|
+
offset = 0
|
174
|
+
result = []
|
144
175
|
|
145
|
-
|
146
|
-
|
147
|
-
|
176
|
+
points.each_with_index do |word, i|
|
177
|
+
word.each do |pos|
|
178
|
+
result << pos + offset
|
179
|
+
end
|
180
|
+
|
181
|
+
offset += words[i].length + 1
|
182
|
+
end
|
183
|
+
|
184
|
+
result
|
185
|
+
else
|
186
|
+
points.flatten
|
187
|
+
end
|
148
188
|
end
|
149
189
|
|
150
190
|
# Returns a visualization of the hyphenation points.
|
@@ -157,8 +197,15 @@ class Text::Hyphen
|
|
157
197
|
#
|
158
198
|
# Because hyphenation can be expensive, if the word has been visualised
|
159
199
|
# previously, it will be returned from a per-instance cache.
|
160
|
-
|
200
|
+
#
|
201
|
+
# #visualise supports phrase hyphenation:
|
202
|
+
#
|
203
|
+
# hyp.visualise("This useful library supports phrases and sentences.")
|
204
|
+
# #=> This use-ful li-brary sup-port-s phras-es and sen-tences.
|
205
|
+
def visualise(word, hyphen = "-")
|
206
|
+
return visualise_phrase(word, hyphen) if phrase?(word)
|
161
207
|
return @vcache[word] if @vcache.has_key?(word)
|
208
|
+
|
162
209
|
w = word.dup
|
163
210
|
s = hyphen.size
|
164
211
|
hyphenate(w).each_with_index do |pos, n|
|
@@ -168,7 +215,7 @@ class Text::Hyphen
|
|
168
215
|
end
|
169
216
|
@vcache[word] = w
|
170
217
|
end
|
171
|
-
|
218
|
+
alias_method :visualize, :visualise
|
172
219
|
|
173
220
|
# Clears the per-instance hyphenation and visualization caches.
|
174
221
|
def clear_cache!
|
@@ -177,29 +224,33 @@ class Text::Hyphen
|
|
177
224
|
end
|
178
225
|
|
179
226
|
# This function will hyphenate a word so that the first point is at most
|
227
|
+
# +size+ characters.
|
180
228
|
#
|
181
229
|
# NOTE: if hyphen is set to a string, it will still be counted as one
|
182
230
|
# character (since it represents a hyphen)
|
183
231
|
#
|
184
|
-
#
|
185
|
-
|
232
|
+
# #hyphenate_to does not support phrase hyphenation and will throw an
|
233
|
+
# ArgumentError if the word contains internal whitespace.
|
234
|
+
def hyphenate_to(word, size, hyphen = "-")
|
235
|
+
raise ArgumentError, "#hyphenate_to does not support phrases" if phrase?(word)
|
236
|
+
|
186
237
|
point = hyphenate(word).delete_if { |e| e >= size }.max
|
187
238
|
if point.nil?
|
188
239
|
[nil, word]
|
189
240
|
else
|
190
|
-
[word[0
|
241
|
+
[word[0...point] + hyphen, word[point..-1]]
|
191
242
|
end
|
192
243
|
end
|
193
244
|
|
194
245
|
# Returns a string describing the structure of the patterns for the
|
195
246
|
# language of this hyphenation object.
|
196
247
|
def stats
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
248
|
+
stats_both = @language.both.size
|
249
|
+
stats_start = @language.start.size
|
250
|
+
stats_end = @language.stop.size
|
251
|
+
stats_hyphens = @language.hyphen.size
|
252
|
+
stats_exceptions = @language.exceptions.size
|
253
|
+
stats_total = stats_both + stats_start + stats_end + stats_hyphens + stats_exceptions
|
203
254
|
|
204
255
|
s = <<-EOS
|
205
256
|
|
@@ -210,25 +261,13 @@ The language '%s' contains %d total hyphenation patterns.
|
|
210
261
|
% 6d patterns are normal patterns.
|
211
262
|
% 6d patterns are exceptions.
|
212
263
|
|
213
|
-
EOS
|
214
|
-
s % [
|
264
|
+
EOS
|
265
|
+
s % [@iso_language, stats_total, stats_start, stats_end, stats_both, stats_hyphens, stats_exceptions]
|
215
266
|
end
|
216
267
|
|
217
|
-
def updateresult(hash, str, pos)
|
218
|
-
if hash.has_key?(str)
|
219
|
-
STDERR.print "#{pos}: #{str}: #{hash[str]}" if DEBUG
|
220
|
-
hash[str].scan(@language.scan_re).each_with_index do |c, i|
|
221
|
-
c = c.to_i
|
222
|
-
@result[i + pos] = c if c > @result[i + pos]
|
223
|
-
end
|
224
|
-
STDERR.puts ": #{@result}" if DEBUG
|
225
|
-
end
|
226
|
-
end
|
227
|
-
private :updateresult
|
228
|
-
|
229
268
|
def make_result_list(res)
|
230
269
|
r = []
|
231
|
-
res.each_with_index { |c, i| r <<
|
270
|
+
res.each_with_index { |c, i| r << i * (c.to_i % 2) }
|
232
271
|
r.reject { |i| i.to_i == 0 }
|
233
272
|
end
|
234
273
|
private :make_result_list
|
@@ -251,17 +290,20 @@ EOS
|
|
251
290
|
end
|
252
291
|
private :load_language
|
253
292
|
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
293
|
+
def split_phrase(phrase)
|
294
|
+
phrase.split(/[[:space:]]+/)
|
295
|
+
end
|
296
|
+
private :split_phrase
|
297
|
+
|
298
|
+
def visualise_phrase(phrase, hyphen)
|
299
|
+
split_phrase(phrase).map { |word| visualise(word, hyphen) }.join(" ")
|
300
|
+
end
|
301
|
+
private :visualise_phrase
|
302
|
+
|
303
|
+
def phrase?(input)
|
304
|
+
/[^[:space:]][[:space:]][^[:space:]]/.match?(input)
|
264
305
|
end
|
306
|
+
private :phrase?
|
265
307
|
end
|
266
308
|
|
267
309
|
# vim: syntax=ruby
|
data/lib/text-hyphen.rb
CHANGED
@@ -1,2 +1,2 @@
|
|
1
1
|
# -*- ruby encoding: utf-8 -*-
|
2
|
-
require
|
2
|
+
require "text/hyphen"
|
@@ -1,10 +1,10 @@
|
|
1
|
-
# -*- encoding:
|
1
|
+
# -*- encoding: iso-8859-1 -*-
|
2
2
|
|
3
3
|
module TestTextHyphenData
|
4
4
|
def self.bug_9807_data
|
5
5
|
txt = "Dampfschifffahrtskapitänsmützenhalterhersteller"
|
6
6
|
pts = [5, 11, 17, 19, 21, 25, 28, 31, 34, 37, 40, 44]
|
7
7
|
viz = "Dampf-schiff-fahrts-ka-pi-täns-müt-zen-hal-ter-her-stel-ler"
|
8
|
-
[
|
8
|
+
[txt, pts, viz]
|
9
9
|
end
|
10
10
|
end
|
data/test/data/bug_9807_utf-8.rb
CHANGED
data/test/test_bugs.rb
CHANGED
@@ -1,16 +1,17 @@
|
|
1
1
|
# -*- encoding: utf-8 -*-
|
2
|
-
|
3
|
-
require
|
2
|
+
|
3
|
+
require "test/unit"
|
4
|
+
require "text-hyphen"
|
4
5
|
|
5
6
|
# The behaviour of Text::Hyphen differs based on the version and the
|
6
|
-
# encoding. Ruby 1.8 fails if the input is not
|
7
|
-
# patterns are
|
8
|
-
data_version = if RUBY_VERSION <
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
data_path = File.join(File.dirname(__FILE__),
|
7
|
+
# encoding. Ruby 1.8 fails if the input is not iso-8859-1 and the hyphenation
|
8
|
+
# patterns are iso-8859-1. Ruby 1.9 always expects UTF-8 patterns.
|
9
|
+
data_version = if RUBY_VERSION < "1.9.1"
|
10
|
+
"iso-8859-1"
|
11
|
+
else
|
12
|
+
"utf-8"
|
13
|
+
end
|
14
|
+
data_path = File.join(File.dirname(__FILE__), "data")
|
14
15
|
load File.join(data_path, "bug_9807_#{data_version}.rb")
|
15
16
|
|
16
17
|
class TestTextHyphenBugs < Test::Unit::TestCase
|
@@ -19,17 +20,17 @@ class TestTextHyphenBugs < Test::Unit::TestCase
|
|
19
20
|
# http://rubyforge.org/tracker/index.php?func=detail&aid=28498&group_id=294&atid=1195
|
20
21
|
txt, pts, viz = TestTextHyphenData.bug_9807_data
|
21
22
|
|
22
|
-
de1 = Text::Hyphen.new(:language =>
|
23
|
+
de1 = Text::Hyphen.new(:language => "de")
|
23
24
|
assert_equal pts, de1.hyphenate(txt)
|
24
25
|
assert_equal viz, de1.visualize(txt)
|
25
26
|
|
26
|
-
de2 = Text::Hyphen.new(:language =>
|
27
|
+
de2 = Text::Hyphen.new(:language => "de2")
|
27
28
|
assert_equal pts, de2.hyphenate(txt)
|
28
29
|
assert_equal viz, de2.visualize(txt)
|
29
30
|
end
|
30
31
|
|
31
32
|
def test_rubyforge_28128
|
32
|
-
en_us = Text::Hyphen.new(:language =>
|
33
|
+
en_us = Text::Hyphen.new(:language => "en_us")
|
33
34
|
assert_equal [], en_us.hyphenate("to")
|
34
35
|
assert_equal "to", en_us.visualize("to")
|
35
36
|
end
|
data/test/test_text_hyphen.rb
CHANGED
@@ -1,11 +1,12 @@
|
|
1
1
|
# coding: utf-8
|
2
|
-
|
3
|
-
require
|
2
|
+
|
3
|
+
require "test/unit"
|
4
|
+
require "text-hyphen"
|
4
5
|
|
5
6
|
class TestTextHyphen < Test::Unit::TestCase
|
6
|
-
WORDS
|
7
|
-
|
8
|
-
POINTS
|
7
|
+
WORDS = %w[additional declination going leaving maximizes multiple peter
|
8
|
+
playback presents programmable representation]
|
9
|
+
POINTS = [
|
9
10
|
[2, 4, 8], # additional
|
10
11
|
[3, 5, 7], # declination
|
11
12
|
[2], # going
|
@@ -15,26 +16,26 @@ class TestTextHyphen < Test::Unit::TestCase
|
|
15
16
|
[2], # peter
|
16
17
|
[4], # playback
|
17
18
|
[], # presents
|
18
|
-
[3, 7],
|
19
|
+
[3, 7, 9], # programmable
|
19
20
|
[3, 5, 8, 10] # representation
|
20
21
|
]
|
21
22
|
|
22
|
-
VISUAL = %w
|
23
|
-
|
24
|
-
|
23
|
+
VISUAL = %w[ad-di-tion-al dec-li-na-tion go-ing leav-ing max-i-mizes
|
24
|
+
mul-ti-ple pe-ter play-back presents pro-gram-ma-ble
|
25
|
+
rep-re-sen-ta-tion]
|
25
26
|
|
26
|
-
HY_TO
|
27
|
-
|
28
|
-
|
29
|
-
|
27
|
+
HY_TO = [%w[addi- tional], %w[dec- lination], %w[go- ing],
|
28
|
+
%w[leav- ing], %w[maxi- mizes], %w[mul- tiple], %w[pe- ter],
|
29
|
+
%w[play- back], [nil, "presents"], %w[pro- grammable],
|
30
|
+
%w[rep- resentation]]
|
30
31
|
|
31
32
|
SOFT_HYPHEN = "­"
|
32
33
|
|
33
34
|
def test_hyphenate
|
34
35
|
@r = []
|
35
36
|
a = Text::Hyphen.new do |xx|
|
36
|
-
xx.left
|
37
|
-
xx.right
|
37
|
+
xx.left = 0
|
38
|
+
xx.right = 0
|
38
39
|
end
|
39
40
|
assert_nothing_raised { WORDS.each { |w| @r << a.hyphenate(w) } }
|
40
41
|
assert_equal(POINTS, @r)
|
@@ -58,21 +59,30 @@ class TestTextHyphen < Test::Unit::TestCase
|
|
58
59
|
end
|
59
60
|
|
60
61
|
def test_alt_hyphen_for_visualize
|
61
|
-
a = Text::Hyphen.new.visualize(
|
62
|
+
a = Text::Hyphen.new.visualize("backpack", SOFT_HYPHEN)
|
62
63
|
assert_equal "back#{SOFT_HYPHEN}pack", a
|
63
64
|
|
64
|
-
a = Text::Hyphen.new.visualize(
|
65
|
+
a = Text::Hyphen.new.visualize("representation", SOFT_HYPHEN)
|
65
66
|
assert_equal "rep#{SOFT_HYPHEN}re#{SOFT_HYPHEN}sen#{SOFT_HYPHEN}ta#{SOFT_HYPHEN}tion", a
|
66
67
|
end
|
67
68
|
|
68
69
|
def test_alt_hyphen_for_hyphenate_to
|
69
|
-
a = Text::Hyphen.new.hyphenate_to(
|
70
|
-
assert_equal ["back#{SOFT_HYPHEN}",
|
71
|
-
|
70
|
+
a = Text::Hyphen.new.hyphenate_to("backpack", 5, SOFT_HYPHEN)
|
71
|
+
assert_equal ["back#{SOFT_HYPHEN}", "pack"], a
|
72
72
|
end
|
73
73
|
|
74
74
|
def test_russian
|
75
|
-
a = Text::Hyphen.new(:language =>
|
75
|
+
a = Text::Hyphen.new(:language => "ru").visualize("скоропалительный")
|
76
76
|
assert_equal "ско-ро-па-ли-тель-ный", a
|
77
77
|
end
|
78
|
+
|
79
|
+
def test_hyphenate_sentence
|
80
|
+
a = Text::Hyphen.new(:left => 0, :right => 0).hyphenate("This useful library supports phrases and sentences.")
|
81
|
+
assert_equal [8, 14, 23, 27, 34, 44], a
|
82
|
+
end
|
83
|
+
|
84
|
+
def test_visualise_sentence
|
85
|
+
a = Text::Hyphen.new(:left => 0, :right => 0).visualize("This useful library supports phrases and sentences.")
|
86
|
+
assert_equal "This use-ful li-brary sup-port-s phras-es and sen-tences.", a
|
87
|
+
end
|
78
88
|
end
|