Text 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,5 @@
1
+ require 'text/double_metaphone'
2
+ require 'text/figlet'
3
+ require 'text/levenshtein'
4
+ require 'text/metaphone'
5
+ require 'text/soundex'
@@ -0,0 +1,356 @@
1
+ #
2
+ # Ruby implementation of the Double Metaphone algorithm by Lawrence Philips,
3
+ # originally published in the June 2000 issue of C/C++ Users Journal.
4
+ #
5
+ # Based on Stephen Woodbridge's PHP version - http://swoodbridge.com/DoubleMetaPhone/
6
+ #
7
+ # Author: Tim Fletcher (twoggle@gmail.com)
8
+ #
9
+
10
module Text # :nodoc:
  module Metaphone

    # Computes the Double Metaphone codes of +str+.
    #
    # str - the word to encode; it is converted with +to_s+, so +nil+ is
    #       treated like the empty string.
    #
    # Returns a two-element array <tt>[primary, secondary]</tt>. Each code is
    # a String of at most four characters. The secondary is +nil+ when it is
    # equal to the primary.
    def double_metaphone(str)
      text = str.to_s
      primary, secondary, current = [], [], 0
      # The working copy is upper-cased and padded so that look-ahead slices
      # near the end of the word see a blank instead of running off the string.
      original, length, last = "#{text} ".upcase, text.length, text.length - 1

      # Skip the first letter of these initial pairs.
      current += 1 if /^GN|KN|PN|WR|PS$/ =~ original[0, 2]

      # An initial X is encoded as S.
      if 'X' == original[0, 1]
        primary << :S
        secondary << :S
        current += 1
      end

      while primary.length < 4 || secondary.length < 4
        break if current > text.length
        a, b, c = double_metaphone_lookup(original, current, length, last)
        primary << a if a
        secondary << b if b
        current += c if c
      end

      # Array#to_s only concatenated the elements on Ruby 1.8; on later
      # versions it is an alias of #inspect, so join explicitly.
      primary, secondary = primary.join('')[0, 4], secondary.join('')[0, 4]
      return primary, (primary == secondary ? nil : secondary)
    end


    private

    # Truthy when +str+ contains W, K, CZ or WITZ anywhere.
    def slavo_germanic?(str)
      /W|K|CZ|WITZ/ =~ str
    end

    # Truthy when +str+ (callers pass a single character) is A, E, I, O, U or Y.
    def vowel?(str)
      /^A|E|I|O|U|Y$/ =~ str
    end

    # Encodes the letter of +str+ at index +pos+.
    #
    # str    - the upper-cased, padded word built by double_metaphone.
    # pos    - index of the character to encode.
    # length - length of the unpadded word.
    # last   - index of the last character of the unpadded word.
    #
    # Returns three values: the primary code fragment, the secondary code
    # fragment (either may be nil, meaning "emit nothing") and the number of
    # characters to advance. Fragments are Symbols in most branches, but a few
    # branches return Strings and the TH branch returns the Integer 0; the
    # caller joins them, so all of these end up as text in the final code.
    def double_metaphone_lookup(str, pos, length, last)
      case str[pos, 1]
      when /^A|E|I|O|U|Y$/
        # Vowels are only encoded (as A) at the start of the word.
        if 0 == pos
          return :A, :A, 1
        else
          return nil, nil, 1
        end
      when 'B'
        return :P, :P, ('B' == str[pos + 1, 1] ? 2 : 1)
      when 'Ç'
        return :S, :S, 1
      when 'C'
        if pos > 1 &&
           !vowel?(str[pos - 2, 1]) &&
           'ACH' == str[pos - 1, 3] &&
           str[pos + 2, 1] != 'I' && (
             str[pos + 2, 1] != 'E' ||
             str[pos - 2, 6] =~ /^(B|M)ACHER$/
           ) then
          return :K, :K, 2
        elsif 0 == pos && 'CAESAR' == str[pos, 6]
          return :S, :S, 2
        elsif 'CHIA' == str[pos, 4]
          return :K, :K, 2
        elsif 'CH' == str[pos, 2]
          if pos > 0 && 'CHAE' == str[pos, 4]
            return :K, :X, 2
          elsif 0 == pos && (
                  ['HARAC', 'HARIS'].include?(str[pos + 1, 5]) ||
                  ['HOR', 'HYM', 'HIA', 'HEM'].include?(str[pos + 1, 3])
                ) && str[0, 5] != 'CHORE' then
            return :K, :K, 2
          elsif ['VAN ','VON '].include?(str[0, 4]) ||
                'SCH' == str[0, 3] ||
                ['ORCHES','ARCHIT','ORCHID'].include?(str[pos - 2, 6]) ||
                ['T','S'].include?(str[pos + 2, 1]) || (
                  ((0 == pos) || ['A','O','U','E'].include?(str[pos - 1, 1])) &&
                  ['L','R','N','M','B','H','F','V','W',' '].include?(str[pos + 2, 1])
                ) then
            return :K, :K, 2
          elsif pos > 0
            return ('MC' == str[0, 2] ? 'K' : 'X'), 'K', 2
          else
            return :X, :X, 2
          end
        elsif 'CZ' == str[pos, 2] && 'WICZ' != str[pos - 2, 4]
          return :S, :X, 2
        elsif 'CIA' == str[pos + 1, 3]
          return :X, :X, 3
        elsif 'CC' == str[pos, 2] && !(1 == pos && 'M' == str[0, 1])
          if /^I|E|H$/ =~ str[pos + 2, 1] && 'HU' != str[pos + 2, 2]
            if (1 == pos && 'A' == str[pos - 1, 1]) ||
               /^UCCE(E|S)$/ =~ str[pos - 1, 5] then
              return :KS, :KS, 3
            else
              return :X, :X, 3
            end
          else
            return :K, :K, 2
          end
        elsif /^C(K|G|Q)$/ =~ str[pos, 2]
          return :K, :K, 2
        elsif /^C(I|E|Y)$/ =~ str[pos, 2]
          return :S, (/^CI(O|E|A)$/ =~ str[pos, 3] ? :X : :S), 2
        else
          if /^ (C|Q|G)$/ =~ str[pos + 1, 2]
            return :K, :K, 3
          else
            return :K, :K, (/^C|K|Q$/ =~ str[pos + 1, 1] && !(['CE','CI'].include?(str[pos + 1, 2])) ? 2 : 1)
          end
        end
      when 'D'
        if 'DG' == str[pos, 2]
          if /^I|E|Y$/ =~ str[pos + 2, 1]
            return :J, :J, 3
          else
            return :TK, :TK, 2
          end
        else
          return :T, :T, (/^D(T|D)$/ =~ str[pos, 2] ? 2 : 1)
        end
      when 'F'
        return :F, :F, ('F' == str[pos + 1, 1] ? 2 : 1)
      when 'G'
        if 'H' == str[pos + 1, 1]
          if pos > 0 && !vowel?(str[pos - 1, 1])
            return :K, :K, 2
          elsif 0 == pos
            if 'I' == str[pos + 2, 1]
              return :J, :J, 2
            else
              return :K, :K, 2
            end
          elsif (pos > 1 && /^B|H|D$/ =~ str[pos - 2, 1]) ||
                (pos > 2 && /^B|H|D$/ =~ str[pos - 3, 1]) ||
                (pos > 3 && /^B|H$/ =~ str[pos - 4, 1])
            return nil, nil, 2
          else
            if (pos > 2 && 'U' == str[pos - 1, 1] && /^C|G|L|R|T$/ =~ str[pos - 3, 1])
              return :F, :F, 2
            elsif pos > 0 && 'I' != str[pos - 1, 1]
              return :K, :K, 2
            else
              return nil, nil, 2
            end
          end
        elsif 'N' == str[pos + 1, 1]
          if 1 == pos && vowel?(str[0, 1]) && !slavo_germanic?(str)
            return :KN, :N, 2
          else
            if 'EY' != str[pos + 2, 2] && 'Y' != str[pos + 1, 1] && !slavo_germanic?(str)
              return :N, :KN, 2
            else
              return :KN, :KN, 2
            end
          end
        elsif 'LI' == str[pos + 1, 2] && !slavo_germanic?(str)
          return :KL, :L, 2
        elsif 0 == pos && ('Y' == str[pos + 1, 1] || /^(E(S|P|B|L|Y|I|R)|I(B|L|N|E))$/ =~ str[pos + 1, 2])
          return :K, :J, 2
        elsif (('ER' == str[pos + 1, 2] || 'Y' == str[pos + 1, 1]) &&
               /^(D|R|M)ANGER$/ !~ str[0, 6] &&
               /^E|I$/ !~ str[pos - 1, 1] &&
               /^(R|O)GY$/ !~ str[pos - 1, 3])
          return :K, :J, 2
        elsif /^E|I|Y$/ =~ str[pos + 1, 1] || /^(A|O)GGI$/ =~ str[pos - 1, 4]
          if (/^V(A|O)N $/ =~ str[0, 4] || 'SCH' == str[0, 3]) || 'ET' == str[pos + 1, 2]
            return :K, :K, 2
          else
            if 'IER ' == str[pos + 1, 4]
              return :J, :J, 2
            else
              return :J, :K, 2
            end
          end
        elsif 'G' == str[pos + 1, 1]
          return :K, :K, 2
        else
          return :K, :K, 1
        end
      when 'H'
        # H is only encoded when a vowel follows and it is either at the
        # start of the word or preceded by a vowel.
        if (0 == pos || vowel?(str[pos - 1, 1])) && vowel?(str[pos + 1, 1])
          return :H, :H, 2
        else
          return nil, nil, 1
        end
      when 'J'
        if 'JOSE' == str[pos, 4] || 'SAN ' == str[0, 4]
          if (0 == pos && ' ' == str[pos + 4, 1]) || 'SAN ' == str[0, 4]
            return :H, :H, 1
          else
            return :J, :H, 1
          end
        else
          # Number of characters to consume: a doubled J counts once.
          current = ('J' == str[pos + 1, 1] ? 2 : 1)

          if 0 == pos && 'JOSE' != str[pos, 4]
            return :J, :A, current
          else
            if vowel?(str[pos - 1, 1]) && !slavo_germanic?(str) && /^A|O$/ =~ str[pos + 1, 1]
              return :J, :H, current
            else
              if last == pos
                return :J, nil, current
              else
                if /^L|T|K|S|N|M|B|Z$/ !~ str[pos + 1, 1] && /^S|K|L$/ !~ str[pos - 1, 1]
                  return :J, :J, current
                else
                  return nil, nil, current
                end
              end
            end
          end
        end
      when 'K'
        return :K, :K, ('K' == str[pos + 1, 1] ? 2 : 1)
      when 'L'
        if 'L' == str[pos + 1, 1]
          if (((length - 3) == pos && /^(ILL(O|A)|ALLE)$/ =~ str[pos - 1, 4]) ||
              ((/^(A|O)S$/ =~ str[last - 1, 2] || /^A|O$/ =~ str[last, 1]) && 'ALLE' == str[pos - 1, 4]))
            return :L, nil, 2
          else
            return :L, :L, 2
          end
        else
          return :L, :L, 1
        end
      when 'M'
        if ('UMB' == str[pos - 1, 3] &&
            ((last - 1) == pos || 'ER' == str[pos + 2, 2])) || 'M' == str[pos + 1, 1]
          return :M, :M, 2
        else
          return :M, :M, 1
        end
      when 'N'
        return :N, :N, ('N' == str[pos + 1, 1] ? 2 : 1)
      when 'Ñ'
        return :N, :N, 1
      when 'P'
        if 'H' == str[pos + 1, 1]
          return :F, :F, 2
        else
          return :P, :P, (/^P|B$/ =~ str[pos + 1, 1] ? 2 : 1)
        end
      when 'Q'
        return :K, :K, ('Q' == str[pos + 1, 1] ? 2 : 1)
      when 'R'
        # Number of characters to consume: a doubled R counts once.
        current = ('R' == str[pos + 1, 1] ? 2 : 1)

        if last == pos && !slavo_germanic?(str) && 'IE' == str[pos - 2, 2] && /^M(E|A)$/ !~ str[pos - 4, 2]
          return nil, :R, current
        else
          return :R, :R, current
        end
      when 'S'
        if /^(I|Y)SL$/ =~ str[pos - 1, 3]
          return nil, nil, 1
        elsif 0 == pos && 'SUGAR' == str[pos, 5]
          return :X, :S, 1
        elsif 'SH' == str[pos, 2]
          if /^H(EIM|OEK|OLM|OLZ)$/ =~ str[pos + 1, 4]
            return :S, :S, 2
          else
            return :X, :X, 2
          end
        elsif /^SI(O|A)$/ =~ str[pos, 3] || 'SIAN' == str[pos, 4]
          return :S, (slavo_germanic?(str) ? :S : :X), 3
        elsif (0 == pos && /^M|N|L|W$/ =~ str[pos + 1, 1]) || 'Z' == str[pos + 1, 1]
          return :S, :X, ('Z' == str[pos + 1, 1] ? 2 : 1)
        elsif 'SC' == str[pos, 2]
          if 'H' == str[pos + 2, 1]
            if /^OO|ER|EN|UY|ED|EM$/ =~ str[pos + 3, 2]
              return (/^E(R|N)$/ =~ str[pos + 3, 2] ? :X : :SK), :SK, 3
            else
              return :X, ((0 == pos && !vowel?(str[3, 1]) && ('W' != str[pos + 3, 1])) ? :S : :X), 3
            end
          elsif /^I|E|Y$/ =~ str[pos + 2, 1]
            return :S, :S, 3
          else
            return :SK, :SK, 3
          end
        else
          return (last == pos && /^(A|O)I$/ =~ str[pos - 2, 2] ? nil : 'S'), 'S', (/^S|Z$/ =~ str[pos + 1, 1] ? 2 : 1)
        end
      when 'T'
        if 'TION' == str[pos, 4]
          return :X, :X, 3
        elsif /^T(IA|CH)$/ =~ str[pos, 3]
          return :X, :X, 3
        elsif 'TH' == str[pos, 2] || 'TTH' == str[pos, 3]
          if /^(O|A)M$/ =~ str[pos + 2, 2] || /^V(A|O)N $/ =~ str[0, 4] || 'SCH' == str[0, 3]
            return :T, :T, 2
          else
            # The Integer 0 is truthy in Ruby, so the caller appends it and
            # it shows up as the character "0" in the joined primary code.
            return 0, :T, 2
          end
        else
          return :T, :T, (/^T|D$/ =~ str[pos + 1, 1] ? 2 : 1)
        end
      when 'V'
        return :F, :F, ('V' == str[pos + 1, 1] ? 2 : 1)
      when 'W'
        if 'WR' == str[pos, 2]
          return :R, :R, 2
        end
        pri, sec = nil, nil

        if 0 == pos && (vowel?(str[pos + 1, 1]) || 'WH' == str[pos, 2])
          pri = :A
          sec = vowel?(str[pos + 1, 1]) ? :F : :A
        end

        if (last == pos && vowel?(str[pos - 1, 1])) || 'SCH' == str[0, 3] ||
           /^EWSKI|EWSKY|OWSKI|OWSKY$/ =~ str[pos - 1, 5]
          # Interpolation turns a nil +sec+ into "", so this is :F or sec + "F".
          return pri, "#{sec}F".intern, 1
        elsif /^WI(C|T)Z$/ =~ str[pos, 4]
          return "#{pri}TS".intern, "#{sec}FX".intern, 4
        else
          return pri, sec, 1
        end
      when 'X'
        current = (/^C|X$/ =~ str[pos + 1, 1] ? 2 : 1)

        if !(last == pos && (/^(I|E)AU$/ =~ str[pos - 3, 3] || /^(A|O)U$/ =~ str[pos - 2, 2]))
          return :KS, :KS, current
        else
          return nil, nil, current
        end
      when 'Z'
        if 'H' == str[pos + 1, 1]
          return :J, :J, 2
        else
          current = ('Z' == str[pos + 1, 1] ? 2 : 1)

          if /^Z(O|I|A)$/ =~ str[pos + 1, 2] || (slavo_germanic?(str) && (pos > 0 && 'T' != str[pos - 1, 1]))
            return :S, :TS, current
          else
            return :S, :S, current
          end
        end
      else
        # Any other character (including the padding blank) is skipped.
        return nil, nil, 1
      end
    end # def double_metaphone_lookup

    # Makes the methods callable directly on the module as well as via mixin.
    extend self

  end # module Metaphone
end # module Text
@@ -0,0 +1,17 @@
1
+ #
2
+ # Ruby implementation of the Figlet program (http://www.figlet.org/).
3
+ #
4
+ # Author: Tim Fletcher (twoggle@gmail.com)
5
+ #
6
+ # Usage:
7
+ #
8
+ # big_font = Text::Figlet::Font.new('big.flf')
9
+ #
10
+ # figlet = Text::Figlet::Typesetter.new(big_font)
11
+ #
12
+ # puts figlet['hello world']
13
+ #
14
+ #
15
+ require 'text/figlet/font'
16
+ require 'text/figlet/smusher'
17
+ require 'text/figlet/typesetter'
@@ -0,0 +1,117 @@
1
module Text
  module Figlet

    # Raised when a font file does not start with the "flf2a" signature.
    class UnknownFontFormat < StandardError
    end

    # A FIGlet font loaded from a .flf file.
    #
    # Glyphs are stored by Integer character code. Each glyph is an Array of
    # +height+ Strings, one per row, each terminated by a "\x00" byte.
    class Font
      # Loads the font stored in +filename+.
      #
      # filename    - path of the font file.
      # load_german - when true (the default) the seven glyphs that follow the
      #               ASCII block replace the glyphs for codes 91-93 and
      #               123-126; when false they are read past and discarded.
      #
      # Raises UnknownFontFormat if the file is empty or its first line does
      # not begin with "flf2a".
      def initialize(filename, load_german = true)
        # Block form so the handle is closed even when parsing raises.
        File.open(filename, 'rb') do |file|
          header = file.gets.to_s.strip.split(/ /)

          raise UnknownFontFormat if header.empty? || 'flf2a' != header[0][0, 5]

          # The hard blank is the character immediately after the signature.
          @hard_blank = header.shift[-1, 1]
          @height = header.shift.to_i
          @baseline = header.shift
          @max_length = header.shift
          @old_layout = header.shift.to_i
          @comment_count = header.shift.to_i
          # The print direction field is optional; absent means left-to-right.
          @right_to_left = header.shift
          @right_to_left = !@right_to_left.nil? && @right_to_left.to_i == 1

          @load_german, @characters = load_german, {}

          load_comments file
          load_ascii_characters file
          load_german_characters file
          load_extended_characters file
        end
      end

      # Returns the glyph stored for the Integer code +char+, or nil.
      def [](char)
        @characters[char]
      end

      # Returns true when a glyph is stored for the Integer code +char+.
      def has_char?(char)
        @characters.has_key? char
      end

      attr_reader :height, :hard_blank, :old_layout

      # Returns true when the header's print direction field is 1.
      def right_to_left?
        @right_to_left
      end


      private

      # Reads past the comment lines announced in the header.
      def load_comments(file)
        @comment_count.times { file.gets }
      end

      # Reads the glyphs for character codes 32 through 126.
      def load_ascii_characters(file)
        (32..126).each { |i| @characters[i] = load_char(file) }
      end

      # Reads the seven glyphs that follow the ASCII block. Stops quietly if
      # the file ends before all of them have been read.
      def load_german_characters(file)
        [91, 92, 93, 123, 124, 125, 126].each do |i|
          if @load_german
            char = load_char(file)
            return unless char
            @characters[i] = char
          else
            skip_char file
          end
        end
      end

      # Reads the code-tagged glyphs that make up the rest of the file. Each
      # glyph is preceded by a line whose first word is its character code.
      def load_extended_characters(file)
        until file.eof?
          tag = file.gets.strip.split(/ /).first
          # A blank line yields no first word at all.
          next if tag.nil? || tag.empty?

          if /^\-0x/i =~ tag # comment
            skip_char file
          else
            char = load_char(file)
            return unless char
            @characters[parse_code_tag(tag)] = char
          end
        end
      end

      # Converts a code tag to an Integer: "0x..." is read as hexadecimal, a
      # leading "0" (or "-0") as octal and anything else as decimal, so that
      # every glyph is keyed the same way as the ASCII block.
      def parse_code_tag(tag)
        if /^0x/i =~ tag
          tag[2..-1].hex
        elsif ('0' == tag[0, 1] && '0' != tag) || '-0' == tag[0, 2]
          tag.oct
        else
          tag.to_i
        end
      end

      # Reads one glyph (+height+ rows). Returns the Array of rows, or false
      # if the file ends before the glyph is complete.
      def load_char(file)
        char = []
        @height.times do
          return false if file.eof?
          line = file.gets.rstrip
          # The last character of a row is its endmark. Remove only the
          # trailing run of it so the same character inside the glyph stays.
          unless line.empty?
            endmark = line[-1, 1]
            line.chop! while line[-1, 1] == endmark
          end
          line << "\x00"
          char << line
        end
        return char
      end

      # Reads past one glyph (+height+ rows) without storing it.
      def skip_char(file)
        @height.times do
          return if file.eof?
          file.gets
        end
      end

    end

  end # module Figlet
end # module Text