Text 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,5 @@
1
+ require 'text/double_metaphone'
2
+ require 'text/figlet'
3
+ require 'text/levenshtein'
4
+ require 'text/metaphone'
5
+ require 'text/soundex'
@@ -0,0 +1,356 @@
1
+ #
2
+ # Ruby implementation of the Double Metaphone algorithm by Lawrence Philips,
3
+ # originally published in the June 2000 issue of C/C++ Users Journal.
4
+ #
5
+ # Based on Stephen Woodbridge's PHP version - http://swoodbridge.com/DoubleMetaPhone/
6
+ #
7
+ # Author: Tim Fletcher (twoggle@gmail.com)
8
+ #
9
+
10
module Text # :nodoc:
  module Metaphone

    # Computes the Double Metaphone codes of +str+.
    #
    # str:: the word to encode; it is coerced with +to_s+ and upcased, so
    #       nil and symbols are accepted as well as strings.
    #
    # Returns a two-element array <tt>[primary, secondary]</tt>. Each code is
    # a String of at most four characters. The secondary is nil when it is
    # equal to the primary.
    def double_metaphone(str)
      str = str.to_s
      primary, secondary, current = [], [], 0
      # A trailing space is appended so that look-ahead slices taken near the
      # end of the word see a blank rather than running off the string.
      original, length, last = "#{str} ".upcase, str.length, str.length - 1
      # These initial pairs have a silent first letter: skip it.
      if /^GN|KN|PN|WR|PS$/ =~ original[0, 2]
        current += 1
      end
      # An initial X is encoded as S.
      if 'X' == original[0, 1]
        primary << :S
        secondary << :S
        current += 1
      end
      # Collect code fragments until both codes have four fragments or the
      # input (including the padding blank) is exhausted.
      while primary.length < 4 || secondary.length < 4
        break if current > length
        a, b, c = double_metaphone_lookup(original, current, length, last)
        primary << a if a
        secondary << b if b
        current += c if c
      end
      # The fragments are a mix of Symbols, Strings and the Integer 0, so they
      # must be concatenated with join. Array#to_s returns the inspect form
      # ("[:S, :M, 0]") on Ruby 1.9 and later, which corrupted the result.
      primary, secondary = primary.join('')[0, 4], secondary.join('')[0, 4]
      return primary, (primary == secondary ? nil : secondary)
    end


    private

    # Truthy when the (upcased) word contains W, K, CZ or WITZ anywhere; the
    # lookup uses this to pick between alternative encodings.
    def slavo_germanic?(str)
      /W|K|CZ|WITZ/ =~ str
    end

    # Truthy when the given one-character string is A, E, I, O, U or Y.
    def vowel?(str)
      /^A|E|I|O|U|Y$/ =~ str
    end

    # Encodes the letter found at index +pos+ of +str+.
    #
    # str::    the upcased word followed by one padding blank
    # pos::    index of the character to encode
    # length:: length of the word without the padding blank
    # last::   index of the last character of the word
    #
    # Returns <tt>[primary, secondary, advance]</tt>: the code fragments to
    # append to each code (nil for "nothing") and the number of characters
    # consumed. The order of the branches is significant and mirrors the
    # rule table of the algorithm, so it is left exactly as written.
    def double_metaphone_lookup(str, pos, length, last)
      case str[pos, 1]
        when /^A|E|I|O|U|Y$/
          # Vowels are only encoded (as A) at the start of the word.
          if 0 == pos
            return :A, :A, 1
          else
            return nil, nil, 1
          end
        when 'B'
          return :P, :P, ('B' == str[pos + 1, 1] ? 2 : 1)
        when 'Ç'
          return :S, :S, 1
        when 'C'
          if pos > 1 &&
            !vowel?(str[pos - 2, 1]) &&
            'ACH' == str[pos - 1, 3] &&
            str[pos + 2, 1] != 'I' && (
              str[pos + 2, 1] != 'E' ||
              str[pos - 2, 6] =~ /^(B|M)ACHER$/
            ) then
            return :K, :K, 2
          elsif 0 == pos && 'CAESAR' == str[pos, 6]
            return :S, :S, 2
          elsif 'CHIA' == str[pos, 4]
            return :K, :K, 2
          elsif 'CH' == str[pos, 2]
            if pos > 0 && 'CHAE' == str[pos, 4]
              return :K, :X, 2
            elsif 0 == pos && (
                ['HARAC', 'HARIS'].include?(str[pos + 1, 5]) ||
                ['HOR', 'HYM', 'HIA', 'HEM'].include?(str[pos + 1, 3])
              ) && str[0, 5] != 'CHORE' then
              return :K, :K, 2
            elsif ['VAN ','VON '].include?(str[0, 4]) ||
                  'SCH' == str[0, 3] ||
                  ['ORCHES','ARCHIT','ORCHID'].include?(str[pos - 2, 6]) ||
                  ['T','S'].include?(str[pos + 2, 1]) || (
                    ((0 == pos) || ['A','O','U','E'].include?(str[pos - 1, 1])) &&
                    ['L','R','N','M','B','H','F','V','W',' '].include?(str[pos + 2, 1])
                  ) then
              return :K, :K, 2
            elsif pos > 0
              return ('MC' == str[0, 2] ? 'K' : 'X'), 'K', 2
            else
              return :X, :X, 2
            end
          elsif 'CZ' == str[pos, 2] && 'WICZ' != str[pos - 2, 4]
            return :S, :X, 2
          elsif 'CIA' == str[pos + 1, 3]
            return :X, :X, 3
          elsif 'CC' == str[pos, 2] && !(1 == pos && 'M' == str[0, 1])
            if /^I|E|H$/ =~ str[pos + 2, 1] && 'HU' != str[pos + 2, 2]
              if (1 == pos && 'A' == str[pos - 1, 1]) ||
                /^UCCE(E|S)$/ =~ str[pos - 1, 5] then
                return :KS, :KS, 3
              else
                return :X, :X, 3
              end
            else
              return :K, :K, 2
            end
          elsif /^C(K|G|Q)$/ =~ str[pos, 2]
            return :K, :K, 2
          elsif /^C(I|E|Y)$/ =~ str[pos, 2]
            return :S, (/^CI(O|E|A)$/ =~ str[pos, 3] ? :X : :S), 2
          else
            if /^ (C|Q|G)$/ =~ str[pos + 1, 2]
              return :K, :K, 3
            else
              return :K, :K, (/^C|K|Q$/ =~ str[pos + 1, 1] && !(['CE','CI'].include?(str[pos + 1, 2])) ? 2 : 1)
            end
          end
        when 'D'
          if 'DG' == str[pos, 2]
            if /^I|E|Y$/ =~ str[pos + 2, 1]
              return :J, :J, 3
            else
              return :TK, :TK, 2
            end
          else
            return :T, :T, (/^D(T|D)$/ =~ str[pos, 2] ? 2 : 1)
          end
        when 'F'
          return :F, :F, ('F' == str[pos + 1, 1] ? 2 : 1)
        when 'G'
          if 'H' == str[pos + 1, 1]
            if pos > 0 && !vowel?(str[pos - 1, 1])
              return :K, :K, 2
            elsif 0 == pos
              if 'I' == str[pos + 2, 1]
                return :J, :J, 2
              else
                return :K, :K, 2
              end
            elsif (pos > 1 && /^B|H|D$/ =~ str[pos - 2, 1]) ||
                  (pos > 2 && /^B|H|D$/ =~ str[pos - 3, 1]) ||
                  (pos > 3 && /^B|H$/ =~ str[pos - 4, 1])
              return nil, nil, 2
            else
              if (pos > 2 && 'U' == str[pos - 1, 1] && /^C|G|L|R|T$/ =~ str[pos - 3, 1])
                return :F, :F, 2
              elsif pos > 0 && 'I' != str[pos - 1, 1]
                return :K, :K, 2
              else
                return nil, nil, 2
              end
            end
          elsif 'N' == str[pos + 1, 1]
            if 1 == pos && vowel?(str[0, 1]) && !slavo_germanic?(str)
              return :KN, :N, 2
            else
              if 'EY' != str[pos + 2, 2] && 'Y' != str[pos + 1, 1] && !slavo_germanic?(str)
                return :N, :KN, 2
              else
                return :KN, :KN, 2
              end
            end
          elsif 'LI' == str[pos + 1, 2] && !slavo_germanic?(str)
            return :KL, :L, 2
          elsif 0 == pos && ('Y' == str[pos + 1, 1] || /^(E(S|P|B|L|Y|I|R)|I(B|L|N|E))$/ =~ str[pos + 1, 2])
            return :K, :J, 2
          elsif (('ER' == str[pos + 1, 2] || 'Y' == str[pos + 1, 1]) &&
                 /^(D|R|M)ANGER$/ !~ str[0, 6] &&
                 /^E|I$/ !~ str[pos - 1, 1] &&
                 /^(R|O)GY$/ !~ str[pos - 1, 3])
            return :K, :J, 2
          elsif /^E|I|Y$/ =~ str[pos + 1, 1] || /^(A|O)GGI$/ =~ str[pos - 1, 4]
            if (/^V(A|O)N $/ =~ str[0, 4] || 'SCH' == str[0, 3]) || 'ET' == str[pos + 1, 2]
              return :K, :K, 2
            else
              if 'IER ' == str[pos + 1, 4]
                return :J, :J, 2
              else
                return :J, :K, 2
              end
            end
          elsif 'G' == str[pos + 1, 1]
            return :K, :K, 2
          else
            return :K, :K, 1
          end
        when 'H'
          # H is only kept at the start of the word or between two vowels.
          if (0 == pos || vowel?(str[pos - 1, 1])) && vowel?(str[pos + 1, 1])
            return :H, :H, 2
          else
            return nil, nil, 1
          end
        when 'J'
          if 'JOSE' == str[pos, 4] || 'SAN ' == str[0, 4]
            if (0 == pos && ' ' == str[pos + 4, 1]) || 'SAN ' == str[0, 4]
              return :H, :H, 1
            else
              return :J, :H, 1
            end
          else
            current = ('J' == str[pos + 1, 1] ? 2 : 1)

            if 0 == pos && 'JOSE' != str[pos, 4]
              return :J, :A, current
            else
              if vowel?(str[pos - 1, 1]) && !slavo_germanic?(str) && /^A|O$/ =~ str[pos + 1, 1]
                return :J, :H, current
              else
                if last == pos
                  return :J, nil, current
                else
                  if /^L|T|K|S|N|M|B|Z$/ !~ str[pos + 1, 1] && /^S|K|L$/ !~ str[pos - 1, 1]
                    return :J, :J, current
                  else
                    return nil, nil, current
                  end
                end
              end
            end
          end
        when 'K'
          return :K, :K, ('K' == str[pos + 1, 1] ? 2 : 1)
        when 'L'
          if 'L' == str[pos + 1, 1]
            if (((length - 3) == pos && /^(ILL(O|A)|ALLE)$/ =~ str[pos - 1, 4]) ||
                ((/^(A|O)S$/ =~ str[last - 1, 2] || /^A|O$/ =~ str[last, 1]) && 'ALLE' == str[pos - 1, 4]))
              return :L, nil, 2
            else
              return :L, :L, 2
            end
          else
            return :L, :L, 1
          end
        when 'M'
          if ('UMB' == str[pos - 1, 3] &&
              ((last - 1) == pos || 'ER' == str[pos + 2, 2])) || 'M' == str[pos + 1, 1]
            return :M, :M, 2
          else
            return :M, :M, 1
          end
        when 'N'
          return :N, :N, ('N' == str[pos + 1, 1] ? 2 : 1)
        when 'Ñ'
          return :N, :N, 1
        when 'P'
          if 'H' == str[pos + 1, 1]
            return :F, :F, 2
          else
            return :P, :P, (/^P|B$/ =~ str[pos + 1, 1] ? 2 : 1)
          end
        when 'Q'
          return :K, :K, ('Q' == str[pos + 1, 1] ? 2 : 1)
        when 'R'
          current = ('R' == str[pos + 1, 1] ? 2 : 1)

          if last == pos && !slavo_germanic?(str) && 'IE' == str[pos - 2, 2] && /^M(E|A)$/ !~ str[pos - 4, 2]
            return nil, :R, current
          else
            return :R, :R, current
          end
        when 'S'
          if /^(I|Y)SL$/ =~ str[pos - 1, 3]
            return nil, nil, 1
          elsif 0 == pos && 'SUGAR' == str[pos, 5]
            return :X, :S, 1
          elsif 'SH' == str[pos, 2]
            if /^H(EIM|OEK|OLM|OLZ)$/ =~ str[pos + 1, 4]
              return :S, :S, 2
            else
              return :X, :X, 2
            end
          elsif /^SI(O|A)$/ =~ str[pos, 3] || 'SIAN' == str[pos, 4]
            return :S, (slavo_germanic?(str) ? :S : :X), 3
          elsif (0 == pos && /^M|N|L|W$/ =~ str[pos + 1, 1]) || 'Z' == str[pos + 1, 1]
            return :S, :X, ('Z' == str[pos + 1, 1] ? 2 : 1)
          elsif 'SC' == str[pos, 2]
            if 'H' == str[pos + 2, 1]
              if /^OO|ER|EN|UY|ED|EM$/ =~ str[pos + 3, 2]
                return (/^E(R|N)$/ =~ str[pos + 3, 2] ? :X : :SK), :SK, 3
              else
                return :X, ((0 == pos && !vowel?(str[3, 1]) && ('W' != str[pos + 3, 1])) ? :S : :X), 3
              end
            elsif /^I|E|Y$/ =~ str[pos + 2, 1]
              return :S, :S, 3
            else
              return :SK, :SK, 3
            end
          else
            return (last == pos && /^(A|O)I$/ =~ str[pos - 2, 2] ? nil : 'S'), 'S', (/^S|Z$/ =~ str[pos + 1, 1] ? 2 : 1)
          end
        when 'T'
          if 'TION' == str[pos, 4]
            return :X, :X, 3
          elsif /^T(IA|CH)$/ =~ str[pos, 3]
            return :X, :X, 3
          elsif 'TH' == str[pos, 2] || 'TTH' == str[pos, 3]
            if /^(O|A)M$/ =~ str[pos + 2, 2] || /^V(A|O)N $/ =~ str[0, 4] || 'SCH' == str[0, 3]
              return :T, :T, 2
            else
              # The Integer 0 is deliberate: it is truthy in Ruby, so it is
              # appended and shows up as the character "0" in the primary code.
              return 0, :T, 2
            end
          else
            return :T, :T, (/^T|D$/ =~ str[pos + 1, 1] ? 2 : 1)
          end
        when 'V'
          return :F, :F, ('V' == str[pos + 1, 1] ? 2 : 1)
        when 'W'
          if 'WR' == str[pos, 2]
            return :R, :R, 2
          end
          pri, sec = nil, nil

          if 0 == pos && (vowel?(str[pos + 1, 1]) || 'WH' == str[pos, 2])
            pri = :A
            sec = vowel?(str[pos + 1, 1]) ? :F : :A
          end

          if (last == pos && vowel?(str[pos - 1, 1])) || 'SCH' == str[0, 3] ||
             /^EWSKI|EWSKY|OWSKI|OWSKY$/ =~ str[pos - 1, 5]
            return pri, "#{sec}F".intern, 1
          elsif /^WI(C|T)Z$/ =~ str[pos, 4]
            return "#{pri}TS".intern, "#{sec}FX".intern, 4
          else
            return pri, sec, 1
          end
        when 'X'
          current = (/^C|X$/ =~ str[pos + 1, 1] ? 2 : 1)

          if !(last == pos && (/^(I|E)AU$/ =~ str[pos - 3, 3] || /^(A|O)U$/ =~ str[pos - 2, 2]))
            return :KS, :KS, current
          else
            return nil, nil, current
          end
        when 'Z'
          if 'H' == str[pos + 1, 1]
            return :J, :J, 2
          else
            current = ('Z' == str[pos + 1, 1] ? 2 : 1)

            if /^Z(O|I|A)$/ =~ str[pos + 1, 2] || (slavo_germanic?(str) && (pos > 0 && 'T' != str[pos - 1, 1]))
              return :S, :TS, current
            else
              return :S, :S, current
            end
          end
        else
          # Any other character (including the padding blank) is skipped.
          return nil, nil, 1
      end
    end # def double_metaphone_lookup

    # Make every method callable directly on the module
    # (e.g. Text::Metaphone.double_metaphone).
    extend self

  end # module Metaphone
end # module Text
@@ -0,0 +1,17 @@
1
+ #
2
+ # Ruby implementation of the Figlet program (http://www.figlet.org/).
3
+ #
4
+ # Author: Tim Fletcher (twoggle@gmail.com)
5
+ #
6
+ # Usage:
7
+ #
8
+ # big_font = Text::Figlet::Font.new('big.flf')
9
+ #
10
+ # figlet = Text::Figlet::Typesetter.new(big_font)
11
+ #
12
+ # puts figlet['hello world']
13
+ #
14
+ #
15
+ require 'text/figlet/font'
16
+ require 'text/figlet/smusher'
17
+ require 'text/figlet/typesetter'
@@ -0,0 +1,117 @@
1
module Text
  module Figlet

    # Raised when a font file does not begin with the "flf2a" signature
    # (including when the file is empty).
    class UnknownFontFormat < StandardError
    end

    # An in-memory FIGlet font: a table mapping character codes to the rows
    # of text that draw each character.
    class Font
      # Reads the font stored in +filename+.
      #
      # filename::    path of the font file
      # load_german:: when true (the default) the seven characters that follow
      #               the ASCII block are stored under codes 91, 92, 93, 123,
      #               124, 125 and 126, replacing the ASCII glyphs loaded for
      #               those codes; when false they are skipped.
      #
      # Raises UnknownFontFormat if the header line is missing or does not
      # start with "flf2a".
      def initialize(filename, load_german = true)
        # The block form guarantees the handle is closed even if parsing raises.
        File.open(filename, 'rb') do |file|
          first_line = file.gets
          raise UnknownFontFormat if first_line.nil?

          header = first_line.strip.split(/ /)

          raise UnknownFontFormat if 'flf2a' != header.first.to_s[0, 5]

          # The hard blank is the character that follows the signature.
          @hard_blank = header.shift[-1, 1]
          @height = header.shift.to_i
          @baseline = header.shift
          @max_length = header.shift
          @old_layout = header.shift.to_i
          @comment_count = header.shift.to_i
          # Optional header field; absent means left-to-right.
          @right_to_left = header.shift
          @right_to_left = !@right_to_left.nil? && @right_to_left.to_i == 1

          @load_german, @characters = load_german, {}

          load_comments file
          load_ascii_characters file
          load_german_characters file
          load_extended_characters file
        end
      end

      # Returns the rows of the character stored under +char+ (an array of
      # strings, each terminated by "\x00"), or nil if it is not in the font.
      def [](char)
        @characters[char]
      end

      # True if the font has a character stored under +char+.
      def has_char?(char)
        @characters.has_key? char
      end

      attr_reader :height, :hard_blank, :old_layout

      # True if the header marks the font as right-to-left.
      def right_to_left?
        @right_to_left
      end


      private

      # Skips the comment lines announced by the header.
      def load_comments(file)
        @comment_count.times { file.gets }
      end

      # Loads the characters for codes 32..126, which come first in the file.
      def load_ascii_characters(file)
        (32..126).each { |i| @characters[i] = load_char(file) }
      end

      # Loads (or skips) the seven characters that follow the ASCII block.
      # Stops quietly if the file ends early.
      def load_german_characters(file)
        [91, 92, 93, 123, 124, 125, 126].each do |i|
          if @load_german
            unless char = load_char(file)
              return
            end
            @characters[i] = char
          else
            skip_char file
          end
        end
      end

      # Loads the code-tagged characters that make up the rest of the file.
      # Each one is introduced by a line whose first word is its code.
      def load_extended_characters(file)
        until file.eof?
          token = file.gets.strip.split(/ /).first
          # Blank lines yield no token at all; ignore them.
          next if token.nil? || token.empty?

          if /^\-0x/i =~ token
            # Negative hex codes are not loaded; skip their rows.
            skip_char file
            next
          end

          unless char = load_char(file)
            return
          end
          @characters[parse_char_code(token)] = char
        end
      end

      # Converts a code tag to an Integer so that extended characters share
      # the Integer key space of the ASCII characters.
      #   "0x00A0" -> 160 (hex, all digits), "0400" -> 256 (octal), "200" -> 200
      # A tag that is not numeric is returned unchanged.
      def parse_char_code(token)
        if /^0x/i =~ token
          token[2..-1].hex
        elsif ('0' == token[0, 1] && '0' != token) || '-0' == token[0, 2]
          token.oct
        elsif /\A-?\d+\z/ =~ token
          token.to_i
        else
          token
        end
      end

      # Reads the @height rows of one character. The endmark (the last
      # character of each row) and any immediately preceding copies of it are
      # removed, then "\x00" is appended to the row. Only the trailing run is
      # removed so that the same character may appear inside the glyph.
      #
      # Returns the array of rows, or false if the file ends first.
      def load_char(file)
        char = []
        @height.times do
          return false if file.eof?
          line = file.gets.rstrip
          endmark = line[-1, 1]
          if endmark
            line.chop! while line.end_with?(endmark)
          end
          line << "\x00"
          char << line
        end
        return char
      end

      # Discards the rows of one character, stopping early at end of file.
      def skip_char(file)
        @height.times do
          return if file.eof?
          file.gets
        end
      end

    end

  end # module Figlet
end # module Text