Text 1.1.2 → 1.1.3
Sign up to get free protection for your applications and to get access to all the features.
- metadata +51 -67
- data/README.rdoc +0 -28
- data/lib/text.rb +0 -6
- data/lib/text/double_metaphone.rb +0 -356
- data/lib/text/figlet.rb +0 -17
- data/lib/text/figlet/font.rb +0 -117
- data/lib/text/figlet/smusher.rb +0 -64
- data/lib/text/figlet/typesetter.rb +0 -68
- data/lib/text/levenshtein.rb +0 -65
- data/lib/text/metaphone.rb +0 -97
- data/lib/text/porter_stemming.rb +0 -171
- data/lib/text/soundex.rb +0 -61
- data/rakefile.rb +0 -44
- data/test/data/big.flf +0 -2204
- data/test/data/big.txt +0 -8
- data/test/data/chunky.flf +0 -512
- data/test/data/chunky.txt +0 -5
- data/test/data/double_metaphone.csv +0 -1218
- data/test/data/metaphone.txt +0 -51
- data/test/data/metaphone_buggy.txt +0 -52
- data/test/data/porter_stemming_input.txt +0 -23531
- data/test/data/porter_stemming_output.txt +0 -23531
- data/test/preamble.rb +0 -10
- data/test/test_double_metaphone.rb +0 -23
- data/test/test_figlet.rb +0 -17
- data/test/test_levenshtein.rb +0 -80
- data/test/test_metaphone.rb +0 -39
- data/test/test_porter_stemming.rb +0 -16
- data/test/test_soundex.rb +0 -27
metadata
CHANGED
@@ -1,73 +1,57 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
2
|
-
rubygems_version: 0.9.0.10
|
3
|
-
specification_version: 1
|
1
|
+
--- !ruby/object:Gem::Specification
|
4
2
|
name: Text
|
5
|
-
version: !ruby/object:Gem::Version
|
6
|
-
version: 1.1.2
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
-
|
11
|
-
|
12
|
-
|
13
|
-
rubyforge_project: text
|
14
|
-
description:
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.1.3
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Paul Battley
|
9
|
+
- Michael Neumann
|
10
|
+
- Tim Fletcher
|
15
11
|
autorequire:
|
16
|
-
default_executable:
|
17
12
|
bindir: bin
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
- lib/text/figlet.rb
|
35
|
-
- lib/text/levenshtein.rb
|
36
|
-
- lib/text/metaphone.rb
|
37
|
-
- lib/text/porter_stemming.rb
|
38
|
-
- lib/text/soundex.rb
|
39
|
-
- lib/text/figlet/font.rb
|
40
|
-
- lib/text/figlet/smusher.rb
|
41
|
-
- lib/text/figlet/typesetter.rb
|
42
|
-
- test/preamble.rb
|
43
|
-
- test/test_double_metaphone.rb
|
44
|
-
- test/test_figlet.rb
|
45
|
-
- test/test_levenshtein.rb
|
46
|
-
- test/test_metaphone.rb
|
47
|
-
- test/test_porter_stemming.rb
|
48
|
-
- test/test_soundex.rb
|
49
|
-
- test/data/big.flf
|
50
|
-
- test/data/big.txt
|
51
|
-
- test/data/chunky.flf
|
52
|
-
- test/data/chunky.txt
|
53
|
-
- test/data/double_metaphone.csv
|
54
|
-
- test/data/metaphone.txt
|
55
|
-
- test/data/metaphone_buggy.txt
|
56
|
-
- test/data/porter_stemming_input.txt
|
57
|
-
- test/data/porter_stemming_output.txt
|
58
|
-
- README.rdoc
|
59
|
-
- rakefile.rb
|
60
|
-
test_files: []
|
61
|
-
|
62
|
-
rdoc_options: []
|
63
|
-
|
64
|
-
extra_rdoc_files: []
|
65
|
-
|
13
|
+
cert_chain: []
|
14
|
+
date: 2012-06-07 00:00:00.000000000 Z
|
15
|
+
dependencies:
|
16
|
+
- !ruby/object:Gem::Dependency
|
17
|
+
name: text
|
18
|
+
requirement: &12920520 !ruby/object:Gem::Requirement
|
19
|
+
none: false
|
20
|
+
requirements:
|
21
|
+
- - ! '>='
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: 1.2.0
|
24
|
+
type: :runtime
|
25
|
+
prerelease: false
|
26
|
+
version_requirements: *12920520
|
27
|
+
description: ! 'This gem is now just a legacy shim with one dependency: text'
|
28
|
+
email: pbattley@gmail.com
|
66
29
|
executables: []
|
67
|
-
|
68
30
|
extensions: []
|
69
|
-
|
31
|
+
extra_rdoc_files: []
|
32
|
+
files: []
|
33
|
+
homepage: http://github.com/threedaymonk/text
|
34
|
+
licenses: []
|
35
|
+
post_install_message:
|
36
|
+
rdoc_options: []
|
37
|
+
require_paths:
|
38
|
+
- lib
|
39
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
40
|
+
none: false
|
41
|
+
requirements:
|
42
|
+
- - ! '>='
|
43
|
+
- !ruby/object:Gem::Version
|
44
|
+
version: '0'
|
45
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
46
|
+
none: false
|
47
|
+
requirements:
|
48
|
+
- - ! '>='
|
49
|
+
- !ruby/object:Gem::Version
|
50
|
+
version: '0'
|
70
51
|
requirements: []
|
71
|
-
|
72
|
-
|
73
|
-
|
52
|
+
rubyforge_project:
|
53
|
+
rubygems_version: 1.8.11
|
54
|
+
signing_key:
|
55
|
+
specification_version: 3
|
56
|
+
summary: Legacy shim. Use 'text' instead.
|
57
|
+
test_files: []
|
data/README.rdoc
DELETED
@@ -1,28 +0,0 @@
|
|
1
|
-
= Text
|
2
|
-
|
3
|
-
A collection of text algorithms.
|
4
|
-
|
5
|
-
|
6
|
-
= Usage
|
7
|
-
|
8
|
-
require 'text'
|
9
|
-
|
10
|
-
font = Text::Figlet::Font.new('big.flf')
|
11
|
-
figlet = Text::Figlet::Typesetter.new(font)
|
12
|
-
figlet['Hello World'] # => '...'
|
13
|
-
|
14
|
-
Text::Levenshtein.distance('test', 'test') # => 0
|
15
|
-
Text::Levenshtein.distance('test', 'tent') # => 1
|
16
|
-
|
17
|
-
Text::Metaphone.metaphone('BRIAN') # => 'BRN'
|
18
|
-
Text::Metaphone.double_metaphone('Coburn') # => ['KPRN', nil]
|
19
|
-
Text::Metaphone.double_metaphone('Angier') # => ['ANJ', 'ANJR']
|
20
|
-
|
21
|
-
Text::Soundex.soundex('Knuth') # => 'K530'
|
22
|
-
|
23
|
-
Text::PorterStemming.stem('abatements') # => 'abat'
|
24
|
-
|
25
|
-
|
26
|
-
= License
|
27
|
-
|
28
|
-
Same as Ruby.
|
data/lib/text.rb
DELETED
data/lib/text/double_metaphone.rb
DELETED
@@ -1,356 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# Ruby implementation of the Double Metaphone algorithm by Lawrence Philips,
|
3
|
-
# originally published in the June 2000 issue of C/C++ Users Journal.
|
4
|
-
#
|
5
|
-
# Based on Stephen Woodbridge's PHP version - http://swoodbridge.com/DoubleMetaPhone/
|
6
|
-
#
|
7
|
-
# Author: Tim Fletcher (twoggle@gmail.com)
|
8
|
-
#
|
9
|
-
|
10
|
-
module Text # :nodoc:
|
11
|
-
module Metaphone
|
12
|
-
|
13
|
-
# Returns the primary and secondary double metaphone tokens
|
14
|
-
# (the secondary will be nil if equal to the primary).
|
15
|
-
def double_metaphone(str)
|
16
|
-
primary, secondary, current = [], [], 0
|
17
|
-
original, length, last = "#{str} ".upcase, str.length, str.length - 1
|
18
|
-
if /^GN|KN|PN|WR|PS$/ =~ original[0, 2]
|
19
|
-
current += 1
|
20
|
-
end
|
21
|
-
if 'X' == original[0, 1]
|
22
|
-
primary << :S
|
23
|
-
secondary << :S
|
24
|
-
current += 1
|
25
|
-
end
|
26
|
-
while primary.length < 4 || secondary.length < 4
|
27
|
-
break if current > str.length
|
28
|
-
a, b, c = double_metaphone_lookup(original, current, length, last)
|
29
|
-
primary << a if a
|
30
|
-
secondary << b if b
|
31
|
-
current += c if c
|
32
|
-
end
|
33
|
-
primary, secondary = primary.to_s[0, 4], secondary.to_s[0, 4]
|
34
|
-
return primary, (primary == secondary ? nil : secondary)
|
35
|
-
end
|
36
|
-
|
37
|
-
|
38
|
-
private
|
39
|
-
|
40
|
-
def slavo_germanic?(str)
|
41
|
-
/W|K|CZ|WITZ/ =~ str
|
42
|
-
end
|
43
|
-
|
44
|
-
def vowel?(str)
|
45
|
-
/^A|E|I|O|U|Y$/ =~ str
|
46
|
-
end
|
47
|
-
|
48
|
-
def double_metaphone_lookup(str, pos, length, last)
|
49
|
-
case str[pos, 1]
|
50
|
-
when /^A|E|I|O|U|Y$/
|
51
|
-
if 0 == pos
|
52
|
-
return :A, :A, 1
|
53
|
-
else
|
54
|
-
return nil, nil, 1
|
55
|
-
end
|
56
|
-
when 'B'
|
57
|
-
return :P, :P, ('B' == str[pos + 1, 1] ? 2 : 1)
|
58
|
-
when 'Ç'
|
59
|
-
return :S, :S, 1
|
60
|
-
when 'C'
|
61
|
-
if pos > 1 &&
|
62
|
-
!vowel?(str[pos - 2, 1]) &&
|
63
|
-
'ACH' == str[pos - 1, 3] &&
|
64
|
-
str[pos + 2, 1] != 'I' && (
|
65
|
-
str[pos + 2, 1] != 'E' ||
|
66
|
-
str[pos - 2, 6] =~ /^(B|M)ACHER$/
|
67
|
-
) then
|
68
|
-
return :K, :K, 2
|
69
|
-
elsif 0 == pos && 'CAESAR' == str[pos, 6]
|
70
|
-
return :S, :S, 2
|
71
|
-
elsif 'CHIA' == str[pos, 4]
|
72
|
-
return :K, :K, 2
|
73
|
-
elsif 'CH' == str[pos, 2]
|
74
|
-
if pos > 0 && 'CHAE' == str[pos, 4]
|
75
|
-
return :K, :X, 2
|
76
|
-
elsif 0 == pos && (
|
77
|
-
['HARAC', 'HARIS'].include?(str[pos + 1, 5]) ||
|
78
|
-
['HOR', 'HYM', 'HIA', 'HEM'].include?(str[pos + 1, 3])
|
79
|
-
) && str[0, 5] != 'CHORE' then
|
80
|
-
return :K, :K, 2
|
81
|
-
elsif ['VAN ','VON '].include?(str[0, 4]) ||
|
82
|
-
'SCH' == str[0, 3] ||
|
83
|
-
['ORCHES','ARCHIT','ORCHID'].include?(str[pos - 2, 6]) ||
|
84
|
-
['T','S'].include?(str[pos + 2, 1]) || (
|
85
|
-
((0 == pos) || ['A','O','U','E'].include?(str[pos - 1, 1])) &&
|
86
|
-
['L','R','N','M','B','H','F','V','W',' '].include?(str[pos + 2, 1])
|
87
|
-
) then
|
88
|
-
return :K, :K, 2
|
89
|
-
elsif pos > 0
|
90
|
-
return ('MC' == str[0, 2] ? 'K' : 'X'), 'K', 2
|
91
|
-
else
|
92
|
-
return :X, :X, 2
|
93
|
-
end
|
94
|
-
elsif 'CZ' == str[pos, 2] && 'WICZ' != str[pos - 2, 4]
|
95
|
-
return :S, :X, 2
|
96
|
-
elsif 'CIA' == str[pos + 1, 3]
|
97
|
-
return :X, :X, 3
|
98
|
-
elsif 'CC' == str[pos, 2] && !(1 == pos && 'M' == str[0, 1])
|
99
|
-
if /^I|E|H$/ =~ str[pos + 2, 1] && 'HU' != str[pos + 2, 2]
|
100
|
-
if (1 == pos && 'A' == str[pos - 1, 1]) ||
|
101
|
-
/^UCCE(E|S)$/ =~ str[pos - 1, 5] then
|
102
|
-
return :KS, :KS, 3
|
103
|
-
else
|
104
|
-
return :X, :X, 3
|
105
|
-
end
|
106
|
-
else
|
107
|
-
return :K, :K, 2
|
108
|
-
end
|
109
|
-
elsif /^C(K|G|Q)$/ =~ str[pos, 2]
|
110
|
-
return :K, :K, 2
|
111
|
-
elsif /^C(I|E|Y)$/ =~ str[pos, 2]
|
112
|
-
return :S, (/^CI(O|E|A)$/ =~ str[pos, 3] ? :X : :S), 2
|
113
|
-
else
|
114
|
-
if /^ (C|Q|G)$/ =~ str[pos + 1, 2]
|
115
|
-
return :K, :K, 3
|
116
|
-
else
|
117
|
-
return :K, :K, (/^C|K|Q$/ =~ str[pos + 1, 1] && !(['CE','CI'].include?(str[pos + 1, 2])) ? 2 : 1)
|
118
|
-
end
|
119
|
-
end
|
120
|
-
when 'D'
|
121
|
-
if 'DG' == str[pos, 2]
|
122
|
-
if /^I|E|Y$/ =~ str[pos + 2, 1]
|
123
|
-
return :J, :J, 3
|
124
|
-
else
|
125
|
-
return :TK, :TK, 2
|
126
|
-
end
|
127
|
-
else
|
128
|
-
return :T, :T, (/^D(T|D)$/ =~ str[pos, 2] ? 2 : 1)
|
129
|
-
end
|
130
|
-
when 'F'
|
131
|
-
return :F, :F, ('F' == str[pos + 1, 1] ? 2 : 1)
|
132
|
-
when 'G'
|
133
|
-
if 'H' == str[pos + 1, 1]
|
134
|
-
if pos > 0 && !vowel?(str[pos - 1, 1])
|
135
|
-
return :K, :K, 2
|
136
|
-
elsif 0 == pos
|
137
|
-
if 'I' == str[pos + 2, 1]
|
138
|
-
return :J, :J, 2
|
139
|
-
else
|
140
|
-
return :K, :K, 2
|
141
|
-
end
|
142
|
-
elsif (pos > 1 && /^B|H|D$/ =~ str[pos - 2, 1]) ||
|
143
|
-
(pos > 2 && /^B|H|D$/ =~ str[pos - 3, 1]) ||
|
144
|
-
(pos > 3 && /^B|H$/ =~ str[pos - 4, 1])
|
145
|
-
return nil, nil, 2
|
146
|
-
else
|
147
|
-
if (pos > 2 && 'U' == str[pos - 1, 1] && /^C|G|L|R|T$/ =~ str[pos - 3, 1])
|
148
|
-
return :F, :F, 2
|
149
|
-
elsif pos > 0 && 'I' != str[pos - 1, 1]
|
150
|
-
return :K, :K, 2
|
151
|
-
else
|
152
|
-
return nil, nil, 2
|
153
|
-
end
|
154
|
-
end
|
155
|
-
elsif 'N' == str[pos + 1, 1]
|
156
|
-
if 1 == pos && vowel?(str[0, 1]) && !slavo_germanic?(str)
|
157
|
-
return :KN, :N, 2
|
158
|
-
else
|
159
|
-
if 'EY' != str[pos + 2, 2] && 'Y' != str[pos + 1, 1] && !slavo_germanic?(str)
|
160
|
-
return :N, :KN, 2
|
161
|
-
else
|
162
|
-
return :KN, :KN, 2
|
163
|
-
end
|
164
|
-
end
|
165
|
-
elsif 'LI' == str[pos + 1, 2] && !slavo_germanic?(str)
|
166
|
-
return :KL, :L, 2
|
167
|
-
elsif 0 == pos && ('Y' == str[pos + 1, 1] || /^(E(S|P|B|L|Y|I|R)|I(B|L|N|E))$/ =~ str[pos + 1, 2])
|
168
|
-
return :K, :J, 2
|
169
|
-
elsif (('ER' == str[pos + 1, 2] || 'Y' == str[pos + 1, 1]) &&
|
170
|
-
/^(D|R|M)ANGER$/ !~ str[0, 6] &&
|
171
|
-
/^E|I$/ !~ str[pos - 1, 1] &&
|
172
|
-
/^(R|O)GY$/ !~ str[pos - 1, 3])
|
173
|
-
return :K, :J, 2
|
174
|
-
elsif /^E|I|Y$/ =~ str[pos + 1, 1] || /^(A|O)GGI$/ =~ str[pos - 1, 4]
|
175
|
-
if (/^V(A|O)N $/ =~ str[0, 4] || 'SCH' == str[0, 3]) || 'ET' == str[pos + 1, 2]
|
176
|
-
return :K, :K, 2
|
177
|
-
else
|
178
|
-
if 'IER ' == str[pos + 1, 4]
|
179
|
-
return :J, :J, 2
|
180
|
-
else
|
181
|
-
return :J, :K, 2
|
182
|
-
end
|
183
|
-
end
|
184
|
-
elsif 'G' == str[pos + 1, 1]
|
185
|
-
return :K, :K, 2
|
186
|
-
else
|
187
|
-
return :K, :K, 1
|
188
|
-
end
|
189
|
-
when 'H'
|
190
|
-
if (0 == pos || vowel?(str[pos - 1, 1])) && vowel?(str[pos + 1, 1])
|
191
|
-
return :H, :H, 2
|
192
|
-
else
|
193
|
-
return nil, nil, 1
|
194
|
-
end
|
195
|
-
when 'J'
|
196
|
-
if 'JOSE' == str[pos, 4] || 'SAN ' == str[0, 4]
|
197
|
-
if (0 == pos && ' ' == str[pos + 4, 1]) || 'SAN ' == str[0, 4]
|
198
|
-
return :H, :H, 1
|
199
|
-
else
|
200
|
-
return :J, :H, 1
|
201
|
-
end
|
202
|
-
else
|
203
|
-
current = ('J' == str[pos + 1, 1] ? 2 : 1)
|
204
|
-
|
205
|
-
if 0 == pos && 'JOSE' != str[pos, 4]
|
206
|
-
return :J, :A, current
|
207
|
-
else
|
208
|
-
if vowel?(str[pos - 1, 1]) && !slavo_germanic?(str) && /^A|O$/ =~ str[pos + 1, 1]
|
209
|
-
return :J, :H, current
|
210
|
-
else
|
211
|
-
if last == pos
|
212
|
-
return :J, nil, current
|
213
|
-
else
|
214
|
-
if /^L|T|K|S|N|M|B|Z$/ !~ str[pos + 1, 1] && /^S|K|L$/ !~ str[pos - 1, 1]
|
215
|
-
return :J, :J, current
|
216
|
-
else
|
217
|
-
return nil, nil, current
|
218
|
-
end
|
219
|
-
end
|
220
|
-
end
|
221
|
-
end
|
222
|
-
end
|
223
|
-
when 'K'
|
224
|
-
return :K, :K, ('K' == str[pos + 1, 1] ? 2 : 1)
|
225
|
-
when 'L'
|
226
|
-
if 'L' == str[pos + 1, 1]
|
227
|
-
if (((length - 3) == pos && /^(ILL(O|A)|ALLE)$/ =~ str[pos - 1, 4]) ||
|
228
|
-
((/^(A|O)S$/ =~ str[last - 1, 2] || /^A|O$/ =~ str[last, 1]) && 'ALLE' == str[pos - 1, 4]))
|
229
|
-
return :L, nil, 2
|
230
|
-
else
|
231
|
-
return :L, :L, 2
|
232
|
-
end
|
233
|
-
else
|
234
|
-
return :L, :L, 1
|
235
|
-
end
|
236
|
-
when 'M'
|
237
|
-
if ('UMB' == str[pos - 1, 3] &&
|
238
|
-
((last - 1) == pos || 'ER' == str[pos + 2, 2])) || 'M' == str[pos + 1, 1]
|
239
|
-
return :M, :M, 2
|
240
|
-
else
|
241
|
-
return :M, :M, 1
|
242
|
-
end
|
243
|
-
when 'N'
|
244
|
-
return :N, :N, ('N' == str[pos + 1, 1] ? 2 : 1)
|
245
|
-
when 'Ñ'
|
246
|
-
return :N, :N, 1
|
247
|
-
when 'P'
|
248
|
-
if 'H' == str[pos + 1, 1]
|
249
|
-
return :F, :F, 2
|
250
|
-
else
|
251
|
-
return :P, :P, (/^P|B$/ =~ str[pos + 1, 1] ? 2 : 1)
|
252
|
-
end
|
253
|
-
when 'Q'
|
254
|
-
return :K, :K, ('Q' == str[pos + 1, 1] ? 2 : 1)
|
255
|
-
when 'R'
|
256
|
-
current = ('R' == str[pos + 1, 1] ? 2 : 1)
|
257
|
-
|
258
|
-
if last == pos && !slavo_germanic?(str) && 'IE' == str[pos - 2, 2] && /^M(E|A)$/ !~ str[pos - 4, 2]
|
259
|
-
return nil, :R, current
|
260
|
-
else
|
261
|
-
return :R, :R, current
|
262
|
-
end
|
263
|
-
when 'S'
|
264
|
-
if /^(I|Y)SL$/ =~ str[pos - 1, 3]
|
265
|
-
return nil, nil, 1
|
266
|
-
elsif 0 == pos && 'SUGAR' == str[pos, 5]
|
267
|
-
return :X, :S, 1
|
268
|
-
elsif 'SH' == str[pos, 2]
|
269
|
-
if /^H(EIM|OEK|OLM|OLZ)$/ =~ str[pos + 1, 4]
|
270
|
-
return :S, :S, 2
|
271
|
-
else
|
272
|
-
return :X, :X, 2
|
273
|
-
end
|
274
|
-
elsif /^SI(O|A)$/ =~ str[pos, 3] || 'SIAN' == str[pos, 4]
|
275
|
-
return :S, (slavo_germanic?(str) ? :S : :X), 3
|
276
|
-
elsif (0 == pos && /^M|N|L|W$/ =~ str[pos + 1, 1]) || 'Z' == str[pos + 1, 1]
|
277
|
-
return :S, :X, ('Z' == str[pos + 1, 1] ? 2 : 1)
|
278
|
-
elsif 'SC' == str[pos, 2]
|
279
|
-
if 'H' == str[pos + 2, 1]
|
280
|
-
if /^OO|ER|EN|UY|ED|EM$/ =~ str[pos + 3, 2]
|
281
|
-
return (/^E(R|N)$/ =~ str[pos + 3, 2] ? :X : :SK), :SK, 3
|
282
|
-
else
|
283
|
-
return :X, ((0 == pos && !vowel?(str[3, 1]) && ('W' != str[pos + 3, 1])) ? :S : :X), 3
|
284
|
-
end
|
285
|
-
elsif /^I|E|Y$/ =~ str[pos + 2, 1]
|
286
|
-
return :S, :S, 3
|
287
|
-
else
|
288
|
-
return :SK, :SK, 3
|
289
|
-
end
|
290
|
-
else
|
291
|
-
return (last == pos && /^(A|O)I$/ =~ str[pos - 2, 2] ? nil : 'S'), 'S', (/^S|Z$/ =~ str[pos + 1, 1] ? 2 : 1)
|
292
|
-
end
|
293
|
-
when 'T'
|
294
|
-
if 'TION' == str[pos, 4]
|
295
|
-
return :X, :X, 3
|
296
|
-
elsif /^T(IA|CH)$/ =~ str[pos, 3]
|
297
|
-
return :X, :X, 3
|
298
|
-
elsif 'TH' == str[pos, 2] || 'TTH' == str[pos, 3]
|
299
|
-
if /^(O|A)M$/ =~ str[pos + 2, 2] || /^V(A|O)N $/ =~ str[0, 4] || 'SCH' == str[0, 3]
|
300
|
-
return :T, :T, 2
|
301
|
-
else
|
302
|
-
return 0, :T, 2
|
303
|
-
end
|
304
|
-
else
|
305
|
-
return :T, :T, (/^T|D$/ =~ str[pos + 1, 1] ? 2 : 1)
|
306
|
-
end
|
307
|
-
when 'V'
|
308
|
-
return :F, :F, ('V' == str[pos + 1, 1] ? 2 : 1)
|
309
|
-
when 'W'
|
310
|
-
if 'WR' == str[pos, 2]
|
311
|
-
return :R, :R, 2
|
312
|
-
end
|
313
|
-
pri, sec = nil, nil
|
314
|
-
|
315
|
-
if 0 == pos && (vowel?(str[pos + 1, 1]) || 'WH' == str[pos, 2])
|
316
|
-
pri = :A
|
317
|
-
sec = vowel?(str[pos + 1, 1]) ? :F : :A
|
318
|
-
end
|
319
|
-
|
320
|
-
if (last == pos && vowel?(str[pos - 1, 1])) || 'SCH' == str[0, 3] ||
|
321
|
-
/^EWSKI|EWSKY|OWSKI|OWSKY$/ =~ str[pos - 1, 5]
|
322
|
-
return pri, "#{sec}F".intern, 1
|
323
|
-
elsif /^WI(C|T)Z$/ =~ str[pos, 4]
|
324
|
-
return "#{pri}TS".intern, "#{sec}FX".intern, 4
|
325
|
-
else
|
326
|
-
return pri, sec, 1
|
327
|
-
end
|
328
|
-
when 'X'
|
329
|
-
current = (/^C|X$/ =~ str[pos + 1, 1] ? 2 : 1)
|
330
|
-
|
331
|
-
if !(last == pos && (/^(I|E)AU$/ =~ str[pos - 3, 3] || /^(A|O)U$/ =~ str[pos - 2, 2]))
|
332
|
-
return :KS, :KS, current
|
333
|
-
else
|
334
|
-
return nil, nil, current
|
335
|
-
end
|
336
|
-
when 'Z'
|
337
|
-
if 'H' == str[pos + 1, 1]
|
338
|
-
return :J, :J, 2
|
339
|
-
else
|
340
|
-
current = ('Z' == str[pos + 1, 1] ? 2 : 1)
|
341
|
-
|
342
|
-
if /^Z(O|I|A)$/ =~ str[pos + 1, 2] || (slavo_germanic?(str) && (pos > 0 && 'T' != str[pos - 1, 1]))
|
343
|
-
return :S, :TS, current
|
344
|
-
else
|
345
|
-
return :S, :S, current
|
346
|
-
end
|
347
|
-
end
|
348
|
-
else
|
349
|
-
return nil, nil, 1
|
350
|
-
end
|
351
|
-
end # def double_metaphone_lookup
|
352
|
-
|
353
|
-
extend self
|
354
|
-
|
355
|
-
end # module Metaphone
|
356
|
-
end # module Text
|