phonetic 1.0.1 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rspec +1 -1
- data/README.md +7 -0
- data/lib/phonetic.rb +1 -0
- data/lib/phonetic/core_ext/string/dm_soundex.rb +12 -0
- data/lib/phonetic/dm_soundex.rb +82 -0
- data/lib/phonetic/dm_soundex_map.rb +233 -0
- data/lib/phonetic/double_metaphone.rb +519 -569
- data/lib/phonetic/metaphone.rb +43 -69
- data/lib/phonetic/version.rb +1 -1
- data/spec/phonetic/core_ext/string/dm_soundex_spec.rb +9 -0
- data/spec/phonetic/dm_soundex_spec.rb +13 -0
- data/spec/support/dm_soundex_data.rb +259 -0
- data/spec/support/double_metaphone_data.rb +30 -0
- metadata +11 -2
data/lib/phonetic/metaphone.rb
CHANGED
@@ -20,51 +20,43 @@ module Phonetic
|
|
20
20
|
w = word.upcase.gsub(/[^A-Z]/, '')
|
21
21
|
return if w.empty?
|
22
22
|
two = w[0, 2]
|
23
|
-
|
24
|
-
|
25
|
-
|
23
|
+
w[0] = '' if two =~ /PN|AE|KN|GN|WR/
|
24
|
+
w[0] = 'S' if w[0] == 'X'
|
25
|
+
w[1] = '' if two == 'WH'
|
26
26
|
l = w.size
|
27
27
|
metaph = ''
|
28
28
|
for n in 0..(l - 1)
|
29
29
|
break unless metaph.size < code_size
|
30
30
|
symb = w[n]
|
31
|
-
if
|
31
|
+
if symb == 'C' || n == 0 || w[n - 1] != symb
|
32
32
|
case
|
33
33
|
when vowel?(symb) && n == 0
|
34
34
|
metaph = symb
|
35
35
|
when symb == 'B'
|
36
|
-
|
37
|
-
metaph = metaph + symb
|
38
|
-
end
|
36
|
+
metaph += symb if n != l - 1 || w[n - 1] != 'M'
|
39
37
|
when symb == 'C'
|
40
|
-
if
|
38
|
+
if n == 0 || w[n - 1] != 'S' || !front_vowel?(w[n + 1])
|
41
39
|
if w[n + 1, 2] == 'IA'
|
42
|
-
metaph
|
43
|
-
|
44
|
-
|
45
|
-
|
40
|
+
metaph += 'X'
|
41
|
+
elsif front_vowel?(w[n + 1])
|
42
|
+
metaph += 'S'
|
43
|
+
elsif n > 0 && w[n + 1] == 'H' && w[n - 1] == 'S'
|
44
|
+
metaph += 'K'
|
45
|
+
elsif w[n + 1] == 'H'
|
46
|
+
if n == 0 && !vowel?(w[n + 2])
|
47
|
+
metaph += 'K'
|
46
48
|
else
|
47
|
-
|
48
|
-
metaph = metaph + 'K'
|
49
|
-
else
|
50
|
-
if w[n + 1] == 'H'
|
51
|
-
if n == 0 && !vowel?(w[n + 2])
|
52
|
-
metaph = metaph + 'K'
|
53
|
-
else
|
54
|
-
metaph = metaph + 'X'
|
55
|
-
end
|
56
|
-
else
|
57
|
-
metaph = metaph + 'K'
|
58
|
-
end
|
59
|
-
end
|
49
|
+
metaph += 'X'
|
60
50
|
end
|
51
|
+
else
|
52
|
+
metaph += 'K'
|
61
53
|
end
|
62
54
|
end
|
63
55
|
when symb == 'D'
|
64
56
|
if w[n + 1] == 'G' && front_vowel?(w[n + 2])
|
65
|
-
metaph
|
57
|
+
metaph += 'J'
|
66
58
|
else
|
67
|
-
metaph
|
59
|
+
metaph += 'T'
|
68
60
|
end
|
69
61
|
when symb == 'G'
|
70
62
|
silent = (w[n + 1] == 'H' && !vowel?(w[n + 2]))
|
@@ -77,69 +69,51 @@ module Phonetic
|
|
77
69
|
hard = (n > 0 && w[n - 1] == 'G')
|
78
70
|
unless silent
|
79
71
|
if front_vowel?(w[n + 1]) && !hard
|
80
|
-
metaph
|
72
|
+
metaph += 'J'
|
81
73
|
else
|
82
|
-
metaph
|
74
|
+
metaph += 'K'
|
83
75
|
end
|
84
76
|
end
|
85
77
|
when symb == 'H'
|
86
78
|
if !(n == l - 1 || (n > 0 && VARSON[w[n - 1]]))
|
87
|
-
if vowel?(w[n + 1])
|
88
|
-
metaph = metaph + 'H'
|
89
|
-
end
|
79
|
+
metaph += 'H' if vowel?(w[n + 1])
|
90
80
|
end
|
91
|
-
when
|
92
|
-
metaph
|
81
|
+
when symb =~ /[FJLMNR]/
|
82
|
+
metaph += symb
|
93
83
|
when symb == 'K'
|
94
84
|
if n > 0 && w[n - 1] != 'C'
|
95
|
-
metaph
|
96
|
-
|
97
|
-
|
98
|
-
metaph = 'K'
|
99
|
-
end
|
85
|
+
metaph += 'K'
|
86
|
+
elsif n == 0
|
87
|
+
metaph = 'K'
|
100
88
|
end
|
101
89
|
when symb == 'P'
|
102
|
-
|
103
|
-
metaph = metaph + 'F'
|
104
|
-
else
|
105
|
-
metaph = metaph + 'P'
|
106
|
-
end
|
90
|
+
metaph += w[n + 1] == 'H' ? 'F' : 'P'
|
107
91
|
when symb == 'Q'
|
108
|
-
metaph
|
92
|
+
metaph += 'K'
|
109
93
|
when symb == 'S'
|
110
|
-
if w[n + 1
|
94
|
+
if w[n + 1, 2] =~ /I[OA]/
|
95
|
+
metaph += 'X'
|
96
|
+
elsif w[n + 1] == 'H'
|
111
97
|
metaph += 'X'
|
112
98
|
else
|
113
|
-
|
114
|
-
metaph += 'X'
|
115
|
-
else
|
116
|
-
metaph += 'S'
|
117
|
-
end
|
99
|
+
metaph += 'S'
|
118
100
|
end
|
119
101
|
when symb == 'T'
|
120
|
-
if w[n + 1
|
121
|
-
metaph
|
102
|
+
if w[n + 1, 2] =~ /I[OA]/
|
103
|
+
metaph += 'X'
|
104
|
+
elsif w[n + 1] == 'H'
|
105
|
+
metaph += '0' if n == 0 || w[n - 1] != 'T'
|
122
106
|
else
|
123
|
-
if w[n + 1]
|
124
|
-
if !(n > 0 && w[n - 1] == 'T')
|
125
|
-
metaph = metaph + '0'
|
126
|
-
end
|
127
|
-
else
|
128
|
-
if !(w[n + 1] == 'C' && w[n + 2] == 'H')
|
129
|
-
metaph = metaph + 'T'
|
130
|
-
end
|
131
|
-
end
|
107
|
+
metaph += 'T' if w[n + 1, 2] != 'CH'
|
132
108
|
end
|
133
109
|
when symb == 'V'
|
134
|
-
metaph
|
135
|
-
when symb
|
136
|
-
if vowel?(w[n + 1])
|
137
|
-
metaph = metaph + symb
|
138
|
-
end
|
110
|
+
metaph += 'F'
|
111
|
+
when symb =~ /[WY]/
|
112
|
+
metaph += symb if vowel?(w[n + 1])
|
139
113
|
when symb == 'X'
|
140
|
-
metaph
|
114
|
+
metaph += 'KS'
|
141
115
|
when symb == 'Z'
|
142
|
-
metaph
|
116
|
+
metaph += 'S'
|
143
117
|
end
|
144
118
|
end
|
145
119
|
end
|
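data/lib/phonetic/version.rb
CHANGED
(the diff for this file is not included in this extract)
data/spec/phonetic/dm_soundex_spec.rb
ADDED
@@ -0,0 +1,13 @@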
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'support/dm_soundex_data'
|
3
|
+
|
4
|
+
describe Phonetic::DMSoundex do
|
5
|
+
describe '.encode' do
|
6
|
+
it 'should calculate Daitch-Mokotoff Soundex values of string' do
|
7
|
+
Phonetic::DM_SOUNDEX_TEST_TABLE.each do |w, r|
|
8
|
+
res = Phonetic::DMSoundex.encode(w)
|
9
|
+
res.should eq(r), "expected: #{r}\ngot: #{res}\nword: #{w}"
|
10
|
+
end
|
11
|
+
end
|
12
|
+
end
|
13
|
+
end
|
data/spec/support/dm_soundex_data.rb
ADDED
@@ -0,0 +1,259 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
module Phonetic
|
4
|
+
DM_SOUNDEX_TEST_TABLE = {
|
5
|
+
'Abucay' => ['075000', '074000'],
|
6
|
+
'Ajuna' => ['060000'],
|
7
|
+
'Akeem' => ['056000'],
|
8
|
+
'Alfa' => ['087000'],
|
9
|
+
'Alpert' => ['087930'],
|
10
|
+
'Alysa' => ['084000'],
|
11
|
+
'Amani' => ['066000'],
|
12
|
+
'Angélica' => ['065850', '065840'],
|
13
|
+
'Aniyah' => ['060000'],
|
14
|
+
'Anja' => ['060000', '064000'],
|
15
|
+
'Annemarie' => ['066900'],
|
16
|
+
'Applaud' => ['078300'],
|
17
|
+
'Aputsiaq' => ['074500'],
|
18
|
+
'Arathana' => ['093600'],
|
19
|
+
'Asdic' => ['043500', '043400'],
|
20
|
+
'Ashtrays' => ['043940'],
|
21
|
+
'Athie' => ['030000'],
|
22
|
+
'Australia' => ['043980'],
|
23
|
+
'Badou' => ['730000'],
|
24
|
+
'Ballina' => ['786000'],
|
25
|
+
'Beteng' => ['736500'],
|
26
|
+
'Bethany' => ['736000'],
|
27
|
+
'Bineta' => ['763000'],
|
28
|
+
'Boto' => ['730000'],
|
29
|
+
'Brandon' => ['796360'],
|
30
|
+
'Breuer' => ['791900'],
|
31
|
+
'Brushcut' => ['794530', '794300'],
|
32
|
+
'Caja' => ['510000', '410000'],
|
33
|
+
'Caleb' => ['587000', '487000'],
|
34
|
+
'Carina' => ['596000', '496000'],
|
35
|
+
'Carol' => ['598000', '498000'],
|
36
|
+
'Cassidy' => ['543000', '443000'],
|
37
|
+
'Cayetano' => ['513600', '413600'],
|
38
|
+
'Charlotte' => ['598300', '498300'],
|
39
|
+
'Cheboh' => ['570000', '470000'],
|
40
|
+
'Chloe' => ['580000', '480000'],
|
41
|
+
'Christoffersen' => ['594379', '494379', '594374', '494374'],
|
42
|
+
'Cleo' => ['580000', '480000'],
|
43
|
+
'Colby' => ['587000', '487000'],
|
44
|
+
'Conner' => ['569000', '469000'],
|
45
|
+
'Considine' => ['564360', '464360'],
|
46
|
+
'Cormac' => ['596500', '496500', '596400', '496400'],
|
47
|
+
'Cornell' => ['596800', '496800'],
|
48
|
+
'Corpuz' => ['597400', '497400'],
|
49
|
+
'Courtney' => ['593600', '493600'],
|
50
|
+
'Cszinchjou' => ['465000', '464000', '465400'],
|
51
|
+
'Cudkowicz' => ['535740', '435740'],
|
52
|
+
'Czstochowa' => ['435700', '434700'],
|
53
|
+
'Daina' => ['360000'],
|
54
|
+
'Daisha' => ['340000'],
|
55
|
+
'Damian' => ['366000'],
|
56
|
+
'Dan' => ['360000'],
|
57
|
+
'Daren' => ['396000'],
|
58
|
+
'Davidsen' => ['374600'],
|
59
|
+
'Deja' => ['310000'],
|
60
|
+
'Deltoid' => ['383300'],
|
61
|
+
'Denisse' => ['364000'],
|
62
|
+
'Derek' => ['395000'],
|
63
|
+
'Diakhou' => ['350000'],
|
64
|
+
'Dionysia' => ['364000'],
|
65
|
+
'Dixie' => ['354000'],
|
66
|
+
'Domenik' => ['366500'],
|
67
|
+
'Donny' => ['360000'],
|
68
|
+
'Dorian' => ['396000'],
|
69
|
+
'Draft' => ['397300'],
|
70
|
+
'Drsný' => ['460000'],
|
71
|
+
'Drzewica' => ['475000', '474000'],
|
72
|
+
'Dzhezkazgan' => ['445456'],
|
73
|
+
'Dzsenifer' => ['467900'],
|
74
|
+
'Ehemba' => ['056700'],
|
75
|
+
'Elarbi' => ['089700'],
|
76
|
+
'Emmet' => ['063000'],
|
77
|
+
'Estelle' => ['043800'],
|
78
|
+
'Fardin' => ['793600'],
|
79
|
+
'Floy' => ['780000'],
|
80
|
+
'Fodié' => ['730000'],
|
81
|
+
'Freeda' => ['793000'],
|
82
|
+
'Freud' => ['793000'],
|
83
|
+
'Fuji' => ['710000'],
|
84
|
+
'Gaoussou' => ['540000'],
|
85
|
+
'Garcia' => ['595000', '594000'],
|
86
|
+
'George' => ['595000'],
|
87
|
+
'Georgette' => ['595300'],
|
88
|
+
'Ginger' => ['565900'],
|
89
|
+
'Golden' => ['583600'],
|
90
|
+
'Gordillo' => ['593800'],
|
91
|
+
'Greg' => ['595000'],
|
92
|
+
'Guevarra' => ['579000'],
|
93
|
+
'Gutkowski' => ['535745'],
|
94
|
+
'Gwen' => ['576000'],
|
95
|
+
'Haber' => ['579000'],
|
96
|
+
'Hady' => ['530000'],
|
97
|
+
'Hagenes' => ['556400'],
|
98
|
+
'Hailee' => ['580000'],
|
99
|
+
'Halfback' => ['587500', '587450'],
|
100
|
+
'Handshake' => ['564500'],
|
101
|
+
'Hardtack' => ['593500', '593450'],
|
102
|
+
'Hazael' => ['548000'],
|
103
|
+
'Heitschmidt' => ['546300'],
|
104
|
+
'Hymn' => ['566000'],
|
105
|
+
'Iliana' => ['086000'],
|
106
|
+
'Ingegerd' => ['065593'],
|
107
|
+
'Irini' => ['096000'],
|
108
|
+
'Jaclyn' => ['158600', '458600', '148600', '448600'],
|
109
|
+
'Jackson-Jackson' => ['154654','454654', '145465', '445465', '154645',
|
110
|
+
'454645', '145464', '445464', '154644', '454644'],
|
111
|
+
'James' => ['164000', '464000'],
|
112
|
+
'Jamina' => ['166000', '466000'],
|
113
|
+
'Jamir' => ['169000', '469000'],
|
114
|
+
'Jannie' => ['160000', '460000'],
|
115
|
+
'Jerel' => ['198000', '498000'],
|
116
|
+
'Jerzy' => ['140000', '440000', '194000', '494000'],
|
117
|
+
'Jesse' => ['140000', '440000'],
|
118
|
+
'Joanie' => ['160000', '460000'],
|
119
|
+
'Joseph' => ['147000', '447000'],
|
120
|
+
'Josianne' => ['146000', '446000'],
|
121
|
+
'Joya' => ['100000', '410000'],
|
122
|
+
'Juri' => ['190000', '490000'],
|
123
|
+
'Justyn' => ['143600', '443600'],
|
124
|
+
'Kandeh' => ['563000'],
|
125
|
+
'Kedzie' => ['540000'],
|
126
|
+
'Keshawn' => ['547600'],
|
127
|
+
'Khrushchev' => ['594700'],
|
128
|
+
'Kirlin' => ['598600'],
|
129
|
+
'Kirsten' => ['594360', '543600'],
|
130
|
+
'Kjær' => ['590000', '549000'],
|
131
|
+
'Kleinman' => ['586660'],
|
132
|
+
'Korbin' => ['597600'],
|
133
|
+
'Krista' => ['594300'],
|
134
|
+
'Larkin' => ['895600'],
|
135
|
+
'Laurence' => ['896500', '896400'],
|
136
|
+
'Laverna' => ['879600'],
|
137
|
+
'Lavonne' => ['876000'],
|
138
|
+
'Leia' => ['810000'],
|
139
|
+
'Lia' => ['800000'],
|
140
|
+
'Lilia' => ['880000'],
|
141
|
+
'Loren' => ['896000'],
|
142
|
+
'Louise' => ['840000'],
|
143
|
+
'Lovisa' => ['874000'],
|
144
|
+
'Luella' => ['880000'],
|
145
|
+
'Luise' => ['840000'],
|
146
|
+
'Mable' => ['678000'],
|
147
|
+
'Madonna' => ['636000'],
|
148
|
+
'Majabrith' => ['617930'],
|
149
|
+
'Majad' => ['613000'],
|
150
|
+
'Malomar' => ['686900'],
|
151
|
+
'Mamandew' => ['666370'],
|
152
|
+
'Mame' => ['660000'],
|
153
|
+
'Manheim' => ['665600'],
|
154
|
+
'Manlafy' => ['668700'],
|
155
|
+
'Margareta' => ['695930'],
|
156
|
+
'Marisol' => ['694800'],
|
157
|
+
'Marjolaine' => ['698600', '694860'],
|
158
|
+
'Mary' => ['690000'],
|
159
|
+
'Mathew' => ['637000'],
|
160
|
+
'Mbamoussa' => ['676400'],
|
161
|
+
'Meike' => ['650000'],
|
162
|
+
'Mintz' => ['664000'],
|
163
|
+
'Mirac' => ['695000', '694000'],
|
164
|
+
'Monserrate' => ['664930'],
|
165
|
+
'Moritz' => ['694000'],
|
166
|
+
'Musa' => ['640000'],
|
167
|
+
'Musse' => ['640000'],
|
168
|
+
'Myra' => ['690000'],
|
169
|
+
'Myrtie' => ['693000'],
|
170
|
+
'Nadhim' => ['635600'],
|
171
|
+
'Napel' => ['678000'],
|
172
|
+
'Nash' => ['640000'],
|
173
|
+
'Ndour' => ['639000'],
|
174
|
+
'Nelda' => ['683000'],
|
175
|
+
'Nelli' => ['680000'],
|
176
|
+
'Neoma' => ['660000'],
|
177
|
+
'Niels' => ['684000'],
|
178
|
+
'Novella' => ['678000'],
|
179
|
+
'Nájera' => ['690000', '649000'],
|
180
|
+
'Obaar' => ['079000'],
|
181
|
+
'Oleta' => ['083000'],
|
182
|
+
'Osio' => ['040000'],
|
183
|
+
'Othilie' => ['038000'],
|
184
|
+
'Pabodhi' => ['773500'],
|
185
|
+
'Pagsisihang' => ['754456'],
|
186
|
+
'Pavith' => ['773000'],
|
187
|
+
'Pete' => ['730000'],
|
188
|
+
'Portugal' => ['793580'],
|
189
|
+
'Postcard' => ['743593', '743493'],
|
190
|
+
'Postscript' => ['743497'],
|
191
|
+
'Predovic' => ['793750', '793740'],
|
192
|
+
'Price' => ['795000', '794000'],
|
193
|
+
'Project' => ['791530', '791430'],
|
194
|
+
'Quyne' => ['560000'],
|
195
|
+
'Rachel' => ['958000', '948000'],
|
196
|
+
'Radius' => ['934000'],
|
197
|
+
'Reilly' => ['980000'],
|
198
|
+
'Sabina' => ['476000'],
|
199
|
+
'Sacoura' => ['459000', '449000'],
|
200
|
+
'Safi' => ['470000'],
|
201
|
+
'Saiarr' => ['419000'],
|
202
|
+
'Salgado' => ['485300'],
|
203
|
+
'Samara' => ['469000'],
|
204
|
+
'Samsidine' => ['464360'],
|
205
|
+
'Sanford' => ['467930'],
|
206
|
+
'Sarah' => ['490000'],
|
207
|
+
'Sasha' => ['440000'],
|
208
|
+
'Satterfield' => ['439783'],
|
209
|
+
'Shchaveleva' => ['278700'],
|
210
|
+
'School' => ['480000'],
|
211
|
+
'Schuster' => ['443900'],
|
212
|
+
'Schtolteheim' => ['283560'],
|
213
|
+
'Schtschigry' => ['259000'],
|
214
|
+
'Schwarz' => ['474000', '479400'],
|
215
|
+
'Science' => ['265000', '264000'],
|
216
|
+
'Senger' => ['465900'],
|
217
|
+
'Servín' => ['497600'],
|
218
|
+
'Shad' => ['430000'],
|
219
|
+
'Shawna' => ['476000'],
|
220
|
+
'Shdanow' => ['267000'],
|
221
|
+
'Shtchirowskaya' => ['297451'],
|
222
|
+
'Shtorov' => ['297000'],
|
223
|
+
'Shtshuf' => ['270000'],
|
224
|
+
'Simeon' => ['466000'],
|
225
|
+
'Sipan' => ['476000'],
|
226
|
+
'Sizze' => ['440000'],
|
227
|
+
'Sundqvist' => ['463574'],
|
228
|
+
'Syjuco' => ['450000', '445000', '440000', '444000'],
|
229
|
+
'Sytengco' => ['436500', '436540'],
|
230
|
+
'Tanhehco' => ['365500', '365400'],
|
231
|
+
'Tapia' => ['370000'],
|
232
|
+
'Taya' => ['310000'],
|
233
|
+
'Touchstone' => ['354360'],
|
234
|
+
'Topf' => ['370000'],
|
235
|
+
'Torrealba' => ['398700'],
|
236
|
+
'Trinity' => ['396300'],
|
237
|
+
'Tucson' => ['346000'],
|
238
|
+
'Tupa' => ['370000'],
|
239
|
+
'Uribe' => ['097000'],
|
240
|
+
'Valentina' => ['786360'],
|
241
|
+
'Vera' => ['790000'],
|
242
|
+
'Verna' => ['796000'],
|
243
|
+
'Vickie' => ['750000', '745000'],
|
244
|
+
'Vilhelmine' => ['785866'],
|
245
|
+
'Von' => ['760000'],
|
246
|
+
'Víctor' => ['753900', '743900'],
|
247
|
+
'Webster' => ['774390'],
|
248
|
+
'Westcheste' => ['744300'],
|
249
|
+
'Whitney' => ['753600'],
|
250
|
+
'Wilberto' => ['787930'],
|
251
|
+
'Wilton' => ['783600'],
|
252
|
+
'Wrists' => ['794340'],
|
253
|
+
'Yakou' => ['150000'],
|
254
|
+
'Yaye' => ['100000'],
|
255
|
+
'Yin' => ['160000'],
|
256
|
+
'Yoyoy' => ['100000'],
|
257
|
+
'Zena' => ['460000']
|
258
|
+
}
|
259
|
+
end
|