phonetic 1.0.1 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -20,51 +20,43 @@ module Phonetic
20
20
  w = word.upcase.gsub(/[^A-Z]/, '')
21
21
  return if w.empty?
22
22
  two = w[0, 2]
23
- if ['PN', 'AE', 'KN', 'GN', 'WR'].include?(two) then w[0] = '' end
24
- if w[0] == 'X' then w[0] = 'S' end
25
- if two == 'WH' then w[1] = '' end
23
+ w[0] = '' if two =~ /PN|AE|KN|GN|WR/
24
+ w[0] = 'S' if w[0] == 'X'
25
+ w[1] = '' if two == 'WH'
26
26
  l = w.size
27
27
  metaph = ''
28
28
  for n in 0..(l - 1)
29
29
  break unless metaph.size < code_size
30
30
  symb = w[n]
31
- if !(symb != 'C' && n > 0 && w[n - 1] == symb)
31
+ if symb == 'C' || n == 0 || w[n - 1] != symb
32
32
  case
33
33
  when vowel?(symb) && n == 0
34
34
  metaph = symb
35
35
  when symb == 'B'
36
- unless n == l - 1 && w[n - 1] == 'M'
37
- metaph = metaph + symb
38
- end
36
+ metaph += symb if n != l - 1 || w[n - 1] != 'M'
39
37
  when symb == 'C'
40
- if !(n > 0 && w[n - 1] == 'S' && front_vowel?(w[n + 1]))
38
+ if n == 0 || w[n - 1] != 'S' || !front_vowel?(w[n + 1])
41
39
  if w[n + 1, 2] == 'IA'
42
- metaph = metaph + 'X'
43
- else
44
- if front_vowel?(w[n + 1])
45
- metaph = metaph + 'S'
40
+ metaph += 'X'
41
+ elsif front_vowel?(w[n + 1])
42
+ metaph += 'S'
43
+ elsif n > 0 && w[n + 1] == 'H' && w[n - 1] == 'S'
44
+ metaph += 'K'
45
+ elsif w[n + 1] == 'H'
46
+ if n == 0 && !vowel?(w[n + 2])
47
+ metaph += 'K'
46
48
  else
47
- if n > 0 && w[n + 1] == 'H' && w[n - 1] == 'S'
48
- metaph = metaph + 'K'
49
- else
50
- if w[n + 1] == 'H'
51
- if n == 0 && !vowel?(w[n + 2])
52
- metaph = metaph + 'K'
53
- else
54
- metaph = metaph + 'X'
55
- end
56
- else
57
- metaph = metaph + 'K'
58
- end
59
- end
49
+ metaph += 'X'
60
50
  end
51
+ else
52
+ metaph += 'K'
61
53
  end
62
54
  end
63
55
  when symb == 'D'
64
56
  if w[n + 1] == 'G' && front_vowel?(w[n + 2])
65
- metaph = metaph + 'J'
57
+ metaph += 'J'
66
58
  else
67
- metaph = metaph + 'T'
59
+ metaph += 'T'
68
60
  end
69
61
  when symb == 'G'
70
62
  silent = (w[n + 1] == 'H' && !vowel?(w[n + 2]))
@@ -77,69 +69,51 @@ module Phonetic
77
69
  hard = (n > 0 && w[n - 1] == 'G')
78
70
  unless silent
79
71
  if front_vowel?(w[n + 1]) && !hard
80
- metaph = metaph + 'J'
72
+ metaph += 'J'
81
73
  else
82
- metaph = metaph + 'K'
74
+ metaph += 'K'
83
75
  end
84
76
  end
85
77
  when symb == 'H'
86
78
  if !(n == l - 1 || (n > 0 && VARSON[w[n - 1]]))
87
- if vowel?(w[n + 1])
88
- metaph = metaph + 'H'
89
- end
79
+ metaph += 'H' if vowel?(w[n + 1])
90
80
  end
91
- when 'FJLMNR'[symb]
92
- metaph = metaph + symb
81
+ when symb =~ /[FJLMNR]/
82
+ metaph += symb
93
83
  when symb == 'K'
94
84
  if n > 0 && w[n - 1] != 'C'
95
- metaph = metaph + 'K'
96
- else
97
- if n == 0
98
- metaph = 'K'
99
- end
85
+ metaph += 'K'
86
+ elsif n == 0
87
+ metaph = 'K'
100
88
  end
101
89
  when symb == 'P'
102
- if w[n + 1] == 'H'
103
- metaph = metaph + 'F'
104
- else
105
- metaph = metaph + 'P'
106
- end
90
+ metaph += w[n + 1] == 'H' ? 'F' : 'P'
107
91
  when symb == 'Q'
108
- metaph = metaph + 'K'
92
+ metaph += 'K'
109
93
  when symb == 'S'
110
- if w[n + 1] == 'I' && (w[n + 2] == 'O' || w[n + 2] == 'A')
94
+ if w[n + 1, 2] =~ /I[OA]/
95
+ metaph += 'X'
96
+ elsif w[n + 1] == 'H'
111
97
  metaph += 'X'
112
98
  else
113
- if w[n + 1] == 'H'
114
- metaph += 'X'
115
- else
116
- metaph += 'S'
117
- end
99
+ metaph += 'S'
118
100
  end
119
101
  when symb == 'T'
120
- if w[n + 1] == 'I' && (w[n + 2] == 'O' || w[n + 2] == 'A')
121
- metaph = metaph + 'X'
102
+ if w[n + 1, 2] =~ /I[OA]/
103
+ metaph += 'X'
104
+ elsif w[n + 1] == 'H'
105
+ metaph += '0' if n == 0 || w[n - 1] != 'T'
122
106
  else
123
- if w[n + 1] == 'H'
124
- if !(n > 0 && w[n - 1] == 'T')
125
- metaph = metaph + '0'
126
- end
127
- else
128
- if !(w[n + 1] == 'C' && w[n + 2] == 'H')
129
- metaph = metaph + 'T'
130
- end
131
- end
107
+ metaph += 'T' if w[n + 1, 2] != 'CH'
132
108
  end
133
109
  when symb == 'V'
134
- metaph = metaph + 'F'
135
- when symb == 'W' || symb == 'Y'
136
- if vowel?(w[n + 1])
137
- metaph = metaph + symb
138
- end
110
+ metaph += 'F'
111
+ when symb =~ /[WY]/
112
+ metaph += symb if vowel?(w[n + 1])
139
113
  when symb == 'X'
140
- metaph = metaph + 'KS'
114
+ metaph += 'KS'
141
115
  when symb == 'Z'
142
- metaph = metaph + 'S'
116
+ metaph += 'S'
143
117
  end
144
118
  end
145
119
  end
@@ -1,3 +1,3 @@
1
1
  module Phonetic
2
- VERSION = '1.0.1'
2
+ VERSION = '1.1.0'
3
3
  end
@@ -0,0 +1,9 @@
1
+ require 'phonetic'
2
+
3
+ describe String do
4
+ describe '#dm_soundex' do
5
+ it 'should return D-M Soundex code of string' do
6
+ 'Syjuco'.dm_soundex.should == ['450000', '445000', '440000', '444000']
7
+ end
8
+ end
9
+ end
@@ -0,0 +1,13 @@
1
+ require 'spec_helper'
2
+ require 'support/dm_soundex_data'
3
+
4
+ describe Phonetic::DMSoundex do
5
+ describe '.encode' do
6
+ it 'should calculate Daitch-Mokotoff Soundex values of string' do
7
+ Phonetic::DM_SOUNDEX_TEST_TABLE.each do |w, r|
8
+ res = Phonetic::DMSoundex.encode(w)
9
+ res.should eq(r), "expected: #{r}\ngot: #{res}\nword: #{w}"
10
+ end
11
+ end
12
+ end
13
+ end
@@ -0,0 +1,259 @@
1
+ # encoding: utf-8
2
+
3
+ module Phonetic
4
+ DM_SOUNDEX_TEST_TABLE = {
5
+ 'Abucay' => ['075000', '074000'],
6
+ 'Ajuna' => ['060000'],
7
+ 'Akeem' => ['056000'],
8
+ 'Alfa' => ['087000'],
9
+ 'Alpert' => ['087930'],
10
+ 'Alysa' => ['084000'],
11
+ 'Amani' => ['066000'],
12
+ 'Angélica' => ['065850', '065840'],
13
+ 'Aniyah' => ['060000'],
14
+ 'Anja' => ['060000', '064000'],
15
+ 'Annemarie' => ['066900'],
16
+ 'Applaud' => ['078300'],
17
+ 'Aputsiaq' => ['074500'],
18
+ 'Arathana' => ['093600'],
19
+ 'Asdic' => ['043500', '043400'],
20
+ 'Ashtrays' => ['043940'],
21
+ 'Athie' => ['030000'],
22
+ 'Australia' => ['043980'],
23
+ 'Badou' => ['730000'],
24
+ 'Ballina' => ['786000'],
25
+ 'Beteng' => ['736500'],
26
+ 'Bethany' => ['736000'],
27
+ 'Bineta' => ['763000'],
28
+ 'Boto' => ['730000'],
29
+ 'Brandon' => ['796360'],
30
+ 'Breuer' => ['791900'],
31
+ 'Brushcut' => ['794530', '794300'],
32
+ 'Caja' => ['510000', '410000'],
33
+ 'Caleb' => ['587000', '487000'],
34
+ 'Carina' => ['596000', '496000'],
35
+ 'Carol' => ['598000', '498000'],
36
+ 'Cassidy' => ['543000', '443000'],
37
+ 'Cayetano' => ['513600', '413600'],
38
+ 'Charlotte' => ['598300', '498300'],
39
+ 'Cheboh' => ['570000', '470000'],
40
+ 'Chloe' => ['580000', '480000'],
41
+ 'Christoffersen' => ['594379', '494379', '594374', '494374'],
42
+ 'Cleo' => ['580000', '480000'],
43
+ 'Colby' => ['587000', '487000'],
44
+ 'Conner' => ['569000', '469000'],
45
+ 'Considine' => ['564360', '464360'],
46
+ 'Cormac' => ['596500', '496500', '596400', '496400'],
47
+ 'Cornell' => ['596800', '496800'],
48
+ 'Corpuz' => ['597400', '497400'],
49
+ 'Courtney' => ['593600', '493600'],
50
+ 'Cszinchjou' => ['465000', '464000', '465400'],
51
+ 'Cudkowicz' => ['535740', '435740'],
52
+ 'Czstochowa' => ['435700', '434700'],
53
+ 'Daina' => ['360000'],
54
+ 'Daisha' => ['340000'],
55
+ 'Damian' => ['366000'],
56
+ 'Dan' => ['360000'],
57
+ 'Daren' => ['396000'],
58
+ 'Davidsen' => ['374600'],
59
+ 'Deja' => ['310000'],
60
+ 'Deltoid' => ['383300'],
61
+ 'Denisse' => ['364000'],
62
+ 'Derek' => ['395000'],
63
+ 'Diakhou' => ['350000'],
64
+ 'Dionysia' => ['364000'],
65
+ 'Dixie' => ['354000'],
66
+ 'Domenik' => ['366500'],
67
+ 'Donny' => ['360000'],
68
+ 'Dorian' => ['396000'],
69
+ 'Draft' => ['397300'],
70
+ 'Drsný' => ['460000'],
71
+ 'Drzewica' => ['475000', '474000'],
72
+ 'Dzhezkazgan' => ['445456'],
73
+ 'Dzsenifer' => ['467900'],
74
+ 'Ehemba' => ['056700'],
75
+ 'Elarbi' => ['089700'],
76
+ 'Emmet' => ['063000'],
77
+ 'Estelle' => ['043800'],
78
+ 'Fardin' => ['793600'],
79
+ 'Floy' => ['780000'],
80
+ 'Fodié' => ['730000'],
81
+ 'Freeda' => ['793000'],
82
+ 'Freud' => ['793000'],
83
+ 'Fuji' => ['710000'],
84
+ 'Gaoussou' => ['540000'],
85
+ 'Garcia' => ['595000', '594000'],
86
+ 'George' => ['595000'],
87
+ 'Georgette' => ['595300'],
88
+ 'Ginger' => ['565900'],
89
+ 'Golden' => ['583600'],
90
+ 'Gordillo' => ['593800'],
91
+ 'Greg' => ['595000'],
92
+ 'Guevarra' => ['579000'],
93
+ 'Gutkowski' => ['535745'],
94
+ 'Gwen' => ['576000'],
95
+ 'Haber' => ['579000'],
96
+ 'Hady' => ['530000'],
97
+ 'Hagenes' => ['556400'],
98
+ 'Hailee' => ['580000'],
99
+ 'Halfback' => ['587500', '587450'],
100
+ 'Handshake' => ['564500'],
101
+ 'Hardtack' => ['593500', '593450'],
102
+ 'Hazael' => ['548000'],
103
+ 'Heitschmidt' => ['546300'],
104
+ 'Hymn' => ['566000'],
105
+ 'Iliana' => ['086000'],
106
+ 'Ingegerd' => ['065593'],
107
+ 'Irini' => ['096000'],
108
+ 'Jaclyn' => ['158600', '458600', '148600', '448600'],
109
+ 'Jackson-Jackson' => ['154654','454654', '145465', '445465', '154645',
110
+ '454645', '145464', '445464', '154644', '454644'],
111
+ 'James' => ['164000', '464000'],
112
+ 'Jamina' => ['166000', '466000'],
113
+ 'Jamir' => ['169000', '469000'],
114
+ 'Jannie' => ['160000', '460000'],
115
+ 'Jerel' => ['198000', '498000'],
116
+ 'Jerzy' => ['140000', '440000', '194000', '494000'],
117
+ 'Jesse' => ['140000', '440000'],
118
+ 'Joanie' => ['160000', '460000'],
119
+ 'Joseph' => ['147000', '447000'],
120
+ 'Josianne' => ['146000', '446000'],
121
+ 'Joya' => ['100000', '410000'],
122
+ 'Juri' => ['190000', '490000'],
123
+ 'Justyn' => ['143600', '443600'],
124
+ 'Kandeh' => ['563000'],
125
+ 'Kedzie' => ['540000'],
126
+ 'Keshawn' => ['547600'],
127
+ 'Khrushchev' => ['594700'],
128
+ 'Kirlin' => ['598600'],
129
+ 'Kirsten' => ['594360', '543600'],
130
+ 'Kjær' => ['590000', '549000'],
131
+ 'Kleinman' => ['586660'],
132
+ 'Korbin' => ['597600'],
133
+ 'Krista' => ['594300'],
134
+ 'Larkin' => ['895600'],
135
+ 'Laurence' => ['896500', '896400'],
136
+ 'Laverna' => ['879600'],
137
+ 'Lavonne' => ['876000'],
138
+ 'Leia' => ['810000'],
139
+ 'Lia' => ['800000'],
140
+ 'Lilia' => ['880000'],
141
+ 'Loren' => ['896000'],
142
+ 'Louise' => ['840000'],
143
+ 'Lovisa' => ['874000'],
144
+ 'Luella' => ['880000'],
145
+ 'Luise' => ['840000'],
146
+ 'Mable' => ['678000'],
147
+ 'Madonna' => ['636000'],
148
+ 'Majabrith' => ['617930'],
149
+ 'Majad' => ['613000'],
150
+ 'Malomar' => ['686900'],
151
+ 'Mamandew' => ['666370'],
152
+ 'Mame' => ['660000'],
153
+ 'Manheim' => ['665600'],
154
+ 'Manlafy' => ['668700'],
155
+ 'Margareta' => ['695930'],
156
+ 'Marisol' => ['694800'],
157
+ 'Marjolaine' => ['698600', '694860'],
158
+ 'Mary' => ['690000'],
159
+ 'Mathew' => ['637000'],
160
+ 'Mbamoussa' => ['676400'],
161
+ 'Meike' => ['650000'],
162
+ 'Mintz' => ['664000'],
163
+ 'Mirac' => ['695000', '694000'],
164
+ 'Monserrate' => ['664930'],
165
+ 'Moritz' => ['694000'],
166
+ 'Musa' => ['640000'],
167
+ 'Musse' => ['640000'],
168
+ 'Myra' => ['690000'],
169
+ 'Myrtie' => ['693000'],
170
+ 'Nadhim' => ['635600'],
171
+ 'Napel' => ['678000'],
172
+ 'Nash' => ['640000'],
173
+ 'Ndour' => ['639000'],
174
+ 'Nelda' => ['683000'],
175
+ 'Nelli' => ['680000'],
176
+ 'Neoma' => ['660000'],
177
+ 'Niels' => ['684000'],
178
+ 'Novella' => ['678000'],
179
+ 'Nájera' => ['690000', '649000'],
180
+ 'Obaar' => ['079000'],
181
+ 'Oleta' => ['083000'],
182
+ 'Osio' => ['040000'],
183
+ 'Othilie' => ['038000'],
184
+ 'Pabodhi' => ['773500'],
185
+ 'Pagsisihang' => ['754456'],
186
+ 'Pavith' => ['773000'],
187
+ 'Pete' => ['730000'],
188
+ 'Portugal' => ['793580'],
189
+ 'Postcard' => ['743593', '743493'],
190
+ 'Postscript' => ['743497'],
191
+ 'Predovic' => ['793750', '793740'],
192
+ 'Price' => ['795000', '794000'],
193
+ 'Project' => ['791530', '791430'],
194
+ 'Quyne' => ['560000'],
195
+ 'Rachel' => ['958000', '948000'],
196
+ 'Radius' => ['934000'],
197
+ 'Reilly' => ['980000'],
198
+ 'Sabina' => ['476000'],
199
+ 'Sacoura' => ['459000', '449000'],
200
+ 'Safi' => ['470000'],
201
+ 'Saiarr' => ['419000'],
202
+ 'Salgado' => ['485300'],
203
+ 'Samara' => ['469000'],
204
+ 'Samsidine' => ['464360'],
205
+ 'Sanford' => ['467930'],
206
+ 'Sarah' => ['490000'],
207
+ 'Sasha' => ['440000'],
208
+ 'Satterfield' => ['439783'],
209
+ 'Shchaveleva' => ['278700'],
210
+ 'School' => ['480000'],
211
+ 'Schuster' => ['443900'],
212
+ 'Schtolteheim' => ['283560'],
213
+ 'Schtschigry' => ['259000'],
214
+ 'Schwarz' => ['474000', '479400'],
215
+ 'Science' => ['265000', '264000'],
216
+ 'Senger' => ['465900'],
217
+ 'Servín' => ['497600'],
218
+ 'Shad' => ['430000'],
219
+ 'Shawna' => ['476000'],
220
+ 'Shdanow' => ['267000'],
221
+ 'Shtchirowskaya' => ['297451'],
222
+ 'Shtorov' => ['297000'],
223
+ 'Shtshuf' => ['270000'],
224
+ 'Simeon' => ['466000'],
225
+ 'Sipan' => ['476000'],
226
+ 'Sizze' => ['440000'],
227
+ 'Sundqvist' => ['463574'],
228
+ 'Syjuco' => ['450000', '445000', '440000', '444000'],
229
+ 'Sytengco' => ['436500', '436540'],
230
+ 'Tanhehco' => ['365500', '365400'],
231
+ 'Tapia' => ['370000'],
232
+ 'Taya' => ['310000'],
233
+ 'Touchstone' => ['354360'],
234
+ 'Topf' => ['370000'],
235
+ 'Torrealba' => ['398700'],
236
+ 'Trinity' => ['396300'],
237
+ 'Tucson' => ['346000'],
238
+ 'Tupa' => ['370000'],
239
+ 'Uribe' => ['097000'],
240
+ 'Valentina' => ['786360'],
241
+ 'Vera' => ['790000'],
242
+ 'Verna' => ['796000'],
243
+ 'Vickie' => ['750000', '745000'],
244
+ 'Vilhelmine' => ['785866'],
245
+ 'Von' => ['760000'],
246
+ 'Víctor' => ['753900', '743900'],
247
+ 'Webster' => ['774390'],
248
+ 'Westcheste' => ['744300'],
249
+ 'Whitney' => ['753600'],
250
+ 'Wilberto' => ['787930'],
251
+ 'Wilton' => ['783600'],
252
+ 'Wrists' => ['794340'],
253
+ 'Yakou' => ['150000'],
254
+ 'Yaye' => ['100000'],
255
+ 'Yin' => ['160000'],
256
+ 'Yoyoy' => ['100000'],
257
+ 'Zena' => ['460000']
258
+ }
259
+ end