phonetic 1.0.1 → 1.1.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
@@ -20,51 +20,43 @@ module Phonetic
20
20
  w = word.upcase.gsub(/[^A-Z]/, '')
21
21
  return if w.empty?
22
22
  two = w[0, 2]
23
- if ['PN', 'AE', 'KN', 'GN', 'WR'].include?(two) then w[0] = '' end
24
- if w[0] == 'X' then w[0] = 'S' end
25
- if two == 'WH' then w[1] = '' end
23
+ w[0] = '' if two =~ /PN|AE|KN|GN|WR/
24
+ w[0] = 'S' if w[0] == 'X'
25
+ w[1] = '' if two == 'WH'
26
26
  l = w.size
27
27
  metaph = ''
28
28
  for n in 0..(l - 1)
29
29
  break unless metaph.size < code_size
30
30
  symb = w[n]
31
- if !(symb != 'C' && n > 0 && w[n - 1] == symb)
31
+ if symb == 'C' || n == 0 || w[n - 1] != symb
32
32
  case
33
33
  when vowel?(symb) && n == 0
34
34
  metaph = symb
35
35
  when symb == 'B'
36
- unless n == l - 1 && w[n - 1] == 'M'
37
- metaph = metaph + symb
38
- end
36
+ metaph += symb if n != l - 1 || w[n - 1] != 'M'
39
37
  when symb == 'C'
40
- if !(n > 0 && w[n - 1] == 'S' && front_vowel?(w[n + 1]))
38
+ if n == 0 || w[n - 1] != 'S' || !front_vowel?(w[n + 1])
41
39
  if w[n + 1, 2] == 'IA'
42
- metaph = metaph + 'X'
43
- else
44
- if front_vowel?(w[n + 1])
45
- metaph = metaph + 'S'
40
+ metaph += 'X'
41
+ elsif front_vowel?(w[n + 1])
42
+ metaph += 'S'
43
+ elsif n > 0 && w[n + 1] == 'H' && w[n - 1] == 'S'
44
+ metaph += 'K'
45
+ elsif w[n + 1] == 'H'
46
+ if n == 0 && !vowel?(w[n + 2])
47
+ metaph += 'K'
46
48
  else
47
- if n > 0 && w[n + 1] == 'H' && w[n - 1] == 'S'
48
- metaph = metaph + 'K'
49
- else
50
- if w[n + 1] == 'H'
51
- if n == 0 && !vowel?(w[n + 2])
52
- metaph = metaph + 'K'
53
- else
54
- metaph = metaph + 'X'
55
- end
56
- else
57
- metaph = metaph + 'K'
58
- end
59
- end
49
+ metaph += 'X'
60
50
  end
51
+ else
52
+ metaph += 'K'
61
53
  end
62
54
  end
63
55
  when symb == 'D'
64
56
  if w[n + 1] == 'G' && front_vowel?(w[n + 2])
65
- metaph = metaph + 'J'
57
+ metaph += 'J'
66
58
  else
67
- metaph = metaph + 'T'
59
+ metaph += 'T'
68
60
  end
69
61
  when symb == 'G'
70
62
  silent = (w[n + 1] == 'H' && !vowel?(w[n + 2]))
@@ -77,69 +69,51 @@ module Phonetic
77
69
  hard = (n > 0 && w[n - 1] == 'G')
78
70
  unless silent
79
71
  if front_vowel?(w[n + 1]) && !hard
80
- metaph = metaph + 'J'
72
+ metaph += 'J'
81
73
  else
82
- metaph = metaph + 'K'
74
+ metaph += 'K'
83
75
  end
84
76
  end
85
77
  when symb == 'H'
86
78
  if !(n == l - 1 || (n > 0 && VARSON[w[n - 1]]))
87
- if vowel?(w[n + 1])
88
- metaph = metaph + 'H'
89
- end
79
+ metaph += 'H' if vowel?(w[n + 1])
90
80
  end
91
- when 'FJLMNR'[symb]
92
- metaph = metaph + symb
81
+ when symb =~ /[FJLMNR]/
82
+ metaph += symb
93
83
  when symb == 'K'
94
84
  if n > 0 && w[n - 1] != 'C'
95
- metaph = metaph + 'K'
96
- else
97
- if n == 0
98
- metaph = 'K'
99
- end
85
+ metaph += 'K'
86
+ elsif n == 0
87
+ metaph = 'K'
100
88
  end
101
89
  when symb == 'P'
102
- if w[n + 1] == 'H'
103
- metaph = metaph + 'F'
104
- else
105
- metaph = metaph + 'P'
106
- end
90
+ metaph += w[n + 1] == 'H' ? 'F' : 'P'
107
91
  when symb == 'Q'
108
- metaph = metaph + 'K'
92
+ metaph += 'K'
109
93
  when symb == 'S'
110
- if w[n + 1] == 'I' && (w[n + 2] == 'O' || w[n + 2] == 'A')
94
+ if w[n + 1, 2] =~ /I[OA]/
95
+ metaph += 'X'
96
+ elsif w[n + 1] == 'H'
111
97
  metaph += 'X'
112
98
  else
113
- if w[n + 1] == 'H'
114
- metaph += 'X'
115
- else
116
- metaph += 'S'
117
- end
99
+ metaph += 'S'
118
100
  end
119
101
  when symb == 'T'
120
- if w[n + 1] == 'I' && (w[n + 2] == 'O' || w[n + 2] == 'A')
121
- metaph = metaph + 'X'
102
+ if w[n + 1, 2] =~ /I[OA]/
103
+ metaph += 'X'
104
+ elsif w[n + 1] == 'H'
105
+ metaph += '0' if n == 0 || w[n - 1] != 'T'
122
106
  else
123
- if w[n + 1] == 'H'
124
- if !(n > 0 && w[n - 1] == 'T')
125
- metaph = metaph + '0'
126
- end
127
- else
128
- if !(w[n + 1] == 'C' && w[n + 2] == 'H')
129
- metaph = metaph + 'T'
130
- end
131
- end
107
+ metaph += 'T' if w[n + 1, 2] != 'CH'
132
108
  end
133
109
  when symb == 'V'
134
- metaph = metaph + 'F'
135
- when symb == 'W' || symb == 'Y'
136
- if vowel?(w[n + 1])
137
- metaph = metaph + symb
138
- end
110
+ metaph += 'F'
111
+ when symb =~ /[WY]/
112
+ metaph += symb if vowel?(w[n + 1])
139
113
  when symb == 'X'
140
- metaph = metaph + 'KS'
114
+ metaph += 'KS'
141
115
  when symb == 'Z'
142
- metaph = metaph + 'S'
116
+ metaph += 'S'
143
117
  end
144
118
  end
145
119
  end
@@ -1,3 +1,3 @@
1
1
  module Phonetic
2
- VERSION = '1.0.1'
2
+ VERSION = '1.1.0'
3
3
  end
@@ -0,0 +1,9 @@
1
+ require 'phonetic'
2
+
3
+ describe String do
4
+ describe '#dm_soundex' do
5
+ it 'should return D-M Soundex code of string' do
6
+ 'Syjuco'.dm_soundex.should == ['450000', '445000', '440000', '444000']
7
+ end
8
+ end
9
+ end
@@ -0,0 +1,13 @@
1
+ require 'spec_helper'
2
+ require 'support/dm_soundex_data'
3
+
4
+ describe Phonetic::DMSoundex do
5
+ describe '.encode' do
6
+ it 'should calculate Daitch-Mokotoff Soundex values of string' do
7
+ Phonetic::DM_SOUNDEX_TEST_TABLE.each do |w, r|
8
+ res = Phonetic::DMSoundex.encode(w)
9
+ res.should eq(r), "expected: #{r}\ngot: #{res}\nword: #{w}"
10
+ end
11
+ end
12
+ end
13
+ end
@@ -0,0 +1,259 @@
1
+ # encoding: utf-8
2
+
3
+ module Phonetic
4
+ DM_SOUNDEX_TEST_TABLE = {
5
+ 'Abucay' => ['075000', '074000'],
6
+ 'Ajuna' => ['060000'],
7
+ 'Akeem' => ['056000'],
8
+ 'Alfa' => ['087000'],
9
+ 'Alpert' => ['087930'],
10
+ 'Alysa' => ['084000'],
11
+ 'Amani' => ['066000'],
12
+ 'Angélica' => ['065850', '065840'],
13
+ 'Aniyah' => ['060000'],
14
+ 'Anja' => ['060000', '064000'],
15
+ 'Annemarie' => ['066900'],
16
+ 'Applaud' => ['078300'],
17
+ 'Aputsiaq' => ['074500'],
18
+ 'Arathana' => ['093600'],
19
+ 'Asdic' => ['043500', '043400'],
20
+ 'Ashtrays' => ['043940'],
21
+ 'Athie' => ['030000'],
22
+ 'Australia' => ['043980'],
23
+ 'Badou' => ['730000'],
24
+ 'Ballina' => ['786000'],
25
+ 'Beteng' => ['736500'],
26
+ 'Bethany' => ['736000'],
27
+ 'Bineta' => ['763000'],
28
+ 'Boto' => ['730000'],
29
+ 'Brandon' => ['796360'],
30
+ 'Breuer' => ['791900'],
31
+ 'Brushcut' => ['794530', '794300'],
32
+ 'Caja' => ['510000', '410000'],
33
+ 'Caleb' => ['587000', '487000'],
34
+ 'Carina' => ['596000', '496000'],
35
+ 'Carol' => ['598000', '498000'],
36
+ 'Cassidy' => ['543000', '443000'],
37
+ 'Cayetano' => ['513600', '413600'],
38
+ 'Charlotte' => ['598300', '498300'],
39
+ 'Cheboh' => ['570000', '470000'],
40
+ 'Chloe' => ['580000', '480000'],
41
+ 'Christoffersen' => ['594379', '494379', '594374', '494374'],
42
+ 'Cleo' => ['580000', '480000'],
43
+ 'Colby' => ['587000', '487000'],
44
+ 'Conner' => ['569000', '469000'],
45
+ 'Considine' => ['564360', '464360'],
46
+ 'Cormac' => ['596500', '496500', '596400', '496400'],
47
+ 'Cornell' => ['596800', '496800'],
48
+ 'Corpuz' => ['597400', '497400'],
49
+ 'Courtney' => ['593600', '493600'],
50
+ 'Cszinchjou' => ['465000', '464000', '465400'],
51
+ 'Cudkowicz' => ['535740', '435740'],
52
+ 'Czstochowa' => ['435700', '434700'],
53
+ 'Daina' => ['360000'],
54
+ 'Daisha' => ['340000'],
55
+ 'Damian' => ['366000'],
56
+ 'Dan' => ['360000'],
57
+ 'Daren' => ['396000'],
58
+ 'Davidsen' => ['374600'],
59
+ 'Deja' => ['310000'],
60
+ 'Deltoid' => ['383300'],
61
+ 'Denisse' => ['364000'],
62
+ 'Derek' => ['395000'],
63
+ 'Diakhou' => ['350000'],
64
+ 'Dionysia' => ['364000'],
65
+ 'Dixie' => ['354000'],
66
+ 'Domenik' => ['366500'],
67
+ 'Donny' => ['360000'],
68
+ 'Dorian' => ['396000'],
69
+ 'Draft' => ['397300'],
70
+ 'Drsný' => ['460000'],
71
+ 'Drzewica' => ['475000', '474000'],
72
+ 'Dzhezkazgan' => ['445456'],
73
+ 'Dzsenifer' => ['467900'],
74
+ 'Ehemba' => ['056700'],
75
+ 'Elarbi' => ['089700'],
76
+ 'Emmet' => ['063000'],
77
+ 'Estelle' => ['043800'],
78
+ 'Fardin' => ['793600'],
79
+ 'Floy' => ['780000'],
80
+ 'Fodié' => ['730000'],
81
+ 'Freeda' => ['793000'],
82
+ 'Freud' => ['793000'],
83
+ 'Fuji' => ['710000'],
84
+ 'Gaoussou' => ['540000'],
85
+ 'Garcia' => ['595000', '594000'],
86
+ 'George' => ['595000'],
87
+ 'Georgette' => ['595300'],
88
+ 'Ginger' => ['565900'],
89
+ 'Golden' => ['583600'],
90
+ 'Gordillo' => ['593800'],
91
+ 'Greg' => ['595000'],
92
+ 'Guevarra' => ['579000'],
93
+ 'Gutkowski' => ['535745'],
94
+ 'Gwen' => ['576000'],
95
+ 'Haber' => ['579000'],
96
+ 'Hady' => ['530000'],
97
+ 'Hagenes' => ['556400'],
98
+ 'Hailee' => ['580000'],
99
+ 'Halfback' => ['587500', '587450'],
100
+ 'Handshake' => ['564500'],
101
+ 'Hardtack' => ['593500', '593450'],
102
+ 'Hazael' => ['548000'],
103
+ 'Heitschmidt' => ['546300'],
104
+ 'Hymn' => ['566000'],
105
+ 'Iliana' => ['086000'],
106
+ 'Ingegerd' => ['065593'],
107
+ 'Irini' => ['096000'],
108
+ 'Jaclyn' => ['158600', '458600', '148600', '448600'],
109
+ 'Jackson-Jackson' => ['154654','454654', '145465', '445465', '154645',
110
+ '454645', '145464', '445464', '154644', '454644'],
111
+ 'James' => ['164000', '464000'],
112
+ 'Jamina' => ['166000', '466000'],
113
+ 'Jamir' => ['169000', '469000'],
114
+ 'Jannie' => ['160000', '460000'],
115
+ 'Jerel' => ['198000', '498000'],
116
+ 'Jerzy' => ['140000', '440000', '194000', '494000'],
117
+ 'Jesse' => ['140000', '440000'],
118
+ 'Joanie' => ['160000', '460000'],
119
+ 'Joseph' => ['147000', '447000'],
120
+ 'Josianne' => ['146000', '446000'],
121
+ 'Joya' => ['100000', '410000'],
122
+ 'Juri' => ['190000', '490000'],
123
+ 'Justyn' => ['143600', '443600'],
124
+ 'Kandeh' => ['563000'],
125
+ 'Kedzie' => ['540000'],
126
+ 'Keshawn' => ['547600'],
127
+ 'Khrushchev' => ['594700'],
128
+ 'Kirlin' => ['598600'],
129
+ 'Kirsten' => ['594360', '543600'],
130
+ 'Kjær' => ['590000', '549000'],
131
+ 'Kleinman' => ['586660'],
132
+ 'Korbin' => ['597600'],
133
+ 'Krista' => ['594300'],
134
+ 'Larkin' => ['895600'],
135
+ 'Laurence' => ['896500', '896400'],
136
+ 'Laverna' => ['879600'],
137
+ 'Lavonne' => ['876000'],
138
+ 'Leia' => ['810000'],
139
+ 'Lia' => ['800000'],
140
+ 'Lilia' => ['880000'],
141
+ 'Loren' => ['896000'],
142
+ 'Louise' => ['840000'],
143
+ 'Lovisa' => ['874000'],
144
+ 'Luella' => ['880000'],
145
+ 'Luise' => ['840000'],
146
+ 'Mable' => ['678000'],
147
+ 'Madonna' => ['636000'],
148
+ 'Majabrith' => ['617930'],
149
+ 'Majad' => ['613000'],
150
+ 'Malomar' => ['686900'],
151
+ 'Mamandew' => ['666370'],
152
+ 'Mame' => ['660000'],
153
+ 'Manheim' => ['665600'],
154
+ 'Manlafy' => ['668700'],
155
+ 'Margareta' => ['695930'],
156
+ 'Marisol' => ['694800'],
157
+ 'Marjolaine' => ['698600', '694860'],
158
+ 'Mary' => ['690000'],
159
+ 'Mathew' => ['637000'],
160
+ 'Mbamoussa' => ['676400'],
161
+ 'Meike' => ['650000'],
162
+ 'Mintz' => ['664000'],
163
+ 'Mirac' => ['695000', '694000'],
164
+ 'Monserrate' => ['664930'],
165
+ 'Moritz' => ['694000'],
166
+ 'Musa' => ['640000'],
167
+ 'Musse' => ['640000'],
168
+ 'Myra' => ['690000'],
169
+ 'Myrtie' => ['693000'],
170
+ 'Nadhim' => ['635600'],
171
+ 'Napel' => ['678000'],
172
+ 'Nash' => ['640000'],
173
+ 'Ndour' => ['639000'],
174
+ 'Nelda' => ['683000'],
175
+ 'Nelli' => ['680000'],
176
+ 'Neoma' => ['660000'],
177
+ 'Niels' => ['684000'],
178
+ 'Novella' => ['678000'],
179
+ 'Nájera' => ['690000', '649000'],
180
+ 'Obaar' => ['079000'],
181
+ 'Oleta' => ['083000'],
182
+ 'Osio' => ['040000'],
183
+ 'Othilie' => ['038000'],
184
+ 'Pabodhi' => ['773500'],
185
+ 'Pagsisihang' => ['754456'],
186
+ 'Pavith' => ['773000'],
187
+ 'Pete' => ['730000'],
188
+ 'Portugal' => ['793580'],
189
+ 'Postcard' => ['743593', '743493'],
190
+ 'Postscript' => ['743497'],
191
+ 'Predovic' => ['793750', '793740'],
192
+ 'Price' => ['795000', '794000'],
193
+ 'Project' => ['791530', '791430'],
194
+ 'Quyne' => ['560000'],
195
+ 'Rachel' => ['958000', '948000'],
196
+ 'Radius' => ['934000'],
197
+ 'Reilly' => ['980000'],
198
+ 'Sabina' => ['476000'],
199
+ 'Sacoura' => ['459000', '449000'],
200
+ 'Safi' => ['470000'],
201
+ 'Saiarr' => ['419000'],
202
+ 'Salgado' => ['485300'],
203
+ 'Samara' => ['469000'],
204
+ 'Samsidine' => ['464360'],
205
+ 'Sanford' => ['467930'],
206
+ 'Sarah' => ['490000'],
207
+ 'Sasha' => ['440000'],
208
+ 'Satterfield' => ['439783'],
209
+ 'Shchaveleva' => ['278700'],
210
+ 'School' => ['480000'],
211
+ 'Schuster' => ['443900'],
212
+ 'Schtolteheim' => ['283560'],
213
+ 'Schtschigry' => ['259000'],
214
+ 'Schwarz' => ['474000', '479400'],
215
+ 'Science' => ['265000', '264000'],
216
+ 'Senger' => ['465900'],
217
+ 'Servín' => ['497600'],
218
+ 'Shad' => ['430000'],
219
+ 'Shawna' => ['476000'],
220
+ 'Shdanow' => ['267000'],
221
+ 'Shtchirowskaya' => ['297451'],
222
+ 'Shtorov' => ['297000'],
223
+ 'Shtshuf' => ['270000'],
224
+ 'Simeon' => ['466000'],
225
+ 'Sipan' => ['476000'],
226
+ 'Sizze' => ['440000'],
227
+ 'Sundqvist' => ['463574'],
228
+ 'Syjuco' => ['450000', '445000', '440000', '444000'],
229
+ 'Sytengco' => ['436500', '436540'],
230
+ 'Tanhehco' => ['365500', '365400'],
231
+ 'Tapia' => ['370000'],
232
+ 'Taya' => ['310000'],
233
+ 'Touchstone' => ['354360'],
234
+ 'Topf' => ['370000'],
235
+ 'Torrealba' => ['398700'],
236
+ 'Trinity' => ['396300'],
237
+ 'Tucson' => ['346000'],
238
+ 'Tupa' => ['370000'],
239
+ 'Uribe' => ['097000'],
240
+ 'Valentina' => ['786360'],
241
+ 'Vera' => ['790000'],
242
+ 'Verna' => ['796000'],
243
+ 'Vickie' => ['750000', '745000'],
244
+ 'Vilhelmine' => ['785866'],
245
+ 'Von' => ['760000'],
246
+ 'Víctor' => ['753900', '743900'],
247
+ 'Webster' => ['774390'],
248
+ 'Westcheste' => ['744300'],
249
+ 'Whitney' => ['753600'],
250
+ 'Wilberto' => ['787930'],
251
+ 'Wilton' => ['783600'],
252
+ 'Wrists' => ['794340'],
253
+ 'Yakou' => ['150000'],
254
+ 'Yaye' => ['100000'],
255
+ 'Yin' => ['160000'],
256
+ 'Yoyoy' => ['100000'],
257
+ 'Zena' => ['460000']
258
+ }
259
+ end