phonetic 1.1.0 → 1.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: a20da7ce0b4dab68d7671088098226a035c64b05
4
- data.tar.gz: 2b721bc986d8e23ba6780bb7cab92059e6a7652b
3
+ metadata.gz: 98bef8e122a5abed59eee25d4e9e4a2475aef89b
4
+ data.tar.gz: 9a4656b92c3e81f507ab5ffdcbd55a701728ff05
5
5
  SHA512:
6
- metadata.gz: 14325fa3846251dd1a1cbc59b38c12a32471291b45b07074387747fa9331b5ad98b1b0afaa8dbbac62872f9bf959d5e622742e5ec673f3e1294807f91b5fdc85
7
- data.tar.gz: ad80a4c26cae46cbc516cc6cfebbfea39be69fbfe86426737cd94c065da57f4448ff3ffe9f892bd60af94fd834ae122c472cdc09b5fe683344ef479d1f31f90c
6
+ metadata.gz: aea70d4160ade24bfd89370b06ba2381d7e444f8e59f361c48c48209e77de10b5dde42ef04e77a67558f3be5e6f3379812618e5a3c91f165e37aa5378e6b4acf
7
+ data.tar.gz: 17af435c3b3d7c8603a0a5a2de323671f04eefe5cbec6100e89bef862cfe4beab14fa63f3449b83233e001479f0d52a30b8f448400281db33f2789669b7cb31b
@@ -1,5 +1,9 @@
1
- language: ruby
2
- rvm:
3
- - "1.9.2"
4
- - "1.9.3"
5
- - "2.0.0"
1
+ language: ruby
2
+ rvm:
3
+ - 1.9.2
4
+ - 1.9.3
5
+ - 2.0.0
6
+ - ruby-head
7
+ - jruby-19mode
8
+ - jruby-head
9
+ - rbx-2.1.1
@@ -0,0 +1,5 @@
1
+ --charset utf-8
2
+ -
3
+ README.md
4
+ LICENSE.txt
5
+ CHANGELOG.md
@@ -0,0 +1,14 @@
1
+ # Phonetic CHANGELOG
2
+
3
+ ## 1.2.0
4
+
5
+ * added Refined NYSIIS
6
+
7
+ ## 1.1.0
8
+
9
+ * added Daitch–Mokotoff Soundex
10
+
11
+ ## 1.0.0
12
+
13
+ * Initial release with Soundex, Refined Soundex, Metaphone, Double Metaphone,
14
+ Caverphone, Caverphone 2 and NYSIIS
data/README.md CHANGED
@@ -1,109 +1,123 @@
1
- # Phonetic
2
- [![Build Status](https://travis-ci.org/n7v/phonetic.png)](https://travis-ci.org/n7v/phonetic)
3
- [![Gem Version](https://badge.fury.io/rb/phonetic.png)](http://badge.fury.io/rb/phonetic)
4
- [![Coverage Status](https://coveralls.io/repos/n7v/phonetic/badge.png)](https://coveralls.io/r/n7v/phonetic)
5
- [![Code Climate](https://codeclimate.com/github/n7v/phonetic.png)](https://codeclimate.com/github/n7v/phonetic)
6
-
7
- Ruby library for phonetic algorithms.
8
- It supports Soundex, Metaphone, Double Metaphone, Caverphone, NYSIIS and others.
9
-
10
- ## Installation
11
-
12
- Add this line to your application's Gemfile:
13
-
14
- gem 'phonetic'
15
-
16
- And then execute:
17
-
18
- ```shell
19
- $ bundle
20
- ```
21
-
22
- Or install it yourself as:
23
-
24
- ```shell
25
- $ gem install phonetic
26
- ```
27
-
28
- ## Usage
29
-
30
- ```ruby
31
- require 'phonetic'
32
- ```
33
-
34
- ### Soundex
35
-
36
- ```ruby
37
- 'Ackerman'.soundex # => 'A265'
38
- 'ammonium'.soundex # => 'A500'
39
- 'implementation'.soundex # => 'I514'
40
- ```
41
-
42
- ### Refined Soundex
43
-
44
- ```ruby
45
- 'Caren'.refined_soundex # => 'C30908'
46
- 'Hayers'.refined_soundex # => 'H093'
47
- 'Lambard'.refined_soundex # => 'L7081096'
48
- ```
49
-
50
- ### Metaphone
51
-
52
- ```ruby
53
- 'Accola'.metaphone # => 'AKKL'
54
- 'Nikki'.metaphone # => 'NK'
55
- 'Wright'.metaphone #=> 'RT'
56
- ```
57
-
58
- ### Double Metaphone
59
-
60
- ```ruby
61
- 'czerny'.double_metaphone # => ['SRN', 'XRN']
62
- 'dumb'.double_metaphone # => ['TM', 'TM']
63
- 'edgar'.double_metaphone # => ['ATKR', 'ATKR']
64
- ```
65
-
66
- or use alias:
67
-
68
- ```ruby
69
- 'czerny'.metaphone2 # => ['SRN', 'XRN']
70
- 'dumb'.metaphone2 # => ['TM', 'TM']
71
- 'edgar'.metaphone2 # => ['ATKR', 'ATKR']
72
- ```
73
-
74
- ### Caverphone
75
-
76
- ```ruby
77
- 'Lashaunda'.caverphone # => 'LSNT11'
78
- 'Vidaurri'.caverphone # => 'FTR111'
79
- ````
80
-
81
- ### Caverphone 2
82
-
83
- ```ruby
84
- 'Stevenson'.caverphone2 # => 'STFNSN1111'
85
- 'Peter'.caverphone2 # => 'PTA1111111'
86
- ```
87
-
88
- ### NYSIIS
89
-
90
- ```ruby
91
- 'Alexandra'.nysiis # => 'ALAXANDR'
92
- 'Aumont'.nysiis # => 'AANAD'
93
- 'Bonnie'.nysiis # => 'BANY'
94
- ```
95
-
96
- ### Daitch–Mokotoff Soundex (D–M Soundex)
97
- ```ruby
98
- 'Anja'.dm_soundex # => ['060000', '064000']
99
- 'Schwarz'.dm_soundex # => ['474000', '479400']
100
- 'Schtolteheim'.dm_soundex # => ['283560']
101
- ```
102
-
103
- ## Contributing
104
-
105
- 1. Fork it
106
- 2. Create your feature branch (`git checkout -b my-new-feature`)
107
- 3. Commit your changes (`git commit -am 'Add some feature'`)
108
- 4. Push to the branch (`git push origin my-new-feature`)
109
- 5. Create new Pull Request
1
+ # Phonetic
2
+ [![Build Status](https://travis-ci.org/n7v/phonetic.png)](https://travis-ci.org/n7v/phonetic)
3
+ [![Gem Version](https://badge.fury.io/rb/phonetic.png)](http://badge.fury.io/rb/phonetic)
4
+ [![Coverage Status](https://coveralls.io/repos/n7v/phonetic/badge.png)](https://coveralls.io/r/n7v/phonetic)
5
+ [![Code Climate](https://codeclimate.com/github/n7v/phonetic.png)](https://codeclimate.com/github/n7v/phonetic)
6
+ [![Dependency Status](https://gemnasium.com/n7v/phonetic.png)](https://gemnasium.com/n7v/phonetic)
7
+
8
+ Ruby library for phonetic algorithms.
9
+ It supports Soundex, Metaphone, Double Metaphone, Caverphone, NYSIIS and others.
10
+
11
+ ## Installation
12
+
13
+ Add this line to your application's Gemfile:
14
+
15
+ gem 'phonetic'
16
+
17
+ And then execute:
18
+
19
+ ```shell
20
+ $ bundle
21
+ ```
22
+
23
+ Or install it yourself as:
24
+
25
+ ```shell
26
+ $ gem install phonetic
27
+ ```
28
+
29
+ ## Dependencies
30
+
31
+ Ruby >= 1.9, JRuby 1.7.6, Rubinius 2.1.1
32
+
33
+ ## Usage
34
+
35
+ ```ruby
36
+ require 'phonetic'
37
+ ```
38
+
39
+ ### Soundex
40
+
41
+ ```ruby
42
+ 'Ackerman'.soundex # => 'A265'
43
+ 'ammonium'.soundex # => 'A500'
44
+ 'implementation'.soundex # => 'I514'
45
+ ```
46
+
47
+ ### Refined Soundex
48
+
49
+ ```ruby
50
+ 'Caren'.refined_soundex # => 'C30908'
51
+ 'Hayers'.refined_soundex # => 'H093'
52
+ 'Lambard'.refined_soundex # => 'L7081096'
53
+ ```
54
+
55
+ ### Metaphone
56
+
57
+ ```ruby
58
+ 'Accola'.metaphone # => 'AKKL'
59
+ 'Nikki'.metaphone # => 'NK'
60
+ 'Wright'.metaphone # => 'RT'
61
+ ```
62
+
63
+ ### Double Metaphone
64
+
65
+ ```ruby
66
+ 'czerny'.double_metaphone # => ['SRN', 'XRN']
67
+ 'dumb'.double_metaphone # => ['TM', 'TM']
68
+ 'edgar'.double_metaphone # => ['ATKR', 'ATKR']
69
+ ```
70
+
71
+ or use alias:
72
+
73
+ ```ruby
74
+ 'czerny'.metaphone2 # => ['SRN', 'XRN']
75
+ 'dumb'.metaphone2 # => ['TM', 'TM']
76
+ 'edgar'.metaphone2 # => ['ATKR', 'ATKR']
77
+ ```
78
+
79
+ ### Caverphone
80
+
81
+ ```ruby
82
+ 'Lashaunda'.caverphone # => 'LSNT11'
83
+ 'Vidaurri'.caverphone # => 'FTR111'
84
+ ```
85
+
86
+ ### Caverphone 2
87
+
88
+ ```ruby
89
+ 'Stevenson'.caverphone2 # => 'STFNSN1111'
90
+ 'Peter'.caverphone2 # => 'PTA1111111'
91
+ ```
92
+
93
+ ### NYSIIS
94
+
95
+ ```ruby
96
+ 'Alexandra'.nysiis # => 'ALAXANDR'
97
+ 'Aumont'.nysiis # => 'AANAD'
98
+ 'Bonnie'.nysiis # => 'BANY'
99
+ ```
100
+
101
+ ### Refined NYSIIS
102
+
103
+ ```ruby
104
+ 'Aumont'.refined_nysiis # => 'ANAD'
105
+ 'Phoenix'.refined_nysiis # => 'FANAC'
106
+ 'Schmidt'.refined_nysiis # => 'SNAD'
107
+ ```
108
+
109
+ ### Daitch–Mokotoff Soundex (D–M Soundex)
110
+
111
+ ```ruby
112
+ 'Anja'.dm_soundex # => ['060000', '064000']
113
+ 'Schwarz'.dm_soundex # => ['474000', '479400']
114
+ 'Schtolteheim'.dm_soundex # => ['283560']
115
+ ```
116
+
117
+ ## Contributing
118
+
119
+ 1. Fork it
120
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
121
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
122
+ 4. Push to the branch (`git push origin my-new-feature`)
123
+ 5. Create new Pull Request
@@ -1,5 +1,6 @@
1
1
  require 'phonetic/version'
2
2
  require 'phonetic/nysiis'
3
+ require 'phonetic/refined_nysiis'
3
4
  require 'phonetic/soundex'
4
5
  require 'phonetic/refined_soundex'
5
6
  require 'phonetic/metaphone'
@@ -1,7 +1,7 @@
1
1
  require 'phonetic/nysiis'
2
2
 
3
3
  class String
4
- # Caverphone value of string.
4
+ # NYSIIS value of string.
5
5
  # @example
6
6
  # 'Alexandra'.nysiis # => 'ALAXANDR'
7
7
  # 'Aumont'.nysiis # => 'AANAD'
@@ -0,0 +1,12 @@
1
+ require 'phonetic/refined_nysiis'
2
+
3
+ class String
4
+ # Refined NYSIIS value of string.
5
+ # @example
6
+ # 'Aumont'.refined_nysiis # => 'ANAD'
7
+ # 'Phoenix'.refined_nysiis # => 'FANAC'
8
+ # 'Schmidt'.refined_nysiis # => 'SNAD'
9
+ def refined_nysiis(options = { trim: true })
10
+ Phonetic::RefinedNYSIIS.encode(self, options)
11
+ end
12
+ end
@@ -1,5 +1,6 @@
1
1
  require 'phonetic/algorithm'
2
- require 'phonetic/dm_soundex_map'
2
+ require 'phonetic/dm_soundex/map'
3
+ require 'phonetic/dm_soundex/code'
3
4
 
4
5
  module Phonetic
5
6
  # Daitch–Mokotoff Soundex (D–M Soundex) is a phonetic algorithm invented
@@ -19,7 +20,7 @@ module Phonetic
19
20
  def self.encode_word(word, options = {})
20
21
  w = word.strip.upcase.gsub(/[^A-Z]+/, '')
21
22
  i = 0
22
- code = init_code()
23
+ code = Code.new
23
24
  while i < w.size
24
25
  if w[i] != w[i + 1]
25
26
  c = find_code(MAP, w, i)
@@ -37,29 +38,11 @@ module Phonetic
37
38
  end
38
39
  i += 1
39
40
  end
40
- code.result
41
+ code.results
41
42
  end
42
43
 
43
44
  private
44
45
 
45
- def self.init_code
46
- code = [[]]
47
- def code.add(a)
48
- case a
49
- when Array
50
- c = self.map{|w| w.last != a[1] ? w + [a[1]] : w}
51
- self.map!{|w| w.last != a[0] ? w + [a[0]] : w}
52
- self.push(*c)
53
- else
54
- self.map!{|w| w.last != a ? w + [a] : w}
55
- end
56
- end
57
- def code.result
58
- self.map{|w| w.join[0..5].ljust(6, '0')}.uniq
59
- end
60
- code
61
- end
62
-
63
46
  def self.find_code(map, w, i, last = nil, count = 0)
64
47
  elem = map[w[i]]
65
48
  r = case elem
@@ -0,0 +1,30 @@
1
+ module Phonetic
2
+ class DMSoundex
3
+ class Code
4
+ def initialize
5
+ @codes = [[]]
6
+ end
7
+
8
+ def add(a)
9
+ case a
10
+ when Array
11
+ c1 = add_code(a[0])
12
+ c2 = add_code(a[1])
13
+ @codes = c1 + c2
14
+ else
15
+ @codes = add_code(a)
16
+ end
17
+ end
18
+
19
+ def results
20
+ @codes.map{|w| w.join[0..5].ljust(6, '0')}.uniq
21
+ end
22
+
23
+ private
24
+
25
+ def add_code(code)
26
+ @codes.map{|w| w.last != code ? w + [code] : w}
27
+ end
28
+ end
29
+ end
30
+ end
@@ -1,6 +1,7 @@
1
1
  # encoding: utf-8
2
2
 
3
3
  require 'phonetic/algorithm'
4
+ require 'phonetic/double_metaphone/code'
4
5
 
5
6
  module Phonetic
6
7
  # The Double Metaphone phonetic encoding algorithm is the second generation
@@ -22,15 +23,39 @@ module Phonetic
22
23
  # Phonetic::Metaphone2.encode('dumb') # => ['TM', 'TM']
23
24
  # Phonetic::Metaphone2.encode('edgar') # => ['ATKR', 'ATKR']
24
25
  class DoubleMetaphone < Algorithm
26
+ START_OF_WORD_MAP = {
27
+ # skip these when at start of word
28
+ /^([GKP]N|WR|PS)/ => ['', '', 1],
29
+ # initial 'X' is pronounced 'Z' e.g. 'Xavier'
30
+ /^X/ => ['S', 'S', 1],
31
+ # all init vowels now map to 'A'
32
+ /^[AEIOUY]/ => ['A', 'A', 1],
33
+ # special case 'caesar'
34
+ /^CAESAR/ => ['S', 'S', 1],
35
+ # special case 'sugar-'
36
+ /^SUGAR/ => ['X', 'S', 1],
37
+ # -ges-, -gep-, -gel-, -gie- at beginning
38
+ /^G(Y|E[SPBLYIR]|I[BLNE])/ => ['K', 'J', 2],
39
+ # keep H if first & before vowel
40
+ /^H[AEIOUY]/ => ['H', 'H', 2],
41
+ # german & anglicisations, e.g. 'smith' match 'schmidt', 'snider' match 'schneider'
42
+ /^S[MNLW]/ => ['S', 'X', 1],
43
+ # ghislane, ghiradelli
44
+ /^GHI/ => ['J', 'J', 2],
45
+ /^GH/ => ['K', 'K', 2],
46
+ # greek roots e.g. 'chemistry', 'chorus'
47
+ /^CH(ARAC|ARIS|OR[^E]|YM|EM)/ => ['K', 'K', 2],
48
+ # Wasserman should match Vasserman
49
+ /^W[AEIOUY]/ => ['A', 'F', 0],
50
+ # need Uomo to match Womo
51
+ /^WH/ => ['A', 'A', 0]
52
+ }
53
+
25
54
  # Encode word to its Double Metaphone code.
26
55
  def self.encode_word(word, options = { size: 4 })
27
56
  code_size = options[:size] || 4
28
57
  w = word.strip.upcase
29
- code = ['', '']
30
- def code.add(primary, secondary)
31
- self[0] += primary
32
- self[1] += secondary
33
- end
58
+ code = Code.new
34
59
  i = 0
35
60
  len = w.size
36
61
  last = len - 1
@@ -47,22 +72,12 @@ module Phonetic
47
72
  when 'Ç', 'ç'
48
73
  code.add 'S', 'S'
49
74
  i += 1
50
- when 'C'
51
- i += encode_c(w, i, len, code)
52
- when 'D'
53
- i += encode_d(w, i, len, code)
75
+ when 'C', 'D'
76
+ i += char_encode(w, i, len, code)
54
77
  when 'F', 'K', 'N'
55
78
  i += gen_encode(w, i, w[i], w[i], code)
56
- when 'G'
57
- i += encode_g(w, i, len, code)
58
- when 'H'
59
- i += encode_h(w, i, len, code)
60
- when 'J'
61
- i += encode_j(w, i, len, code)
62
- when 'L'
63
- i += encode_l(w, i, len, code)
64
- when 'M'
65
- i += encode_m(w, i, len, code)
79
+ when 'G', 'H', 'J', 'L', 'M'
80
+ i += char_encode(w, i, len, code)
66
81
  when 'Ñ', 'ñ'
67
82
  code.add 'N', 'N'
68
83
  i += 1
@@ -70,25 +85,17 @@ module Phonetic
70
85
  i += encode_p(w, i, len, code)
71
86
  when 'Q'
72
87
  i += gen_encode(w, i, 'K', 'K', code)
73
- when 'R'
74
- i += encode_r(w, i, len, code)
75
- when 'S'
76
- i += encode_s(w, i, len, code)
77
- when 'T'
78
- i += encode_t(w, i, len, code)
88
+ when 'R', 'S', 'T'
89
+ i += char_encode(w, i, len, code)
79
90
  when 'V'
80
91
  i += gen_encode(w, i, 'F', 'F', code)
81
- when 'W'
82
- i += encode_w(w, i, len, code)
83
- when 'X'
84
- i += encode_x(w, i, len, code)
85
- when 'Z'
86
- i += encode_z(w, i, len, code)
92
+ when 'W', 'X', 'Z'
93
+ i += char_encode(w, i, len, code)
87
94
  else
88
95
  i += 1
89
96
  end
90
97
  end
91
- [code.first[0, code_size], code.last[0, code_size]]
98
+ code.results(code_size)
92
99
  end
93
100
 
94
101
  def self.encode(str, options = { size: 4 })
@@ -99,19 +106,12 @@ module Phonetic
99
106
 
100
107
  def self.encode_start_of_word(w, code)
101
108
  i = 0
102
- # skip these when at start of word
103
- if w[0, 2] =~ /[GKP]N|WR|PS/
104
- i = 1
105
- # initial 'X' is pronounced 'Z' e.g. 'Xavier'
106
- elsif w[0] == 'X'
107
- code.add 'S', 'S'
108
- i = 1
109
- elsif w[0] =~ /[AEIOUY]/
110
- code.add 'A', 'A' # all init vowels now map to 'A'
111
- i = 1
112
- elsif w[0, 6] == 'CAESAR' # special case 'caesar'
113
- code.add 'S', 'S'
114
- i = 1
109
+ START_OF_WORD_MAP.each do |r, v|
110
+ if w =~ r
111
+ code.add v[0], v[1]
112
+ i = v[2]
113
+ break
114
+ end
115
115
  end
116
116
  i
117
117
  end
@@ -121,6 +121,10 @@ module Phonetic
121
121
  w[i + 1] == w[i] ? 2 : 1
122
122
  end
123
123
 
124
+ def self.char_encode(w, i, len, code)
125
+ self.send "encode_#{w[i].downcase}", w, i, len, code
126
+ end
127
+
124
128
  def self.encode_c(w, i, len, code)
125
129
  r = 1
126
130
  case
@@ -129,8 +133,7 @@ module Phonetic
129
133
  code.add 'K', 'K'
130
134
  r += 1
131
135
  when w[i, 2] == 'CH'
132
- encode_ch(w, i, len, code)
133
- r += 1
136
+ r += encode_ch(w, i, len, code)
134
137
  when w[i, 2] == 'CZ' && !(i > 1 && w[i - 2, 4] == 'WICZ')
135
138
  # e.g, 'czerny'
136
139
  code.add 'S', 'X'
@@ -145,13 +148,12 @@ module Phonetic
145
148
  when w[i, 2] =~ /C[KGQ]/
146
149
  code.add 'K', 'K'
147
150
  r += 1
151
+ # italian vs. english
152
+ when w[i, 3] =~ /CI[OEA]/
153
+ code.add 'S', 'X'
154
+ r += 1
148
155
  when w[i, 2] =~ /C[IEY]/
149
- # italian vs. english
150
- if w[i, 3] =~ /CI[OEA]/
151
- code.add 'S', 'X'
152
- else
153
- code.add 'S', 'S'
154
- end
156
+ code.add 'S', 'S'
155
157
  r += 1
156
158
  else
157
159
  code.add 'K', 'K'
@@ -167,17 +169,16 @@ module Phonetic
167
169
 
168
170
  def self.encode_d(w, i, len, code)
169
171
  r = 1
170
- if w[i, 2] == 'DG'
171
- if w[i + 2] =~ /[IEY]/
172
- # e.g. 'edge'
173
- code.add 'J', 'J'
174
- r += 2
175
- else
176
- # e.g. 'edgar'
177
- code.add 'TK', 'TK'
178
- r += 1
179
- end
180
- elsif w[i, 2] =~ /D[TD]/
172
+ case
173
+ when w[i + 1, 2] =~ /G[IEY]/
174
+ # e.g. 'edge'
175
+ code.add 'J', 'J'
176
+ r += 2
177
+ when w[i + 1] == 'G'
178
+ # e.g. 'edgar'
179
+ code.add 'TK', 'TK'
180
+ r += 1
181
+ when w[i + 1] =~ /[TD]/
181
182
  code.add 'T', 'T'
182
183
  r += 1
183
184
  else
@@ -188,22 +189,19 @@ module Phonetic
188
189
 
189
190
  def self.encode_g(w, i, len, code)
190
191
  r = 2
191
- if w[i + 1] == 'H'
192
+ case
193
+ when w[i + 1] == 'H'
192
194
  encode_gh(w, i, code)
193
- elsif w[i + 1] == 'N'
195
+ when w[i + 1] == 'N'
194
196
  encode_gn(w, i, code)
195
197
  # 'tagliaro'
196
- elsif w[i + 1, 2] == 'LI' && !slavo_germanic?(w)
198
+ when w[i + 1, 2] == 'LI' && !slavo_germanic?(w)
197
199
  code.add 'KL', 'L'
198
- # -ges-, -gep-, -gel-, -gie- at beginning
199
- elsif i == 0 && w[1, 2] =~ /^Y|E[SPBLYIR]|I[BLNE]/
200
- code.add 'K', 'J'
201
200
  # -ger-, -gy-
202
- elsif g_ger_or_gy?(w, i)
201
+ when g_ger_or_gy?(w, i)
203
202
  code.add 'K', 'J'
204
- # italian e.g, 'biaggi'
205
- elsif w[i + 1] =~ /[EIY]/ || (i > 0 && w[i - 1, 4] =~ /[AO]GGI/)
206
- if w[0, 4] =~ /^(VAN |VON |SCH)/ || w[i + 1, 2] == 'ET'
203
+ when g_italian?(w, i)
204
+ if w[0, 4] =~ /^(V[AO]N\s|SCH)/ || w[i + 1, 2] == 'ET'
207
205
  code.add 'K', 'K'
208
206
  elsif w[i + 1, 4] =~ /IER\s/
209
207
  code.add 'J', 'J'
@@ -219,8 +217,8 @@ module Phonetic
219
217
 
220
218
  def self.encode_h(w, i, len, code)
221
219
  r = 1
222
- # only keep if first & before vowel or btw. 2 vowels
223
- if (i == 0 || i > 0 && vowel?(w[i - 1])) && vowel?(w[i + 1])
220
+ # keep 'H' only if it is between two vowels
221
+ if i > 0 && vowel?(w[i - 1]) && vowel?(w[i + 1])
224
222
  code.add 'H', 'H'
225
223
  r += 1
226
224
  end
@@ -307,39 +305,27 @@ module Phonetic
307
305
  def self.encode_s(w, i, len, code)
308
306
  r = 1
309
307
  last = len - 1
308
+ case
310
309
  # special cases 'island', 'isle', 'carlisle', 'carlysle'
311
- if i > 0 && w[i - 1, 3] =~ /[IY]SL/
312
- # special case 'sugar-'
313
- elsif i == 0 && w[i, 5] == 'SUGAR'
314
- code.add 'X', 'S'
315
- elsif w[i, 2] == 'SH'
316
- # germanic
317
- if w[i + 1, 4] =~ /H(EIM|OEK|OL[MZ])/
318
- code.add 'S', 'S'
319
- else
320
- code.add 'X', 'X'
321
- end
322
- r += 1
310
+ when i > 0 && w[i - 1, 3] =~ /[IY]SL/
311
+ when w[i, 2] == 'SH'
312
+ r += encode_sh(w, i, code)
323
313
  # italian & armenian
324
- elsif w[i, 3] =~ /SI[OA]/
314
+ when w[i, 3] =~ /SI[OA]/
325
315
  if !slavo_germanic?(w)
326
316
  code.add 'S', 'X'
327
317
  else
328
318
  code.add 'S', 'S'
329
319
  end
330
320
  r += 2
331
- # german & anglicisations, e.g. 'smith' match 'schmidt',
332
- # 'snider' match 'schneider' also, -sz- in slavic language altho in
333
- # hungarian it is pronounced 's'
334
- elsif i == 0 && w[i + 1] =~ /[MNLW]/ || w[i + 1] == 'Z'
321
+ # -sz- in Slavic languages, although in Hungarian it is pronounced 's'
322
+ when w[i, 2] == 'SZ'
335
323
  code.add 'S', 'X'
336
- r += 1 if w[i + 1] == 'Z'
337
- elsif w[i, 2] == 'SC'
338
- encode_sc(w, i, code)
339
- r += 2
340
- # french e.g. 'resnais', 'artois'
324
+ r += 1
325
+ when w[i, 2] == 'SC'
326
+ r += encode_sc(w, i, code)
341
327
  else
342
- if i == last && i > 1 && w[i - 2, 2] =~ /[AO]I/
328
+ if s_french?(w, i, last)
343
329
  code.add '', 'S'
344
330
  else
345
331
  code.add 'S', 'S'
@@ -377,18 +363,9 @@ module Phonetic
377
363
  code.add 'R', 'R'
378
364
  r += 1
379
365
  else
380
- if i == 0 && (vowel?(w[i + 1]) || w[i, 2] == 'WH')
381
- # Wasserman should match Vasserman
382
- if vowel?(w[i + 1])
383
- code.add 'A', 'F'
384
- else
385
- # need Uomo to match Womo
386
- code.add 'A', 'A'
387
- end
388
- end
389
366
  # Arnow should match Arnoff
390
367
  if i == last && i > 0 && vowel?(w[i - 1]) ||
391
- i > 0 && w[i - 1, 5] =~ /EWSKI|EWSKY|OWSKI|OWSKY/ ||
368
+ i > 0 && w[i - 1, 5] =~ /[EO]WSK[IY]/ ||
392
369
  w[0, 3] == 'SCH'
393
370
  code.add '', 'F'
394
371
  elsif w[i, 4] =~ /WICZ|WITZ/
@@ -432,9 +409,6 @@ module Phonetic
432
409
  # find 'michael'
433
410
  when i > 0 && w[i, 4] == 'CHAE'
434
411
  code.add 'K', 'X'
435
- # greek roots e.g. 'chemistry', 'chorus'
436
- when ch_greek_roots?(w, i)
437
- code.add 'K', 'K'
438
412
  # germanic, greek, or otherwise 'ch' for 'kh' sound
439
413
  when ch_germanic_or_greek?(w, i, len)
440
414
  code.add 'K', 'K'
@@ -446,6 +420,7 @@ module Phonetic
446
420
  else
447
421
  code.add 'X', 'K'
448
422
  end
423
+ 1
449
424
  end
450
425
 
451
426
  def self.encode_cc(w, i, code)
@@ -470,19 +445,12 @@ module Phonetic
470
445
  def self.encode_gh(w, i, code)
471
446
  if i > 0 && !vowel?(w[i - 1])
472
447
  code.add 'K', 'K'
473
- elsif i == 0
474
- # ghislane, ghiradelli
475
- if w[i + 2] == 'I'
476
- code.add 'J', 'J'
477
- else
478
- code.add 'K', 'K'
479
- end
480
448
  # Parker's rule (with some further refinements)
481
449
  elsif !(i > 1 && w[i - 2] =~ /[BHD]/ || # e.g., 'hugh'
482
450
  i > 2 && w[i - 3] =~ /[BHD]/ || # e.g., 'bough'
483
- i > 3 && w[i - 4] =~ /[BH]/) # e.g., 'broughton'
451
+ i > 3 && w[i - 4] =~ /[BH]/) # e.g., 'broughton'
484
452
  # e.g., 'laugh', 'McLaughlin', 'cough', 'gough', 'rough', 'tough'
485
- if i > 2 && w[i - 1] == 'U' && w[i - 3] =~ /[CGLRT]/
453
+ if i > 2 && w[i - 3, 3] =~ /[CGLRT].U/
486
454
  code.add 'F', 'F'
487
455
  elsif i > 0 && w[i - 1] != 'I'
488
456
  code.add 'K', 'K'
@@ -501,6 +469,16 @@ module Phonetic
501
469
  end
502
470
  end
503
471
 
472
+ def self.encode_sh(w, i, code)
473
+ # germanic
474
+ if w[i + 1, 4] =~ /H(EIM|OEK|OL[MZ])/
475
+ code.add 'S', 'S'
476
+ else
477
+ code.add 'X', 'X'
478
+ end
479
+ 1
480
+ end
481
+
504
482
  def self.encode_sc(w, i, code)
505
483
  # Schlesinger's rule
506
484
  if w[i + 2] == 'H'
@@ -520,6 +498,7 @@ module Phonetic
520
498
  else
521
499
  code.add 'SK', 'SK'
522
500
  end
501
+ 2
523
502
  end
524
503
 
525
504
  def self.slavo_germanic?(w)
@@ -532,15 +511,7 @@ module Phonetic
532
511
 
533
512
  def self.c_germanic?(w, i)
534
513
  # various germanic
535
- i > 1 &&
536
- !vowel?(w[i - 2]) &&
537
- w[i - 1, 3] == 'ACH' &&
538
- (w[i + 2] !~ /[IE]/ || w[i - 2, 6] =~ /[BM]ACHER/)
539
- end
540
-
541
- def self.ch_greek_roots?(w, i)
542
- # greek roots e.g. 'chemistry', 'chorus'
543
- i == 0 && w[1, 5] =~ /^H(ARAC|ARIS|OR|YM|IA|EM)/ && w[0, 5] != 'CHORE'
514
+ i > 1 && w[i - 2, 6] =~ /(^[^AEIOUY]ACH[^IE])|([BM]ACHER)/
544
515
  end
545
516
 
546
517
  def self.ch_germanic_or_greek?(w, i, len)
@@ -562,6 +533,11 @@ module Phonetic
562
533
  !(i > 0 && w[i - 1, 3] =~ /[RO]GY/)
563
534
  end
564
535
 
536
+ def self.g_italian?(w, i)
537
+ # Italian, e.g. 'biaggi'
538
+ w[i + 1] =~ /[EIY]/ || (i > 0 && w[i - 1, 4] =~ /[AO]GGI/)
539
+ end
540
+
565
541
  def self.j_spanish_pron?(w, i)
566
542
  # spanish pron. of e.g. 'bajador'
567
543
  i > 0 && vowel?(w[i - 1]) && !slavo_germanic?(w) && w[i + 1] =~ /[AO]/
@@ -582,6 +558,11 @@ module Phonetic
582
558
  !(i > 3 && w[i - 4, 2] =~ /M[EA]/)
583
559
  end
584
560
 
561
+ def self.s_french?(w, i, last)
562
+ # french e.g. 'resnais', 'artois'
563
+ i == last && i > 1 && w[i - 2, 2] =~ /[AO]I/
564
+ end
565
+
585
566
  def self.x_french?(w, i, last)
586
567
  # french e.g. breaux
587
568
  i == last && (i > 2 && w[i - 3, 3] =~ /[IE]AU/ || i > 1 && w[i - 2, 2] =~ /[AO]U/)