phonetic 1.1.0 → 1.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +9 -5
- data/.yardopts +5 -0
- data/CHANGELOG.md +14 -0
- data/README.md +123 -109
- data/lib/phonetic.rb +1 -0
- data/lib/phonetic/core_ext/string/nysiis.rb +1 -1
- data/lib/phonetic/core_ext/string/refined_nysiis.rb +12 -0
- data/lib/phonetic/dm_soundex.rb +4 -21
- data/lib/phonetic/dm_soundex/code.rb +30 -0
- data/lib/phonetic/{dm_soundex_map.rb → dm_soundex/map.rb} +0 -0
- data/lib/phonetic/double_metaphone.rb +111 -130
- data/lib/phonetic/double_metaphone/code.rb +28 -0
- data/lib/phonetic/metaphone.rb +123 -87
- data/lib/phonetic/refined_nysiis.rb +72 -0
- data/lib/phonetic/version.rb +1 -1
- data/phonetic.gemspec +29 -27
- data/spec/phonetic/caverphone2_spec.rb +2 -53
- data/spec/phonetic/caverphone_spec.rb +2 -104
- data/spec/phonetic/core_ext/string/refined_nysiis_spec.rb +9 -0
- data/spec/phonetic/double_metaphone_spec.rb +3 -2
- data/spec/phonetic/refined_nysiis_spec.rb +30 -0
- data/spec/spec_helper.rb +6 -5
- data/spec/support/caverphone2_data.rb +53 -0
- data/spec/support/caverphone_data.rb +104 -0
- data/spec/support/double_metaphone_data.rb +5 -0
- data/spec/support/refined_nysiis_data.rb +49 -0
- metadata +20 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 98bef8e122a5abed59eee25d4e9e4a2475aef89b
|
4
|
+
data.tar.gz: 9a4656b92c3e81f507ab5ffdcbd55a701728ff05
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: aea70d4160ade24bfd89370b06ba2381d7e444f8e59f361c48c48209e77de10b5dde42ef04e77a67558f3be5e6f3379812618e5a3c91f165e37aa5378e6b4acf
|
7
|
+
data.tar.gz: 17af435c3b3d7c8603a0a5a2de323671f04eefe5cbec6100e89bef862cfe4beab14fa63f3449b83233e001479f0d52a30b8f448400281db33f2789669b7cb31b
|
data/.travis.yml
CHANGED
data/.yardopts
ADDED
data/CHANGELOG.md
ADDED
data/README.md
CHANGED
@@ -1,109 +1,123 @@
|
|
1
|
-
# Phonetic
|
2
|
-
[![Build Status](https://travis-ci.org/n7v/phonetic.png)](https://travis-ci.org/n7v/phonetic)
|
3
|
-
[![Gem Version](https://badge.fury.io/rb/phonetic.png)](http://badge.fury.io/rb/phonetic)
|
4
|
-
[![Coverage Status](https://coveralls.io/repos/n7v/phonetic/badge.png)](https://coveralls.io/r/n7v/phonetic)
|
5
|
-
[![Code Climate](https://codeclimate.com/github/n7v/phonetic.png)](https://codeclimate.com/github/n7v/phonetic)
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
'
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
1
|
+
# Phonetic
|
2
|
+
[![Build Status](https://travis-ci.org/n7v/phonetic.png)](https://travis-ci.org/n7v/phonetic)
|
3
|
+
[![Gem Version](https://badge.fury.io/rb/phonetic.png)](http://badge.fury.io/rb/phonetic)
|
4
|
+
[![Coverage Status](https://coveralls.io/repos/n7v/phonetic/badge.png)](https://coveralls.io/r/n7v/phonetic)
|
5
|
+
[![Code Climate](https://codeclimate.com/github/n7v/phonetic.png)](https://codeclimate.com/github/n7v/phonetic)
|
6
|
+
[![Dependency Status](https://gemnasium.com/n7v/phonetic.png)](https://gemnasium.com/n7v/phonetic)
|
7
|
+
|
8
|
+
Ruby library for phonetic algorithms.
|
9
|
+
It supports Soundex, Metaphone, Double Metaphone, Caverphone, NYSIIS and others.
|
10
|
+
|
11
|
+
## Installation
|
12
|
+
|
13
|
+
Add this line to your application's Gemfile:
|
14
|
+
|
15
|
+
gem 'phonetic'
|
16
|
+
|
17
|
+
And then execute:
|
18
|
+
|
19
|
+
```shell
|
20
|
+
$ bundle
|
21
|
+
```
|
22
|
+
|
23
|
+
Or install it yourself as:
|
24
|
+
|
25
|
+
```shell
|
26
|
+
$ gem install phonetic
|
27
|
+
```
|
28
|
+
|
29
|
+
## Dependencies
|
30
|
+
|
31
|
+
Ruby >= 1.9, JRuby 1.7.6, Rubinius 2.1.1
|
32
|
+
|
33
|
+
## Usage
|
34
|
+
|
35
|
+
```ruby
|
36
|
+
require 'phonetic'
|
37
|
+
```
|
38
|
+
|
39
|
+
### Soundex
|
40
|
+
|
41
|
+
```ruby
|
42
|
+
'Ackerman'.soundex # => 'A265'
|
43
|
+
'ammonium'.soundex # => 'A500'
|
44
|
+
'implementation'.soundex # => 'I514'
|
45
|
+
```
|
46
|
+
|
47
|
+
### Refined Soundex
|
48
|
+
|
49
|
+
```ruby
|
50
|
+
'Caren'.refined_soundex # => 'C30908'
|
51
|
+
'Hayers'.refined_soundex # => 'H093'
|
52
|
+
'Lambard'.refined_soundex # => 'L7081096'
|
53
|
+
```
|
54
|
+
|
55
|
+
### Metaphone
|
56
|
+
|
57
|
+
```ruby
|
58
|
+
'Accola'.metaphone # => 'AKKL'
|
59
|
+
'Nikki'.metaphone # => 'NK'
|
60
|
+
'Wright'.metaphone #=> 'RT'
|
61
|
+
```
|
62
|
+
|
63
|
+
### Double Metaphone
|
64
|
+
|
65
|
+
```ruby
|
66
|
+
'czerny'.double_metaphone # => ['SRN', 'XRN']
|
67
|
+
'dumb'.double_metaphone # => ['TM', 'TM']
|
68
|
+
'edgar'.double_metaphone # => ['ATKR', 'ATKR']
|
69
|
+
```
|
70
|
+
|
71
|
+
or use alias:
|
72
|
+
|
73
|
+
```ruby
|
74
|
+
'czerny'.metaphone2 # => ['SRN', 'XRN']
|
75
|
+
'dumb'.metaphone2 # => ['TM', 'TM']
|
76
|
+
'edgar'.metaphone2 # => ['ATKR', 'ATKR']
|
77
|
+
```
|
78
|
+
|
79
|
+
### Caverphone
|
80
|
+
|
81
|
+
```ruby
|
82
|
+
'Lashaunda'.caverphone # => 'LSNT11'
|
83
|
+
'Vidaurri'.caverphone # => 'FTR111'
|
84
|
+
````
|
85
|
+
|
86
|
+
### Caverphone 2
|
87
|
+
|
88
|
+
```ruby
|
89
|
+
'Stevenson'.caverphone2 # => 'STFNSN1111'
|
90
|
+
'Peter'.caverphone2 # => 'PTA1111111'
|
91
|
+
```
|
92
|
+
|
93
|
+
### NYSIIS
|
94
|
+
|
95
|
+
```ruby
|
96
|
+
'Alexandra'.nysiis # => 'ALAXANDR'
|
97
|
+
'Aumont'.nysiis # => 'AANAD'
|
98
|
+
'Bonnie'.nysiis # => 'BANY'
|
99
|
+
```
|
100
|
+
|
101
|
+
### Refined NYSIIS
|
102
|
+
|
103
|
+
```ruby
|
104
|
+
'Aumont'.refined_nysiis # => 'ANAD'
|
105
|
+
'Phoenix'.refined_nysiis # => 'FANAC'
|
106
|
+
'Schmidt'.refined_nysiis # => 'SNAD'
|
107
|
+
```
|
108
|
+
|
109
|
+
### Daitch–Mokotoff Soundex (D–M Soundex)
|
110
|
+
|
111
|
+
```ruby
|
112
|
+
'Anja'.dm_soundex # => ['060000', '064000']
|
113
|
+
'Schwarz'.dm_soundex # => ['474000', '479400']
|
114
|
+
'Schtolteheim'.dm_soundex # => ['283560']
|
115
|
+
```
|
116
|
+
|
117
|
+
## Contributing
|
118
|
+
|
119
|
+
1. Fork it
|
120
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
121
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
122
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
123
|
+
5. Create new Pull Request
|
data/lib/phonetic.rb
CHANGED
@@ -0,0 +1,12 @@
|
|
1
|
+
require 'phonetic/refined_nysiis'
|
2
|
+
|
3
|
+
class String
|
4
|
+
# Refined NYSIIS value of string.
|
5
|
+
# @example
|
6
|
+
# 'Aumont'.refined_nysiis # => 'ANAD'
|
7
|
+
# 'Phoenix'.refined_nysiis # => 'FANAC'
|
8
|
+
# 'Schmidt'.refined_nysiis # => 'SNAD'
|
9
|
+
def refined_nysiis(options = { trim: true })
|
10
|
+
Phonetic::RefinedNYSIIS.encode(self, options)
|
11
|
+
end
|
12
|
+
end
|
data/lib/phonetic/dm_soundex.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
require 'phonetic/algorithm'
|
2
|
-
require 'phonetic/
|
2
|
+
require 'phonetic/dm_soundex/map'
|
3
|
+
require 'phonetic/dm_soundex/code'
|
3
4
|
|
4
5
|
module Phonetic
|
5
6
|
# Daitch–Mokotoff Soundex (D–M Soundex) is a phonetic algorithm invented
|
@@ -19,7 +20,7 @@ module Phonetic
|
|
19
20
|
def self.encode_word(word, options = {})
|
20
21
|
w = word.strip.upcase.gsub(/[^A-Z]+/, '')
|
21
22
|
i = 0
|
22
|
-
code =
|
23
|
+
code = Code.new
|
23
24
|
while i < w.size
|
24
25
|
if w[i] != w[i + 1]
|
25
26
|
c = find_code(MAP, w, i)
|
@@ -37,29 +38,11 @@ module Phonetic
|
|
37
38
|
end
|
38
39
|
i += 1
|
39
40
|
end
|
40
|
-
code.
|
41
|
+
code.results
|
41
42
|
end
|
42
43
|
|
43
44
|
private
|
44
45
|
|
45
|
-
def self.init_code
|
46
|
-
code = [[]]
|
47
|
-
def code.add(a)
|
48
|
-
case a
|
49
|
-
when Array
|
50
|
-
c = self.map{|w| w.last != a[1] ? w + [a[1]] : w}
|
51
|
-
self.map!{|w| w.last != a[0] ? w + [a[0]] : w}
|
52
|
-
self.push(*c)
|
53
|
-
else
|
54
|
-
self.map!{|w| w.last != a ? w + [a] : w}
|
55
|
-
end
|
56
|
-
end
|
57
|
-
def code.result
|
58
|
-
self.map{|w| w.join[0..5].ljust(6, '0')}.uniq
|
59
|
-
end
|
60
|
-
code
|
61
|
-
end
|
62
|
-
|
63
46
|
def self.find_code(map, w, i, last = nil, count = 0)
|
64
47
|
elem = map[w[i]]
|
65
48
|
r = case elem
|
@@ -0,0 +1,30 @@
|
|
1
|
+
module Phonetic
|
2
|
+
class DMSoundex
|
3
|
+
class Code
|
4
|
+
def initialize
|
5
|
+
@codes = [[]]
|
6
|
+
end
|
7
|
+
|
8
|
+
def add(a)
|
9
|
+
case a
|
10
|
+
when Array
|
11
|
+
c1 = add_code(a[0])
|
12
|
+
c2 = add_code(a[1])
|
13
|
+
@codes = c1 + c2
|
14
|
+
else
|
15
|
+
@codes = add_code(a)
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
def results
|
20
|
+
@codes.map{|w| w.join[0..5].ljust(6, '0')}.uniq
|
21
|
+
end
|
22
|
+
|
23
|
+
private
|
24
|
+
|
25
|
+
def add_code(code)
|
26
|
+
@codes.map{|w| w.last != code ? w + [code] : w}
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
File without changes
|
@@ -1,6 +1,7 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
|
3
3
|
require 'phonetic/algorithm'
|
4
|
+
require 'phonetic/double_metaphone/code'
|
4
5
|
|
5
6
|
module Phonetic
|
6
7
|
# The Double Metaphone phonetic encoding algorithm is the second generation
|
@@ -22,15 +23,39 @@ module Phonetic
|
|
22
23
|
# Phonetic::Metaphone2.encode('dumb') # => ['TM', 'TM']
|
23
24
|
# Phonetic::Metaphone2.encode('edgar') # => ['ATKR', 'ATKR']
|
24
25
|
class DoubleMetaphone < Algorithm
|
26
|
+
START_OF_WORD_MAP = {
|
27
|
+
# skip these when at start of word
|
28
|
+
/^([GKP]N|WR|PS)/ => ['', '', 1],
|
29
|
+
# initial 'X' is pronounced 'Z' e.g. 'Xavier'
|
30
|
+
/^X/ => ['S', 'S', 1],
|
31
|
+
# all init vowels now map to 'A'
|
32
|
+
/^[AEIOUY]/ => ['A', 'A', 1],
|
33
|
+
# special case 'caesar'
|
34
|
+
/^CAESAR/ => ['S', 'S', 1],
|
35
|
+
# special case 'sugar-'
|
36
|
+
/^SUGAR/ => ['X', 'S', 1],
|
37
|
+
# -ges-, -gep-, -gel-, -gie- at beginning
|
38
|
+
/^G(Y|E[SPBLYIR]|I[BLNE])/ => ['K', 'J', 2],
|
39
|
+
# keep H if first & before vowel
|
40
|
+
/^H[AEIOUY]/ => ['H', 'H', 2],
|
41
|
+
# german & anglicisations, e.g. 'smith' match 'schmidt', 'snider' match 'schneider'
|
42
|
+
/^S[MNLW]/ => ['S', 'X', 1],
|
43
|
+
# ghislane, ghiradelli
|
44
|
+
/^GHI/ => ['J', 'J', 2],
|
45
|
+
/^GH/ => ['K', 'K', 2],
|
46
|
+
# greek roots e.g. 'chemistry', 'chorus'
|
47
|
+
/^CH(ARAC|ARIS|OR[^E]|YM|EM)/ => ['K', 'K', 2],
|
48
|
+
# Wasserman should match Vasserman
|
49
|
+
/^W[AEIOUY]/ => ['A', 'F', 0],
|
50
|
+
# need Uomo to match Womo
|
51
|
+
/^WH/ => ['A', 'A', 0]
|
52
|
+
}
|
53
|
+
|
25
54
|
# Encode word to its Double Metaphone code.
|
26
55
|
def self.encode_word(word, options = { size: 4 })
|
27
56
|
code_size = options[:size] || 4
|
28
57
|
w = word.strip.upcase
|
29
|
-
code =
|
30
|
-
def code.add(primary, secondary)
|
31
|
-
self[0] += primary
|
32
|
-
self[1] += secondary
|
33
|
-
end
|
58
|
+
code = Code.new
|
34
59
|
i = 0
|
35
60
|
len = w.size
|
36
61
|
last = len - 1
|
@@ -47,22 +72,12 @@ module Phonetic
|
|
47
72
|
when 'Ç', 'ç'
|
48
73
|
code.add 'S', 'S'
|
49
74
|
i += 1
|
50
|
-
when 'C'
|
51
|
-
i +=
|
52
|
-
when 'D'
|
53
|
-
i += encode_d(w, i, len, code)
|
75
|
+
when 'C', 'D'
|
76
|
+
i += char_encode(w, i, len, code)
|
54
77
|
when 'F', 'K', 'N'
|
55
78
|
i += gen_encode(w, i, w[i], w[i], code)
|
56
|
-
when 'G'
|
57
|
-
i +=
|
58
|
-
when 'H'
|
59
|
-
i += encode_h(w, i, len, code)
|
60
|
-
when 'J'
|
61
|
-
i += encode_j(w, i, len, code)
|
62
|
-
when 'L'
|
63
|
-
i += encode_l(w, i, len, code)
|
64
|
-
when 'M'
|
65
|
-
i += encode_m(w, i, len, code)
|
79
|
+
when 'G', 'H', 'J', 'L', 'M'
|
80
|
+
i += char_encode(w, i, len, code)
|
66
81
|
when 'Ñ', 'ñ'
|
67
82
|
code.add 'N', 'N'
|
68
83
|
i += 1
|
@@ -70,25 +85,17 @@ module Phonetic
|
|
70
85
|
i += encode_p(w, i, len, code)
|
71
86
|
when 'Q'
|
72
87
|
i += gen_encode(w, i, 'K', 'K', code)
|
73
|
-
when 'R'
|
74
|
-
i +=
|
75
|
-
when 'S'
|
76
|
-
i += encode_s(w, i, len, code)
|
77
|
-
when 'T'
|
78
|
-
i += encode_t(w, i, len, code)
|
88
|
+
when 'R', 'S', 'T'
|
89
|
+
i += char_encode(w, i, len, code)
|
79
90
|
when 'V'
|
80
91
|
i += gen_encode(w, i, 'F', 'F', code)
|
81
|
-
when 'W'
|
82
|
-
i +=
|
83
|
-
when 'X'
|
84
|
-
i += encode_x(w, i, len, code)
|
85
|
-
when 'Z'
|
86
|
-
i += encode_z(w, i, len, code)
|
92
|
+
when 'W', 'X', 'Z'
|
93
|
+
i += char_encode(w, i, len, code)
|
87
94
|
else
|
88
95
|
i += 1
|
89
96
|
end
|
90
97
|
end
|
91
|
-
|
98
|
+
code.results(code_size)
|
92
99
|
end
|
93
100
|
|
94
101
|
def self.encode(str, options = { size: 4 })
|
@@ -99,19 +106,12 @@ module Phonetic
|
|
99
106
|
|
100
107
|
def self.encode_start_of_word(w, code)
|
101
108
|
i = 0
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
i = 1
|
109
|
-
elsif w[0] =~ /[AEIOUY]/
|
110
|
-
code.add 'A', 'A' # all init vowels now map to 'A'
|
111
|
-
i = 1
|
112
|
-
elsif w[0, 6] == 'CAESAR' # special case 'caesar'
|
113
|
-
code.add 'S', 'S'
|
114
|
-
i = 1
|
109
|
+
START_OF_WORD_MAP.each do |r, v|
|
110
|
+
if w =~ r
|
111
|
+
code.add v[0], v[1]
|
112
|
+
i = v[2]
|
113
|
+
break
|
114
|
+
end
|
115
115
|
end
|
116
116
|
i
|
117
117
|
end
|
@@ -121,6 +121,10 @@ module Phonetic
|
|
121
121
|
w[i + 1] == w[i] ? 2 : 1
|
122
122
|
end
|
123
123
|
|
124
|
+
def self.char_encode(w, i, len, code)
|
125
|
+
self.send "encode_#{w[i].downcase}", w, i, len, code
|
126
|
+
end
|
127
|
+
|
124
128
|
def self.encode_c(w, i, len, code)
|
125
129
|
r = 1
|
126
130
|
case
|
@@ -129,8 +133,7 @@ module Phonetic
|
|
129
133
|
code.add 'K', 'K'
|
130
134
|
r += 1
|
131
135
|
when w[i, 2] == 'CH'
|
132
|
-
encode_ch(w, i, len, code)
|
133
|
-
r += 1
|
136
|
+
r += encode_ch(w, i, len, code)
|
134
137
|
when w[i, 2] == 'CZ' && !(i > 1 && w[i - 2, 4] == 'WICZ')
|
135
138
|
# e.g, 'czerny'
|
136
139
|
code.add 'S', 'X'
|
@@ -145,13 +148,12 @@ module Phonetic
|
|
145
148
|
when w[i, 2] =~ /C[KGQ]/
|
146
149
|
code.add 'K', 'K'
|
147
150
|
r += 1
|
151
|
+
# italian vs. english
|
152
|
+
when w[i, 3] =~ /CI[OEA]/
|
153
|
+
code.add 'S', 'X'
|
154
|
+
r += 1
|
148
155
|
when w[i, 2] =~ /C[IEY]/
|
149
|
-
|
150
|
-
if w[i, 3] =~ /CI[OEA]/
|
151
|
-
code.add 'S', 'X'
|
152
|
-
else
|
153
|
-
code.add 'S', 'S'
|
154
|
-
end
|
156
|
+
code.add 'S', 'S'
|
155
157
|
r += 1
|
156
158
|
else
|
157
159
|
code.add 'K', 'K'
|
@@ -167,17 +169,16 @@ module Phonetic
|
|
167
169
|
|
168
170
|
def self.encode_d(w, i, len, code)
|
169
171
|
r = 1
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
elsif w[i, 2] =~ /D[TD]/
|
172
|
+
case
|
173
|
+
when w[i + 1, 2] =~ /G[IEY]/
|
174
|
+
# e.g. 'edge'
|
175
|
+
code.add 'J', 'J'
|
176
|
+
r += 2
|
177
|
+
when w[i + 1] == 'G'
|
178
|
+
# e.g. 'edgar'
|
179
|
+
code.add 'TK', 'TK'
|
180
|
+
r += 1
|
181
|
+
when w[i + 1] =~ /[TD]/
|
181
182
|
code.add 'T', 'T'
|
182
183
|
r += 1
|
183
184
|
else
|
@@ -188,22 +189,19 @@ module Phonetic
|
|
188
189
|
|
189
190
|
def self.encode_g(w, i, len, code)
|
190
191
|
r = 2
|
191
|
-
|
192
|
+
case
|
193
|
+
when w[i + 1] == 'H'
|
192
194
|
encode_gh(w, i, code)
|
193
|
-
|
195
|
+
when w[i + 1] == 'N'
|
194
196
|
encode_gn(w, i, code)
|
195
197
|
# 'tagliaro'
|
196
|
-
|
198
|
+
when w[i + 1, 2] == 'LI' && !slavo_germanic?(w)
|
197
199
|
code.add 'KL', 'L'
|
198
|
-
# -ges-, -gep-, -gel-, -gie- at beginning
|
199
|
-
elsif i == 0 && w[1, 2] =~ /^Y|E[SPBLYIR]|I[BLNE]/
|
200
|
-
code.add 'K', 'J'
|
201
200
|
# -ger-, -gy-
|
202
|
-
|
201
|
+
when g_ger_or_gy?(w, i)
|
203
202
|
code.add 'K', 'J'
|
204
|
-
|
205
|
-
|
206
|
-
if w[0, 4] =~ /^(VAN |VON |SCH)/ || w[i + 1, 2] == 'ET'
|
203
|
+
when g_italian?(w, i)
|
204
|
+
if w[0, 4] =~ /^(V[AO]N\s|SCH)/ || w[i + 1, 2] == 'ET'
|
207
205
|
code.add 'K', 'K'
|
208
206
|
elsif w[i + 1, 4] =~ /IER\s/
|
209
207
|
code.add 'J', 'J'
|
@@ -219,8 +217,8 @@ module Phonetic
|
|
219
217
|
|
220
218
|
def self.encode_h(w, i, len, code)
|
221
219
|
r = 1
|
222
|
-
#
|
223
|
-
if
|
220
|
+
# keep if btw. 2 vowels
|
221
|
+
if i > 0 && vowel?(w[i - 1]) && vowel?(w[i + 1])
|
224
222
|
code.add 'H', 'H'
|
225
223
|
r += 1
|
226
224
|
end
|
@@ -307,39 +305,27 @@ module Phonetic
|
|
307
305
|
def self.encode_s(w, i, len, code)
|
308
306
|
r = 1
|
309
307
|
last = len - 1
|
308
|
+
case
|
310
309
|
# special cases 'island', 'isle', 'carlisle', 'carlysle'
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
code.add 'X', 'S'
|
315
|
-
elsif w[i, 2] == 'SH'
|
316
|
-
# germanic
|
317
|
-
if w[i + 1, 4] =~ /H(EIM|OEK|OL[MZ])/
|
318
|
-
code.add 'S', 'S'
|
319
|
-
else
|
320
|
-
code.add 'X', 'X'
|
321
|
-
end
|
322
|
-
r += 1
|
310
|
+
when i > 0 && w[i - 1, 3] =~ /[IY]SL/
|
311
|
+
when w[i, 2] == 'SH'
|
312
|
+
r += encode_sh(w, i, code)
|
323
313
|
# italian & armenian
|
324
|
-
|
314
|
+
when w[i, 3] =~ /SI[OA]/
|
325
315
|
if !slavo_germanic?(w)
|
326
316
|
code.add 'S', 'X'
|
327
317
|
else
|
328
318
|
code.add 'S', 'S'
|
329
319
|
end
|
330
320
|
r += 2
|
331
|
-
#
|
332
|
-
|
333
|
-
# hungarian it is pronounced 's'
|
334
|
-
elsif i == 0 && w[i + 1] =~ /[MNLW]/ || w[i + 1] == 'Z'
|
321
|
+
# -sz- in slavic language altho in hungarian it is pronounced 's'
|
322
|
+
when w[i, 2] == 'SZ'
|
335
323
|
code.add 'S', 'X'
|
336
|
-
r += 1
|
337
|
-
|
338
|
-
encode_sc(w, i, code)
|
339
|
-
r += 2
|
340
|
-
# french e.g. 'resnais', 'artois'
|
324
|
+
r += 1
|
325
|
+
when w[i, 2] == 'SC'
|
326
|
+
r += encode_sc(w, i, code)
|
341
327
|
else
|
342
|
-
if
|
328
|
+
if s_french?(w, i, last)
|
343
329
|
code.add '', 'S'
|
344
330
|
else
|
345
331
|
code.add 'S', 'S'
|
@@ -377,18 +363,9 @@ module Phonetic
|
|
377
363
|
code.add 'R', 'R'
|
378
364
|
r += 1
|
379
365
|
else
|
380
|
-
if i == 0 && (vowel?(w[i + 1]) || w[i, 2] == 'WH')
|
381
|
-
# Wasserman should match Vasserman
|
382
|
-
if vowel?(w[i + 1])
|
383
|
-
code.add 'A', 'F'
|
384
|
-
else
|
385
|
-
# need Uomo to match Womo
|
386
|
-
code.add 'A', 'A'
|
387
|
-
end
|
388
|
-
end
|
389
366
|
# Arnow should match Arnoff
|
390
367
|
if i == last && i > 0 && vowel?(w[i - 1]) ||
|
391
|
-
i > 0 && w[i - 1, 5] =~ /
|
368
|
+
i > 0 && w[i - 1, 5] =~ /[EO]WSK[IY]/ ||
|
392
369
|
w[0, 3] == 'SCH'
|
393
370
|
code.add '', 'F'
|
394
371
|
elsif w[i, 4] =~ /WICZ|WITZ/
|
@@ -432,9 +409,6 @@ module Phonetic
|
|
432
409
|
# find 'michael'
|
433
410
|
when i > 0 && w[i, 4] == 'CHAE'
|
434
411
|
code.add 'K', 'X'
|
435
|
-
# greek roots e.g. 'chemistry', 'chorus'
|
436
|
-
when ch_greek_roots?(w, i)
|
437
|
-
code.add 'K', 'K'
|
438
412
|
# germanic, greek, or otherwise 'ch' for 'kh' sound
|
439
413
|
when ch_germanic_or_greek?(w, i, len)
|
440
414
|
code.add 'K', 'K'
|
@@ -446,6 +420,7 @@ module Phonetic
|
|
446
420
|
else
|
447
421
|
code.add 'X', 'K'
|
448
422
|
end
|
423
|
+
1
|
449
424
|
end
|
450
425
|
|
451
426
|
def self.encode_cc(w, i, code)
|
@@ -470,19 +445,12 @@ module Phonetic
|
|
470
445
|
def self.encode_gh(w, i, code)
|
471
446
|
if i > 0 && !vowel?(w[i - 1])
|
472
447
|
code.add 'K', 'K'
|
473
|
-
elsif i == 0
|
474
|
-
# ghislane, ghiradelli
|
475
|
-
if w[i + 2] == 'I'
|
476
|
-
code.add 'J', 'J'
|
477
|
-
else
|
478
|
-
code.add 'K', 'K'
|
479
|
-
end
|
480
448
|
# Parker's rule (with some further refinements)
|
481
449
|
elsif !(i > 1 && w[i - 2] =~ /[BHD]/ || # e.g., 'hugh'
|
482
450
|
i > 2 && w[i - 3] =~ /[BHD]/ || # e.g., 'bough'
|
483
|
-
i > 3 && w[i - 4] =~ /[BH]/)
|
451
|
+
i > 3 && w[i - 4] =~ /[BH]/) # e.g., 'broughton'
|
484
452
|
# e.g., 'laugh', 'McLaughlin', 'cough', 'gough', 'rough', 'tough'
|
485
|
-
if i > 2 && w[i -
|
453
|
+
if i > 2 && w[i - 3, 3] =~ /[CGLRT].U/
|
486
454
|
code.add 'F', 'F'
|
487
455
|
elsif i > 0 && w[i - 1] != 'I'
|
488
456
|
code.add 'K', 'K'
|
@@ -501,6 +469,16 @@ module Phonetic
|
|
501
469
|
end
|
502
470
|
end
|
503
471
|
|
472
|
+
def self.encode_sh(w, i, code)
|
473
|
+
# germanic
|
474
|
+
if w[i + 1, 4] =~ /H(EIM|OEK|OL[MZ])/
|
475
|
+
code.add 'S', 'S'
|
476
|
+
else
|
477
|
+
code.add 'X', 'X'
|
478
|
+
end
|
479
|
+
1
|
480
|
+
end
|
481
|
+
|
504
482
|
def self.encode_sc(w, i, code)
|
505
483
|
# Schlesinger's rule
|
506
484
|
if w[i + 2] == 'H'
|
@@ -520,6 +498,7 @@ module Phonetic
|
|
520
498
|
else
|
521
499
|
code.add 'SK', 'SK'
|
522
500
|
end
|
501
|
+
2
|
523
502
|
end
|
524
503
|
|
525
504
|
def self.slavo_germanic?(w)
|
@@ -532,15 +511,7 @@ module Phonetic
|
|
532
511
|
|
533
512
|
def self.c_germanic?(w, i)
|
534
513
|
# various germanic
|
535
|
-
i > 1 &&
|
536
|
-
!vowel?(w[i - 2]) &&
|
537
|
-
w[i - 1, 3] == 'ACH' &&
|
538
|
-
(w[i + 2] !~ /[IE]/ || w[i - 2, 6] =~ /[BM]ACHER/)
|
539
|
-
end
|
540
|
-
|
541
|
-
def self.ch_greek_roots?(w, i)
|
542
|
-
# greek roots e.g. 'chemistry', 'chorus'
|
543
|
-
i == 0 && w[1, 5] =~ /^H(ARAC|ARIS|OR|YM|IA|EM)/ && w[0, 5] != 'CHORE'
|
514
|
+
i > 1 && w[i - 2, 6] =~ /(^[^AEIOUY]ACH[^IE])|([BM]ACHER)/
|
544
515
|
end
|
545
516
|
|
546
517
|
def self.ch_germanic_or_greek?(w, i, len)
|
@@ -562,6 +533,11 @@ module Phonetic
|
|
562
533
|
!(i > 0 && w[i - 1, 3] =~ /[RO]GY/)
|
563
534
|
end
|
564
535
|
|
536
|
+
def self.g_italian?(w, i)
|
537
|
+
# italian e.g, 'biaggi'
|
538
|
+
w[i + 1] =~ /[EIY]/ || (i > 0 && w[i - 1, 4] =~ /[AO]GGI/)
|
539
|
+
end
|
540
|
+
|
565
541
|
def self.j_spanish_pron?(w, i)
|
566
542
|
# spanish pron. of e.g. 'bajador'
|
567
543
|
i > 0 && vowel?(w[i - 1]) && !slavo_germanic?(w) && w[i + 1] =~ /[AO]/
|
@@ -582,6 +558,11 @@ module Phonetic
|
|
582
558
|
!(i > 3 && w[i - 4, 2] =~ /M[EA]/)
|
583
559
|
end
|
584
560
|
|
561
|
+
def self.s_french?(w, i, last)
|
562
|
+
# french e.g. 'resnais', 'artois'
|
563
|
+
i == last && i > 1 && w[i - 2, 2] =~ /[AO]I/
|
564
|
+
end
|
565
|
+
|
585
566
|
def self.x_french?(w, i, last)
|
586
567
|
# french e.g. breaux
|
587
568
|
i == last && (i > 2 && w[i - 3, 3] =~ /[IE]AU/ || i > 1 && w[i - 2, 2] =~ /[AO]U/)
|