phonetic 1.1.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +9 -5
- data/.yardopts +5 -0
- data/CHANGELOG.md +14 -0
- data/README.md +123 -109
- data/lib/phonetic.rb +1 -0
- data/lib/phonetic/core_ext/string/nysiis.rb +1 -1
- data/lib/phonetic/core_ext/string/refined_nysiis.rb +12 -0
- data/lib/phonetic/dm_soundex.rb +4 -21
- data/lib/phonetic/dm_soundex/code.rb +30 -0
- data/lib/phonetic/{dm_soundex_map.rb → dm_soundex/map.rb} +0 -0
- data/lib/phonetic/double_metaphone.rb +111 -130
- data/lib/phonetic/double_metaphone/code.rb +28 -0
- data/lib/phonetic/metaphone.rb +123 -87
- data/lib/phonetic/refined_nysiis.rb +72 -0
- data/lib/phonetic/version.rb +1 -1
- data/phonetic.gemspec +29 -27
- data/spec/phonetic/caverphone2_spec.rb +2 -53
- data/spec/phonetic/caverphone_spec.rb +2 -104
- data/spec/phonetic/core_ext/string/refined_nysiis_spec.rb +9 -0
- data/spec/phonetic/double_metaphone_spec.rb +3 -2
- data/spec/phonetic/refined_nysiis_spec.rb +30 -0
- data/spec/spec_helper.rb +6 -5
- data/spec/support/caverphone2_data.rb +53 -0
- data/spec/support/caverphone_data.rb +104 -0
- data/spec/support/double_metaphone_data.rb +5 -0
- data/spec/support/refined_nysiis_data.rb +49 -0
- metadata +20 -4
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 98bef8e122a5abed59eee25d4e9e4a2475aef89b
|
|
4
|
+
data.tar.gz: 9a4656b92c3e81f507ab5ffdcbd55a701728ff05
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: aea70d4160ade24bfd89370b06ba2381d7e444f8e59f361c48c48209e77de10b5dde42ef04e77a67558f3be5e6f3379812618e5a3c91f165e37aa5378e6b4acf
|
|
7
|
+
data.tar.gz: 17af435c3b3d7c8603a0a5a2de323671f04eefe5cbec6100e89bef862cfe4beab14fa63f3449b83233e001479f0d52a30b8f448400281db33f2789669b7cb31b
|
data/.travis.yml
CHANGED
data/.yardopts
ADDED
data/CHANGELOG.md
ADDED
data/README.md
CHANGED
|
@@ -1,109 +1,123 @@
|
|
|
1
|
-
# Phonetic
|
|
2
|
-
[Build Status](https://travis-ci.org/n7v/phonetic)
|
|
3
|
-
[Gem Version](http://badge.fury.io/rb/phonetic)
|
|
4
|
-
[Coverage Status](https://coveralls.io/r/n7v/phonetic)
|
|
5
|
-
[Code Climate](https://codeclimate.com/github/n7v/phonetic)
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
'
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
1
|
+
# Phonetic
|
|
2
|
+
[Build Status](https://travis-ci.org/n7v/phonetic)
|
|
3
|
+
[Gem Version](http://badge.fury.io/rb/phonetic)
|
|
4
|
+
[Coverage Status](https://coveralls.io/r/n7v/phonetic)
|
|
5
|
+
[Code Climate](https://codeclimate.com/github/n7v/phonetic)
|
|
6
|
+
[Dependency Status](https://gemnasium.com/n7v/phonetic)
|
|
7
|
+
|
|
8
|
+
Ruby library for phonetic algorithms.
|
|
9
|
+
It supports Soundex, Metaphone, Double Metaphone, Caverphone, NYSIIS and others.
|
|
10
|
+
|
|
11
|
+
## Installation
|
|
12
|
+
|
|
13
|
+
Add this line to your application's Gemfile:
|
|
14
|
+
|
|
15
|
+
gem 'phonetic'
|
|
16
|
+
|
|
17
|
+
And then execute:
|
|
18
|
+
|
|
19
|
+
```shell
|
|
20
|
+
$ bundle
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
Or install it yourself as:
|
|
24
|
+
|
|
25
|
+
```shell
|
|
26
|
+
$ gem install phonetic
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
## Dependencies
|
|
30
|
+
|
|
31
|
+
Ruby >= 1.9, JRuby 1.7.6, Rubinius 2.1.1
|
|
32
|
+
|
|
33
|
+
## Usage
|
|
34
|
+
|
|
35
|
+
```ruby
|
|
36
|
+
require 'phonetic'
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
### Soundex
|
|
40
|
+
|
|
41
|
+
```ruby
|
|
42
|
+
'Ackerman'.soundex # => 'A265'
|
|
43
|
+
'ammonium'.soundex # => 'A500'
|
|
44
|
+
'implementation'.soundex # => 'I514'
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
### Refined Soundex
|
|
48
|
+
|
|
49
|
+
```ruby
|
|
50
|
+
'Caren'.refined_soundex # => 'C30908'
|
|
51
|
+
'Hayers'.refined_soundex # => 'H093'
|
|
52
|
+
'Lambard'.refined_soundex # => 'L7081096'
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
### Metaphone
|
|
56
|
+
|
|
57
|
+
```ruby
|
|
58
|
+
'Accola'.metaphone # => 'AKKL'
|
|
59
|
+
'Nikki'.metaphone # => 'NK'
|
|
60
|
+
'Wright'.metaphone #=> 'RT'
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
### Double Metaphone
|
|
64
|
+
|
|
65
|
+
```ruby
|
|
66
|
+
'czerny'.double_metaphone # => ['SRN', 'XRN']
|
|
67
|
+
'dumb'.double_metaphone # => ['TM', 'TM']
|
|
68
|
+
'edgar'.double_metaphone # => ['ATKR', 'ATKR']
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
Or use the `metaphone2` alias:
|
|
72
|
+
|
|
73
|
+
```ruby
|
|
74
|
+
'czerny'.metaphone2 # => ['SRN', 'XRN']
|
|
75
|
+
'dumb'.metaphone2 # => ['TM', 'TM']
|
|
76
|
+
'edgar'.metaphone2 # => ['ATKR', 'ATKR']
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
### Caverphone
|
|
80
|
+
|
|
81
|
+
```ruby
|
|
82
|
+
'Lashaunda'.caverphone # => 'LSNT11'
|
|
83
|
+
'Vidaurri'.caverphone # => 'FTR111'
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
### Caverphone 2
|
|
87
|
+
|
|
88
|
+
```ruby
|
|
89
|
+
'Stevenson'.caverphone2 # => 'STFNSN1111'
|
|
90
|
+
'Peter'.caverphone2 # => 'PTA1111111'
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
### NYSIIS
|
|
94
|
+
|
|
95
|
+
```ruby
|
|
96
|
+
'Alexandra'.nysiis # => 'ALAXANDR'
|
|
97
|
+
'Aumont'.nysiis # => 'AANAD'
|
|
98
|
+
'Bonnie'.nysiis # => 'BANY'
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
### Refined NYSIIS
|
|
102
|
+
|
|
103
|
+
```ruby
|
|
104
|
+
'Aumont'.refined_nysiis # => 'ANAD'
|
|
105
|
+
'Phoenix'.refined_nysiis # => 'FANAC'
|
|
106
|
+
'Schmidt'.refined_nysiis # => 'SNAD'
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
### Daitch–Mokotoff Soundex (D–M Soundex)
|
|
110
|
+
|
|
111
|
+
```ruby
|
|
112
|
+
'Anja'.dm_soundex # => ['060000', '064000']
|
|
113
|
+
'Schwarz'.dm_soundex # => ['474000', '479400']
|
|
114
|
+
'Schtolteheim'.dm_soundex # => ['283560']
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
## Contributing
|
|
118
|
+
|
|
119
|
+
1. Fork it
|
|
120
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
|
121
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
|
122
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
|
123
|
+
5. Create new Pull Request
|
data/lib/phonetic.rb
CHANGED
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
require 'phonetic/refined_nysiis'
|
|
2
|
+
|
|
3
|
+
class String
|
|
4
|
+
# Refined NYSIIS value of string.
|
|
5
|
+
# @example
|
|
6
|
+
# 'Aumont'.refined_nysiis # => 'ANAD'
|
|
7
|
+
# 'Phoenix'.refined_nysiis # => 'FANAC'
|
|
8
|
+
# 'Schmidt'.refined_nysiis # => 'SNAD'
|
|
9
|
+
def refined_nysiis(options = { trim: true })
|
|
10
|
+
Phonetic::RefinedNYSIIS.encode(self, options)
|
|
11
|
+
end
|
|
12
|
+
end
|
data/lib/phonetic/dm_soundex.rb
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
require 'phonetic/algorithm'
|
|
2
|
-
require 'phonetic/
|
|
2
|
+
require 'phonetic/dm_soundex/map'
|
|
3
|
+
require 'phonetic/dm_soundex/code'
|
|
3
4
|
|
|
4
5
|
module Phonetic
|
|
5
6
|
# Daitch–Mokotoff Soundex (D–M Soundex) is a phonetic algorithm invented
|
|
@@ -19,7 +20,7 @@ module Phonetic
|
|
|
19
20
|
def self.encode_word(word, options = {})
|
|
20
21
|
w = word.strip.upcase.gsub(/[^A-Z]+/, '')
|
|
21
22
|
i = 0
|
|
22
|
-
code =
|
|
23
|
+
code = Code.new
|
|
23
24
|
while i < w.size
|
|
24
25
|
if w[i] != w[i + 1]
|
|
25
26
|
c = find_code(MAP, w, i)
|
|
@@ -37,29 +38,11 @@ module Phonetic
|
|
|
37
38
|
end
|
|
38
39
|
i += 1
|
|
39
40
|
end
|
|
40
|
-
code.
|
|
41
|
+
code.results
|
|
41
42
|
end
|
|
42
43
|
|
|
43
44
|
private
|
|
44
45
|
|
|
45
|
-
def self.init_code
|
|
46
|
-
code = [[]]
|
|
47
|
-
def code.add(a)
|
|
48
|
-
case a
|
|
49
|
-
when Array
|
|
50
|
-
c = self.map{|w| w.last != a[1] ? w + [a[1]] : w}
|
|
51
|
-
self.map!{|w| w.last != a[0] ? w + [a[0]] : w}
|
|
52
|
-
self.push(*c)
|
|
53
|
-
else
|
|
54
|
-
self.map!{|w| w.last != a ? w + [a] : w}
|
|
55
|
-
end
|
|
56
|
-
end
|
|
57
|
-
def code.result
|
|
58
|
-
self.map{|w| w.join[0..5].ljust(6, '0')}.uniq
|
|
59
|
-
end
|
|
60
|
-
code
|
|
61
|
-
end
|
|
62
|
-
|
|
63
46
|
def self.find_code(map, w, i, last = nil, count = 0)
|
|
64
47
|
elem = map[w[i]]
|
|
65
48
|
r = case elem
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
module Phonetic
|
|
2
|
+
class DMSoundex
|
|
3
|
+
class Code
|
|
4
|
+
def initialize
|
|
5
|
+
@codes = [[]]
|
|
6
|
+
end
|
|
7
|
+
|
|
8
|
+
def add(a)
|
|
9
|
+
case a
|
|
10
|
+
when Array
|
|
11
|
+
c1 = add_code(a[0])
|
|
12
|
+
c2 = add_code(a[1])
|
|
13
|
+
@codes = c1 + c2
|
|
14
|
+
else
|
|
15
|
+
@codes = add_code(a)
|
|
16
|
+
end
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
def results
|
|
20
|
+
@codes.map{|w| w.join[0..5].ljust(6, '0')}.uniq
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
private
|
|
24
|
+
|
|
25
|
+
def add_code(code)
|
|
26
|
+
@codes.map{|w| w.last != code ? w + [code] : w}
|
|
27
|
+
end
|
|
28
|
+
end
|
|
29
|
+
end
|
|
30
|
+
end
|
|
File without changes
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
# encoding: utf-8
|
|
2
2
|
|
|
3
3
|
require 'phonetic/algorithm'
|
|
4
|
+
require 'phonetic/double_metaphone/code'
|
|
4
5
|
|
|
5
6
|
module Phonetic
|
|
6
7
|
# The Double Metaphone phonetic encoding algorithm is the second generation
|
|
@@ -22,15 +23,39 @@ module Phonetic
|
|
|
22
23
|
# Phonetic::Metaphone2.encode('dumb') # => ['TM', 'TM']
|
|
23
24
|
# Phonetic::Metaphone2.encode('edgar') # => ['ATKR', 'ATKR']
|
|
24
25
|
class DoubleMetaphone < Algorithm
|
|
26
|
+
START_OF_WORD_MAP = {
|
|
27
|
+
# skip these when at start of word
|
|
28
|
+
/^([GKP]N|WR|PS)/ => ['', '', 1],
|
|
29
|
+
# initial 'X' is pronounced 'Z' e.g. 'Xavier'
|
|
30
|
+
/^X/ => ['S', 'S', 1],
|
|
31
|
+
# all init vowels now map to 'A'
|
|
32
|
+
/^[AEIOUY]/ => ['A', 'A', 1],
|
|
33
|
+
# special case 'caesar'
|
|
34
|
+
/^CAESAR/ => ['S', 'S', 1],
|
|
35
|
+
# special case 'sugar-'
|
|
36
|
+
/^SUGAR/ => ['X', 'S', 1],
|
|
37
|
+
# -ges-, -gep-, -gel-, -gie- at beginning
|
|
38
|
+
/^G(Y|E[SPBLYIR]|I[BLNE])/ => ['K', 'J', 2],
|
|
39
|
+
# keep H if first & before vowel
|
|
40
|
+
/^H[AEIOUY]/ => ['H', 'H', 2],
|
|
41
|
+
# german & anglicisations, e.g. 'smith' match 'schmidt', 'snider' match 'schneider'
|
|
42
|
+
/^S[MNLW]/ => ['S', 'X', 1],
|
|
43
|
+
# ghislane, ghiradelli
|
|
44
|
+
/^GHI/ => ['J', 'J', 2],
|
|
45
|
+
/^GH/ => ['K', 'K', 2],
|
|
46
|
+
# greek roots e.g. 'chemistry', 'chorus'
|
|
47
|
+
/^CH(ARAC|ARIS|OR[^E]|YM|EM)/ => ['K', 'K', 2],
|
|
48
|
+
# Wasserman should match Vasserman
|
|
49
|
+
/^W[AEIOUY]/ => ['A', 'F', 0],
|
|
50
|
+
# need Uomo to match Womo
|
|
51
|
+
/^WH/ => ['A', 'A', 0]
|
|
52
|
+
}
|
|
53
|
+
|
|
25
54
|
# Encode word to its Double Metaphone code.
|
|
26
55
|
def self.encode_word(word, options = { size: 4 })
|
|
27
56
|
code_size = options[:size] || 4
|
|
28
57
|
w = word.strip.upcase
|
|
29
|
-
code =
|
|
30
|
-
def code.add(primary, secondary)
|
|
31
|
-
self[0] += primary
|
|
32
|
-
self[1] += secondary
|
|
33
|
-
end
|
|
58
|
+
code = Code.new
|
|
34
59
|
i = 0
|
|
35
60
|
len = w.size
|
|
36
61
|
last = len - 1
|
|
@@ -47,22 +72,12 @@ module Phonetic
|
|
|
47
72
|
when 'Ç', 'ç'
|
|
48
73
|
code.add 'S', 'S'
|
|
49
74
|
i += 1
|
|
50
|
-
when 'C'
|
|
51
|
-
i +=
|
|
52
|
-
when 'D'
|
|
53
|
-
i += encode_d(w, i, len, code)
|
|
75
|
+
when 'C', 'D'
|
|
76
|
+
i += char_encode(w, i, len, code)
|
|
54
77
|
when 'F', 'K', 'N'
|
|
55
78
|
i += gen_encode(w, i, w[i], w[i], code)
|
|
56
|
-
when 'G'
|
|
57
|
-
i +=
|
|
58
|
-
when 'H'
|
|
59
|
-
i += encode_h(w, i, len, code)
|
|
60
|
-
when 'J'
|
|
61
|
-
i += encode_j(w, i, len, code)
|
|
62
|
-
when 'L'
|
|
63
|
-
i += encode_l(w, i, len, code)
|
|
64
|
-
when 'M'
|
|
65
|
-
i += encode_m(w, i, len, code)
|
|
79
|
+
when 'G', 'H', 'J', 'L', 'M'
|
|
80
|
+
i += char_encode(w, i, len, code)
|
|
66
81
|
when 'Ñ', 'ñ'
|
|
67
82
|
code.add 'N', 'N'
|
|
68
83
|
i += 1
|
|
@@ -70,25 +85,17 @@ module Phonetic
|
|
|
70
85
|
i += encode_p(w, i, len, code)
|
|
71
86
|
when 'Q'
|
|
72
87
|
i += gen_encode(w, i, 'K', 'K', code)
|
|
73
|
-
when 'R'
|
|
74
|
-
i +=
|
|
75
|
-
when 'S'
|
|
76
|
-
i += encode_s(w, i, len, code)
|
|
77
|
-
when 'T'
|
|
78
|
-
i += encode_t(w, i, len, code)
|
|
88
|
+
when 'R', 'S', 'T'
|
|
89
|
+
i += char_encode(w, i, len, code)
|
|
79
90
|
when 'V'
|
|
80
91
|
i += gen_encode(w, i, 'F', 'F', code)
|
|
81
|
-
when 'W'
|
|
82
|
-
i +=
|
|
83
|
-
when 'X'
|
|
84
|
-
i += encode_x(w, i, len, code)
|
|
85
|
-
when 'Z'
|
|
86
|
-
i += encode_z(w, i, len, code)
|
|
92
|
+
when 'W', 'X', 'Z'
|
|
93
|
+
i += char_encode(w, i, len, code)
|
|
87
94
|
else
|
|
88
95
|
i += 1
|
|
89
96
|
end
|
|
90
97
|
end
|
|
91
|
-
|
|
98
|
+
code.results(code_size)
|
|
92
99
|
end
|
|
93
100
|
|
|
94
101
|
def self.encode(str, options = { size: 4 })
|
|
@@ -99,19 +106,12 @@ module Phonetic
|
|
|
99
106
|
|
|
100
107
|
def self.encode_start_of_word(w, code)
|
|
101
108
|
i = 0
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
i = 1
|
|
109
|
-
elsif w[0] =~ /[AEIOUY]/
|
|
110
|
-
code.add 'A', 'A' # all init vowels now map to 'A'
|
|
111
|
-
i = 1
|
|
112
|
-
elsif w[0, 6] == 'CAESAR' # special case 'caesar'
|
|
113
|
-
code.add 'S', 'S'
|
|
114
|
-
i = 1
|
|
109
|
+
START_OF_WORD_MAP.each do |r, v|
|
|
110
|
+
if w =~ r
|
|
111
|
+
code.add v[0], v[1]
|
|
112
|
+
i = v[2]
|
|
113
|
+
break
|
|
114
|
+
end
|
|
115
115
|
end
|
|
116
116
|
i
|
|
117
117
|
end
|
|
@@ -121,6 +121,10 @@ module Phonetic
|
|
|
121
121
|
w[i + 1] == w[i] ? 2 : 1
|
|
122
122
|
end
|
|
123
123
|
|
|
124
|
+
def self.char_encode(w, i, len, code)
|
|
125
|
+
self.send "encode_#{w[i].downcase}", w, i, len, code
|
|
126
|
+
end
|
|
127
|
+
|
|
124
128
|
def self.encode_c(w, i, len, code)
|
|
125
129
|
r = 1
|
|
126
130
|
case
|
|
@@ -129,8 +133,7 @@ module Phonetic
|
|
|
129
133
|
code.add 'K', 'K'
|
|
130
134
|
r += 1
|
|
131
135
|
when w[i, 2] == 'CH'
|
|
132
|
-
encode_ch(w, i, len, code)
|
|
133
|
-
r += 1
|
|
136
|
+
r += encode_ch(w, i, len, code)
|
|
134
137
|
when w[i, 2] == 'CZ' && !(i > 1 && w[i - 2, 4] == 'WICZ')
|
|
135
138
|
# e.g, 'czerny'
|
|
136
139
|
code.add 'S', 'X'
|
|
@@ -145,13 +148,12 @@ module Phonetic
|
|
|
145
148
|
when w[i, 2] =~ /C[KGQ]/
|
|
146
149
|
code.add 'K', 'K'
|
|
147
150
|
r += 1
|
|
151
|
+
# italian vs. english
|
|
152
|
+
when w[i, 3] =~ /CI[OEA]/
|
|
153
|
+
code.add 'S', 'X'
|
|
154
|
+
r += 1
|
|
148
155
|
when w[i, 2] =~ /C[IEY]/
|
|
149
|
-
|
|
150
|
-
if w[i, 3] =~ /CI[OEA]/
|
|
151
|
-
code.add 'S', 'X'
|
|
152
|
-
else
|
|
153
|
-
code.add 'S', 'S'
|
|
154
|
-
end
|
|
156
|
+
code.add 'S', 'S'
|
|
155
157
|
r += 1
|
|
156
158
|
else
|
|
157
159
|
code.add 'K', 'K'
|
|
@@ -167,17 +169,16 @@ module Phonetic
|
|
|
167
169
|
|
|
168
170
|
def self.encode_d(w, i, len, code)
|
|
169
171
|
r = 1
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
elsif w[i, 2] =~ /D[TD]/
|
|
172
|
+
case
|
|
173
|
+
when w[i + 1, 2] =~ /G[IEY]/
|
|
174
|
+
# e.g. 'edge'
|
|
175
|
+
code.add 'J', 'J'
|
|
176
|
+
r += 2
|
|
177
|
+
when w[i + 1] == 'G'
|
|
178
|
+
# e.g. 'edgar'
|
|
179
|
+
code.add 'TK', 'TK'
|
|
180
|
+
r += 1
|
|
181
|
+
when w[i + 1] =~ /[TD]/
|
|
181
182
|
code.add 'T', 'T'
|
|
182
183
|
r += 1
|
|
183
184
|
else
|
|
@@ -188,22 +189,19 @@ module Phonetic
|
|
|
188
189
|
|
|
189
190
|
def self.encode_g(w, i, len, code)
|
|
190
191
|
r = 2
|
|
191
|
-
|
|
192
|
+
case
|
|
193
|
+
when w[i + 1] == 'H'
|
|
192
194
|
encode_gh(w, i, code)
|
|
193
|
-
|
|
195
|
+
when w[i + 1] == 'N'
|
|
194
196
|
encode_gn(w, i, code)
|
|
195
197
|
# 'tagliaro'
|
|
196
|
-
|
|
198
|
+
when w[i + 1, 2] == 'LI' && !slavo_germanic?(w)
|
|
197
199
|
code.add 'KL', 'L'
|
|
198
|
-
# -ges-, -gep-, -gel-, -gie- at beginning
|
|
199
|
-
elsif i == 0 && w[1, 2] =~ /^Y|E[SPBLYIR]|I[BLNE]/
|
|
200
|
-
code.add 'K', 'J'
|
|
201
200
|
# -ger-, -gy-
|
|
202
|
-
|
|
201
|
+
when g_ger_or_gy?(w, i)
|
|
203
202
|
code.add 'K', 'J'
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
if w[0, 4] =~ /^(VAN |VON |SCH)/ || w[i + 1, 2] == 'ET'
|
|
203
|
+
when g_italian?(w, i)
|
|
204
|
+
if w[0, 4] =~ /^(V[AO]N\s|SCH)/ || w[i + 1, 2] == 'ET'
|
|
207
205
|
code.add 'K', 'K'
|
|
208
206
|
elsif w[i + 1, 4] =~ /IER\s/
|
|
209
207
|
code.add 'J', 'J'
|
|
@@ -219,8 +217,8 @@ module Phonetic
|
|
|
219
217
|
|
|
220
218
|
def self.encode_h(w, i, len, code)
|
|
221
219
|
r = 1
|
|
222
|
-
#
|
|
223
|
-
if
|
|
220
|
+
# keep if btw. 2 vowels
|
|
221
|
+
if i > 0 && vowel?(w[i - 1]) && vowel?(w[i + 1])
|
|
224
222
|
code.add 'H', 'H'
|
|
225
223
|
r += 1
|
|
226
224
|
end
|
|
@@ -307,39 +305,27 @@ module Phonetic
|
|
|
307
305
|
def self.encode_s(w, i, len, code)
|
|
308
306
|
r = 1
|
|
309
307
|
last = len - 1
|
|
308
|
+
case
|
|
310
309
|
# special cases 'island', 'isle', 'carlisle', 'carlysle'
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
code.add 'X', 'S'
|
|
315
|
-
elsif w[i, 2] == 'SH'
|
|
316
|
-
# germanic
|
|
317
|
-
if w[i + 1, 4] =~ /H(EIM|OEK|OL[MZ])/
|
|
318
|
-
code.add 'S', 'S'
|
|
319
|
-
else
|
|
320
|
-
code.add 'X', 'X'
|
|
321
|
-
end
|
|
322
|
-
r += 1
|
|
310
|
+
when i > 0 && w[i - 1, 3] =~ /[IY]SL/
|
|
311
|
+
when w[i, 2] == 'SH'
|
|
312
|
+
r += encode_sh(w, i, code)
|
|
323
313
|
# italian & armenian
|
|
324
|
-
|
|
314
|
+
when w[i, 3] =~ /SI[OA]/
|
|
325
315
|
if !slavo_germanic?(w)
|
|
326
316
|
code.add 'S', 'X'
|
|
327
317
|
else
|
|
328
318
|
code.add 'S', 'S'
|
|
329
319
|
end
|
|
330
320
|
r += 2
|
|
331
|
-
#
|
|
332
|
-
|
|
333
|
-
# hungarian it is pronounced 's'
|
|
334
|
-
elsif i == 0 && w[i + 1] =~ /[MNLW]/ || w[i + 1] == 'Z'
|
|
321
|
+
# -sz- in slavic language altho in hungarian it is pronounced 's'
|
|
322
|
+
when w[i, 2] == 'SZ'
|
|
335
323
|
code.add 'S', 'X'
|
|
336
|
-
r += 1
|
|
337
|
-
|
|
338
|
-
encode_sc(w, i, code)
|
|
339
|
-
r += 2
|
|
340
|
-
# french e.g. 'resnais', 'artois'
|
|
324
|
+
r += 1
|
|
325
|
+
when w[i, 2] == 'SC'
|
|
326
|
+
r += encode_sc(w, i, code)
|
|
341
327
|
else
|
|
342
|
-
if
|
|
328
|
+
if s_french?(w, i, last)
|
|
343
329
|
code.add '', 'S'
|
|
344
330
|
else
|
|
345
331
|
code.add 'S', 'S'
|
|
@@ -377,18 +363,9 @@ module Phonetic
|
|
|
377
363
|
code.add 'R', 'R'
|
|
378
364
|
r += 1
|
|
379
365
|
else
|
|
380
|
-
if i == 0 && (vowel?(w[i + 1]) || w[i, 2] == 'WH')
|
|
381
|
-
# Wasserman should match Vasserman
|
|
382
|
-
if vowel?(w[i + 1])
|
|
383
|
-
code.add 'A', 'F'
|
|
384
|
-
else
|
|
385
|
-
# need Uomo to match Womo
|
|
386
|
-
code.add 'A', 'A'
|
|
387
|
-
end
|
|
388
|
-
end
|
|
389
366
|
# Arnow should match Arnoff
|
|
390
367
|
if i == last && i > 0 && vowel?(w[i - 1]) ||
|
|
391
|
-
i > 0 && w[i - 1, 5] =~ /
|
|
368
|
+
i > 0 && w[i - 1, 5] =~ /[EO]WSK[IY]/ ||
|
|
392
369
|
w[0, 3] == 'SCH'
|
|
393
370
|
code.add '', 'F'
|
|
394
371
|
elsif w[i, 4] =~ /WICZ|WITZ/
|
|
@@ -432,9 +409,6 @@ module Phonetic
|
|
|
432
409
|
# find 'michael'
|
|
433
410
|
when i > 0 && w[i, 4] == 'CHAE'
|
|
434
411
|
code.add 'K', 'X'
|
|
435
|
-
# greek roots e.g. 'chemistry', 'chorus'
|
|
436
|
-
when ch_greek_roots?(w, i)
|
|
437
|
-
code.add 'K', 'K'
|
|
438
412
|
# germanic, greek, or otherwise 'ch' for 'kh' sound
|
|
439
413
|
when ch_germanic_or_greek?(w, i, len)
|
|
440
414
|
code.add 'K', 'K'
|
|
@@ -446,6 +420,7 @@ module Phonetic
|
|
|
446
420
|
else
|
|
447
421
|
code.add 'X', 'K'
|
|
448
422
|
end
|
|
423
|
+
1
|
|
449
424
|
end
|
|
450
425
|
|
|
451
426
|
def self.encode_cc(w, i, code)
|
|
@@ -470,19 +445,12 @@ module Phonetic
|
|
|
470
445
|
def self.encode_gh(w, i, code)
|
|
471
446
|
if i > 0 && !vowel?(w[i - 1])
|
|
472
447
|
code.add 'K', 'K'
|
|
473
|
-
elsif i == 0
|
|
474
|
-
# ghislane, ghiradelli
|
|
475
|
-
if w[i + 2] == 'I'
|
|
476
|
-
code.add 'J', 'J'
|
|
477
|
-
else
|
|
478
|
-
code.add 'K', 'K'
|
|
479
|
-
end
|
|
480
448
|
# Parker's rule (with some further refinements)
|
|
481
449
|
elsif !(i > 1 && w[i - 2] =~ /[BHD]/ || # e.g., 'hugh'
|
|
482
450
|
i > 2 && w[i - 3] =~ /[BHD]/ || # e.g., 'bough'
|
|
483
|
-
i > 3 && w[i - 4] =~ /[BH]/)
|
|
451
|
+
i > 3 && w[i - 4] =~ /[BH]/) # e.g., 'broughton'
|
|
484
452
|
# e.g., 'laugh', 'McLaughlin', 'cough', 'gough', 'rough', 'tough'
|
|
485
|
-
if i > 2 && w[i -
|
|
453
|
+
if i > 2 && w[i - 3, 3] =~ /[CGLRT].U/
|
|
486
454
|
code.add 'F', 'F'
|
|
487
455
|
elsif i > 0 && w[i - 1] != 'I'
|
|
488
456
|
code.add 'K', 'K'
|
|
@@ -501,6 +469,16 @@ module Phonetic
|
|
|
501
469
|
end
|
|
502
470
|
end
|
|
503
471
|
|
|
472
|
+
def self.encode_sh(w, i, code)
|
|
473
|
+
# germanic
|
|
474
|
+
if w[i + 1, 4] =~ /H(EIM|OEK|OL[MZ])/
|
|
475
|
+
code.add 'S', 'S'
|
|
476
|
+
else
|
|
477
|
+
code.add 'X', 'X'
|
|
478
|
+
end
|
|
479
|
+
1
|
|
480
|
+
end
|
|
481
|
+
|
|
504
482
|
def self.encode_sc(w, i, code)
|
|
505
483
|
# Schlesinger's rule
|
|
506
484
|
if w[i + 2] == 'H'
|
|
@@ -520,6 +498,7 @@ module Phonetic
|
|
|
520
498
|
else
|
|
521
499
|
code.add 'SK', 'SK'
|
|
522
500
|
end
|
|
501
|
+
2
|
|
523
502
|
end
|
|
524
503
|
|
|
525
504
|
def self.slavo_germanic?(w)
|
|
@@ -532,15 +511,7 @@ module Phonetic
|
|
|
532
511
|
|
|
533
512
|
def self.c_germanic?(w, i)
|
|
534
513
|
# various germanic
|
|
535
|
-
i > 1 &&
|
|
536
|
-
!vowel?(w[i - 2]) &&
|
|
537
|
-
w[i - 1, 3] == 'ACH' &&
|
|
538
|
-
(w[i + 2] !~ /[IE]/ || w[i - 2, 6] =~ /[BM]ACHER/)
|
|
539
|
-
end
|
|
540
|
-
|
|
541
|
-
def self.ch_greek_roots?(w, i)
|
|
542
|
-
# greek roots e.g. 'chemistry', 'chorus'
|
|
543
|
-
i == 0 && w[1, 5] =~ /^H(ARAC|ARIS|OR|YM|IA|EM)/ && w[0, 5] != 'CHORE'
|
|
514
|
+
i > 1 && w[i - 2, 6] =~ /(^[^AEIOUY]ACH[^IE])|([BM]ACHER)/
|
|
544
515
|
end
|
|
545
516
|
|
|
546
517
|
def self.ch_germanic_or_greek?(w, i, len)
|
|
@@ -562,6 +533,11 @@ module Phonetic
|
|
|
562
533
|
!(i > 0 && w[i - 1, 3] =~ /[RO]GY/)
|
|
563
534
|
end
|
|
564
535
|
|
|
536
|
+
def self.g_italian?(w, i)
|
|
537
|
+
# italian e.g, 'biaggi'
|
|
538
|
+
w[i + 1] =~ /[EIY]/ || (i > 0 && w[i - 1, 4] =~ /[AO]GGI/)
|
|
539
|
+
end
|
|
540
|
+
|
|
565
541
|
def self.j_spanish_pron?(w, i)
|
|
566
542
|
# spanish pron. of e.g. 'bajador'
|
|
567
543
|
i > 0 && vowel?(w[i - 1]) && !slavo_germanic?(w) && w[i + 1] =~ /[AO]/
|
|
@@ -582,6 +558,11 @@ module Phonetic
|
|
|
582
558
|
!(i > 3 && w[i - 4, 2] =~ /M[EA]/)
|
|
583
559
|
end
|
|
584
560
|
|
|
561
|
+
def self.s_french?(w, i, last)
|
|
562
|
+
# french e.g. 'resnais', 'artois'
|
|
563
|
+
i == last && i > 1 && w[i - 2, 2] =~ /[AO]I/
|
|
564
|
+
end
|
|
565
|
+
|
|
585
566
|
def self.x_french?(w, i, last)
|
|
586
567
|
# french e.g. breaux
|
|
587
568
|
i == last && (i > 2 && w[i - 3, 3] =~ /[IE]AU/ || i > 1 && w[i - 2, 2] =~ /[AO]U/)
|