phonetic 1.1.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,28 @@
1
+ require 'phonetic/algorithm'
2
+
3
+ module Phonetic
4
+ class DoubleMetaphone < Algorithm
5
# Accumulator for the two encodings (primary and secondary) that are
# built side by side while a word is being processed.
class Code
  # Starts with both encodings empty.
  def initialize
    @codes = ['', '']
  end

  # Appends a fragment to each encoding.
  # @param primary [String] fragment appended to the primary encoding
  # @param secondary [String] fragment appended to the secondary encoding;
  #   defaults to +primary+ so that a fragment shared by both encodings
  #   needs to be passed only once
  # @return [String] the secondary encoding after the append
  def add(primary, secondary = primary)
    # += builds new strings, so values previously handed out by
    # #first / #last are never mutated behind the caller's back.
    @codes[0] += primary
    @codes[1] += secondary
  end

  # @param size [Integer] maximum length of each returned encoding
  # @return [Array<String>] primary and secondary encodings, each cut
  #   down to at most +size+ characters
  def results(size)
    [first[0, size], last[0, size]]
  end

  # @return [String] the primary encoding accumulated so far
  def first
    @codes.first
  end

  # @return [String] the secondary encoding accumulated so far
  def last
    @codes.last
  end
end
27
+ end
28
+ end
@@ -28,93 +28,40 @@ module Phonetic
28
28
  for n in 0..(l - 1)
29
29
  break unless metaph.size < code_size
30
30
  symb = w[n]
31
- if symb == 'C' || n == 0 || w[n - 1] != symb
32
- case
33
- when vowel?(symb) && n == 0
34
- metaph = symb
35
- when symb == 'B'
36
- metaph += symb if n != l - 1 || w[n - 1] != 'M'
37
- when symb == 'C'
38
- if n == 0 || w[n - 1] != 'S' || !front_vowel?(w[n + 1])
39
- if w[n + 1, 2] == 'IA'
40
- metaph += 'X'
41
- elsif front_vowel?(w[n + 1])
42
- metaph += 'S'
43
- elsif n > 0 && w[n + 1] == 'H' && w[n - 1] == 'S'
44
- metaph += 'K'
45
- elsif w[n + 1] == 'H'
46
- if n == 0 && !vowel?(w[n + 2])
47
- metaph += 'K'
48
- else
49
- metaph += 'X'
50
- end
51
- else
52
- metaph += 'K'
53
- end
54
- end
55
- when symb == 'D'
56
- if w[n + 1] == 'G' && front_vowel?(w[n + 2])
57
- metaph += 'J'
58
- else
59
- metaph += 'T'
60
- end
61
- when symb == 'G'
62
- silent = (w[n + 1] == 'H' && !vowel?(w[n + 2]))
63
- if n > 0 && (w[n + 1] == 'N' || w[n + 1, 3] == 'NED')
64
- silent = true
65
- end
66
- if n > 0 && w[n - 1] == 'D' && front_vowel?(w[n + 1])
67
- silent = true
68
- end
69
- hard = (n > 0 && w[n - 1] == 'G')
70
- unless silent
71
- if front_vowel?(w[n + 1]) && !hard
72
- metaph += 'J'
73
- else
74
- metaph += 'K'
75
- end
76
- end
77
- when symb == 'H'
78
- if !(n == l - 1 || (n > 0 && VARSON[w[n - 1]]))
79
- metaph += 'H' if vowel?(w[n + 1])
80
- end
81
- when symb =~ /[FJLMNR]/
82
- metaph += symb
83
- when symb == 'K'
84
- if n > 0 && w[n - 1] != 'C'
85
- metaph += 'K'
86
- elsif n == 0
87
- metaph = 'K'
88
- end
89
- when symb == 'P'
90
- metaph += w[n + 1] == 'H' ? 'F' : 'P'
91
- when symb == 'Q'
92
- metaph += 'K'
93
- when symb == 'S'
94
- if w[n + 1, 2] =~ /I[OA]/
95
- metaph += 'X'
96
- elsif w[n + 1] == 'H'
97
- metaph += 'X'
98
- else
99
- metaph += 'S'
100
- end
101
- when symb == 'T'
102
- if w[n + 1, 2] =~ /I[OA]/
103
- metaph += 'X'
104
- elsif w[n + 1] == 'H'
105
- metaph += '0' if n == 0 || w[n - 1] != 'T'
106
- else
107
- metaph += 'T' if w[n + 1, 2] != 'CH'
108
- end
109
- when symb == 'V'
110
- metaph += 'F'
111
- when symb =~ /[WY]/
112
- metaph += symb if vowel?(w[n + 1])
113
- when symb == 'X'
114
- metaph += 'KS'
115
- when symb == 'Z'
116
- metaph += 'S'
117
- end
31
+ next unless symb == 'C' || n == 0 || w[n - 1] != symb
32
+ case
33
+ when vowel?(symb) && n == 0
34
+ metaph = symb
35
+ when symb == 'B'
36
+ metaph += symb if n != l - 1 || w[n - 1] != 'M'
37
+ when symb == 'C'
38
+ metaph += encode_c(w, n)
39
+ when symb == 'D'
40
+ metaph += encode_d(w, n)
41
+ when symb == 'G'
42
+ metaph += encode_g(w, n)
43
+ when symb == 'H'
44
+ metaph += encode_h(w, n)
45
+ when symb =~ /[FJLMNR]/
46
+ metaph += symb
47
+ when symb == 'K'
48
+ metaph += encode_k(w, n)
49
+ when symb == 'P'
50
+ metaph += w[n + 1] == 'H' ? 'F' : 'P'
51
+ when symb == 'Q'
52
+ metaph += 'K'
53
+ when symb == 'S'
54
+ metaph += encode_s(w, n)
55
+ when symb == 'T'
56
+ metaph += encode_t(w, n)
57
+ when symb == 'V'
58
+ metaph += 'F'
59
+ when symb =~ /[WY]/
60
+ metaph += symb if vowel?(w[n + 1])
61
+ when symb == 'X'
62
+ metaph += 'KS'
63
+ when symb == 'Z'
64
+ metaph += 'S'
118
65
  end
119
66
  end
120
67
  metaph
@@ -131,5 +78,94 @@ module Phonetic
131
78
  v = FRONT_VOWELS[symbol.to_s]
132
79
  !v.nil? && !v.empty?
133
80
  end
81
+
82
# Code contributed by the 'C' found at index n of w.
# @param w [String] word being encoded
# @param n [Integer] index of the current character within w
# @return [String] 'X', 'S' or 'K', or '' when the letter adds nothing
def self.encode_c(w, n)
  follower = w[n + 1]
  after_s = n > 0 && w[n - 1] == 'S'

  # 'S' + 'C' + front vowel: the C contributes nothing
  return '' if after_s && front_vowel?(follower)
  return 'X' if w[n + 1, 2] == 'IA'
  return 'S' if front_vowel?(follower)
  return 'K' unless follower == 'H'

  # Only the 'CH' pair reaches this point.
  return 'K' if after_s
  (n == 0 && !vowel?(w[n + 2])) ? 'K' : 'X'
end
103
+
104
# Code contributed by the 'D' found at index n of w.
# @param w [String] word being encoded
# @param n [Integer] index of the current character within w
# @return [String] 'J' when the D is followed by 'G' and then a character
#   accepted by front_vowel?, otherwise 'T'
def self.encode_d(w, n)
  if w[n + 1] == 'G' && front_vowel?(w[n + 2])
    'J'
  else
    'T'
  end
end
107
+
108
# Code contributed by the 'G' found at index n of w.
# @param w [String] word being encoded
# @param n [Integer] index of the current character within w
# @return [String] 'J' or 'K', or '' when the G is treated as silent
def self.encode_g(w, n)
  follower = w[n + 1]
  inner = n > 0

  # Silent cases: 'GH' not followed by a vowel ...
  return '' if follower == 'H' && !vowel?(w[n + 2])
  # ... a non-initial G before 'N' (the 'NED' test is subsumed by the
  # 'N' test and is kept only to mirror the rule as originally written) ...
  return '' if inner && (follower == 'N' || w[n + 1, 3] == 'NED')
  # ... and 'DG' followed by a front vowel.
  return '' if inner && w[n - 1] == 'D' && front_vowel?(follower)

  # A G preceded by another G never softens to 'J'.
  doubled = inner && w[n - 1] == 'G'
  (front_vowel?(follower) && !doubled) ? 'J' : 'K'
end
127
+
128
# Code contributed by the 'H' found at index n of w.
# @param w [String] word being encoded
# @param n [Integer] index of the current character within w
# @return [String] 'H' when the letter is kept, otherwise ''
def self.encode_h(w, n)
  # A word-final H is dropped.
  return '' if n == w.size - 1
  # So is an H whose preceding character has an entry in VARSON.
  return '' if n > 0 && VARSON[w[n - 1]]

  vowel?(w[n + 1]) ? 'H' : ''
end
135
+
136
# Code contributed by the 'K' found at index n of w.
# @param w [String] word being encoded
# @param n [Integer] index of the current character within w
# @return [String] 'K', or '' when the K directly follows a 'C'
def self.encode_k(w, n)
  return 'K' if n == 0

  (n > 0 && w[n - 1] != 'C') ? 'K' : ''
end
145
+
146
# Code contributed by the 'S' found at index n of w.
# @param w [String] word being encoded
# @param n [Integer] index of the current character within w
# @return [String] 'X' when the S is followed by 'IO', 'IA' or 'H',
#   otherwise 'S'
def self.encode_s(w, n)
  becomes_x = w[n + 1, 2] =~ /I[OA]/ || w[n + 1] == 'H'
  becomes_x ? 'X' : 'S'
end
157
+
158
# Code contributed by the 'T' found at index n of w.
# @param w [String] word being encoded
# @param n [Integer] index of the current character within w
# @return [String] 'X', '0' or 'T', or '' when the letter adds nothing
def self.encode_t(w, n)
  next_two = w[n + 1, 2]
  return 'X' if next_two =~ /I[OA]/

  if w[n + 1] == 'H'
    # 'TH' yields '0' unless this T directly follows another T.
    return (n == 0 || w[n - 1] != 'T') ? '0' : ''
  end

  # A T in front of 'CH' contributes nothing.
  next_two != 'CH' ? 'T' : ''
end
169
+
134
170
  end
135
171
  end
@@ -0,0 +1,72 @@
1
+ require 'phonetic/algorithm'
2
+
3
+ module Phonetic
4
# This class implements the Refined NYSIIS algorithm.
# @see http://www.dropby.com/NYSIIS.html NYSIIS Code
# @example
#    Phonetic::RefinedNYSIIS.encode('Aumont') # => 'ANAD'
#    Phonetic::RefinedNYSIIS.encode('Schmidt') # => 'SNAD'
#    Phonetic::RefinedNYSIIS.encode('Phoenix') # => 'FANAC'
class RefinedNYSIIS < Algorithm
  # Substitutions applied first, in insertion order, before the leading
  # character of the word is remembered.
  FIRST_MAP = {
    /[SZ]+$/ => '',
    /^MAC/ => 'MC',
    /^PF/ => 'F',
    /IX$/ => 'IC',
    /EX$/ => 'EC',
    /(YE|EE|IE)$/ => 'Y',
    /(DT|RT|RD|NT|ND)$/ => 'D',
    /(.)EV/ => '\1EF'
  }.freeze

  # Substitutions applied second, in insertion order. The final entry
  # removes non-word characters and collapses runs of a repeated character.
  SECOND_MAP = {
    /([AEIOU])W/ => '\1',
    /[AEIOU]+/ => 'A',
    'GHT' => 'GT',
    'DG' => 'G',
    'PH' => 'F',
    'AH' => 'A',
    /(.)HA/ => '\1A',
    'KN' => 'N',
    'K' => 'C',
    /(.)M/ => '\1N',
    /(.)Q/ => '\1G',
    'SH' => 'S',
    'SCH' => 'S',
    'YW' => 'Y',
    /(.)Y(.)/ => '\1A\2',
    'WR' => 'R',
    /(.)Z/ => '\1S',
    /AY$/ => 'Y',
    /A+$/ => '',
    /[^\w\s]|(.)(?=\1)/ => ''
  }.freeze

  # Convert string to Refined NYSIIS code
  # @param str [String, nil] text to encode
  # @param options [Hash] +:trim+ (true when no hash is given) limits the
  #   result to its first six characters
  # @return [String] the code, or '' when there is nothing to encode
  def self.encode(str, options = { trim: true })
    encode_word(str, options)
  end

  # Convert word to its Refined NYSIIS code
  # @param word [String, nil] word to encode
  # @param options [Hash] +:trim+ (true when no hash is given) limits the
  #   result to its first six characters
  # @return [String] the code, or '' when the word is nil, empty or has
  #   no A-Z letters once upcased
  def self.encode_word(word, options = { trim: true })
    return '' if !word || word.empty?
    trim = options[:trim]
    w = word.upcase.strip
    # Drop a trailing generation suffix (roman numeral, JR, SR) while the
    # separating whitespace is still present, then keep letters only.
    w.gsub!(/\s([IV]+|[JS]R)$/, '')
    w.gsub!(/[^A-Z]/, '')
    # Return '' rather than nil so every "nothing to encode" case yields
    # the same value.
    return '' if w.empty?
    FIRST_MAP.each { |rx, v| w.gsub!(rx, v) }
    first_char = w[0]
    SECOND_MAP.each { |rx, v| w.gsub!(rx, v) }
    # SECOND_MAP rewrites every vowel run to 'A'; put the original leading
    # vowel back at the front.
    w.gsub!(/^A*/, first_char) if vowel?(first_char)
    w = w[0..5] if trim
    w
  end

  # Internal helper. It is left publicly callable: the bare +private+
  # keyword that used to precede it has no effect on methods defined
  # with +def self.+, so it was removed rather than changing visibility.
  # @api private
  # @param char [String, nil] character to test
  # @return [Integer, nil] truthy when +char+ starts with A, E, I, O or U
  def self.vowel?(char)
    char =~ /^[AEIOU]/
  end
end
72
+ end
@@ -1,3 +1,3 @@
1
1
  module Phonetic
2
- VERSION = '1.1.0'
2
+ VERSION = '1.2.0'
3
3
  end
@@ -1,27 +1,29 @@
1
- # coding: utf-8
2
- lib = File.expand_path('../lib', __FILE__)
3
- $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
- require 'phonetic/version'
5
-
6
- Gem::Specification.new do |spec|
7
- spec.name = 'phonetic'
8
- spec.version = Phonetic::VERSION
9
- spec.authors = ['n7v']
10
- spec.email = ['novsem@gmail.com']
11
- spec.description = %q{Ruby library for phonetic algorithms. It supports Soundex, Metaphone, Caverphone, NYSIIS and others.}
12
- spec.summary = %q{Ruby library for phonetic algorithms.}
13
- spec.homepage = 'http://github.com/n7v/phonetic'
14
- spec.license = 'MIT'
15
-
16
- spec.files = `git ls-files`.split($/)
17
- spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
18
- spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
19
- spec.require_paths = ['lib']
20
-
21
- spec.add_development_dependency 'bundler', '~> 1.3'
22
- spec.add_development_dependency 'rake'
23
- spec.add_development_dependency 'rspec'
24
- spec.add_development_dependency 'simplecov'
25
- spec.add_development_dependency 'coveralls'
26
- spec.add_development_dependency 'yard'
27
- end
1
# coding: utf-8
# Gem specification for the phonetic library.

# Put lib/ on the load path so the version constant can be required.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'phonetic/version'

Gem::Specification.new do |gem|
  gem.name        = 'phonetic'
  gem.version     = Phonetic::VERSION
  gem.authors     = ['n7v']
  gem.email       = ['novsem@gmail.com']
  gem.summary     = 'Ruby library for phonetic algorithms.'
  gem.description = 'Ruby library for phonetic algorithms. It supports Soundex, Metaphone, Caverphone, NYSIIS and others.'
  gem.homepage    = 'http://github.com/n7v/phonetic'
  gem.license     = 'MIT'

  gem.required_ruby_version = '>= 1.9'

  # Package whatever git tracks; executables and test files are picked
  # out of that list by path prefix.
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}) { |path| File.basename(path) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ['lib']

  gem.add_development_dependency 'bundler', '~> 1.3'
  %w(rake rspec simplecov coveralls yard).each do |dev_gem|
    gem.add_development_dependency dev_gem
  end
end
@@ -1,61 +1,10 @@
1
1
  require 'spec_helper'
2
-
3
- # Examples are taken from Caversham Project's paper
4
- # http://caversham.otago.ac.nz/files/working/ctp150804.pdf
5
- CAVERPHONE2_TEST_TABLE = {
6
- 'STFNSN1111' => ['Stevenson'],
7
- 'PTA1111111' => ['Peter'],
8
- 'AT11111111' => [
9
- 'add', 'aid', 'at', 'art', 'eat', 'earth', 'head', 'hit', 'hot',
10
- 'hold', 'hard', 'heart', 'it', 'out', 'old'
11
- ],
12
- 'RTA1111111' => ['rather', 'ready', 'writer'],
13
- 'SSA1111111' => ['social'],
14
- 'APA1111111' => ['able', 'appear'],
15
- 'TTA1111111' => [
16
- 'Darda', 'Datha', 'Dedie', 'Deedee', 'Deerdre', 'Deidre', 'Deirdre',
17
- 'Detta', 'Didi', 'Didier', 'Dido', 'Dierdre', 'Dieter', 'Dita',
18
- 'Ditter', 'Dodi', 'Dodie', 'Dody', 'Doherty', 'Dorthea', 'Dorthy',
19
- 'Doti', 'Dotti', 'Dottie', 'Dotty', 'Doty', 'Doughty', 'Douty',
20
- 'Dowdell', 'Duthie', 'Tada', 'Taddeo', 'Tadeo', 'Tadio', 'Tati',
21
- 'Teador', 'Tedda', 'Tedder', 'Teddi', 'Teddie', 'Teddy', 'Tedi',
22
- 'Tedie', 'Teeter', 'Teodoor', 'Teodor', 'Terti', 'Theda', 'Theodor',
23
- 'Theodore', 'Theta', 'Thilda', 'Thordia', 'Tilda', 'Tildi', 'Tildie',
24
- 'Tildy', 'Tita', 'Tito', 'Tjader', 'Toddie', 'Toddy', 'Torto', 'Tuddor',
25
- 'Tudor', 'Turtle', 'Tuttle', 'Tutto'
26
- ],
27
- 'KLN1111111' => [
28
- 'Cailean', 'Calan', 'Calen', 'Callahan', 'Callan', 'Callean',
29
- 'Carleen', 'Carlen', 'Carlene', 'Carlin', 'Carline', 'Carlyn',
30
- 'Carlynn', 'Carlynne', 'Charlean', 'Charleen', 'Charlene',
31
- 'Charline', 'Cherlyn', 'Chirlin', 'Clein', 'Cleon', 'Cline',
32
- 'Cohleen', 'Colan', 'Coleen', 'Colene', 'Colin', 'Colleen',
33
- 'Collen', 'Collin', 'Colline', 'Colon', 'Cullan', 'Cullen',
34
- 'Cullin', 'Gaelan', 'Galan', 'Galen', 'Garlan', 'Garlen',
35
- 'Gaulin', 'Gayleen', 'Gaylene', 'Giliane', 'Gillan', 'Gillian',
36
- 'Glen', 'Glenn', 'Glyn', 'Glynn', 'Gollin', 'Gorlin', 'Kalin',
37
- 'Karlan', 'Karleen', 'Karlen', 'Karlene', 'Karlin', 'Karlyn',
38
- 'Kaylyn', 'Keelin', 'Kellen', 'Kellene', 'Kellyann', 'Kellyn',
39
- 'Khalin', 'Kilan', 'Kilian', 'Killen', 'Killian', 'Killion',
40
- 'Klein', 'Kleon', 'Kline', 'Koerlin', 'Kylen', 'Kylynn', 'Quillan',
41
- 'Quillon', 'Qulllon', 'Xylon'
42
- ],
43
- 'TN11111111' => [
44
- 'Dan', 'Dane', 'Dann', 'Darn', 'Daune', 'Dawn', 'Ddene', 'Dean', 'Deane',
45
- 'Deanne', 'DeeAnn', 'Deeann', 'Deeanne', 'Deeyn', 'Den', 'Dene', 'Denn',
46
- 'Deonne', 'Diahann', 'Dian', 'Diane', 'Diann', 'Dianne', 'Diannne', 'Dine',
47
- 'Dion', 'Dione', 'Dionne', 'Doane', 'Doehne', 'Don', 'Donn', 'Doone', 'Dorn',
48
- 'Down', 'Downe', 'Duane', 'Dun', 'Dunn', 'Duyne', 'Dyan', 'Dyane', 'Dyann',
49
- 'Dyanne', 'Dyun', 'Tan', 'Tann', 'Teahan', 'Ten', 'Tenn', 'Terhune', 'Thain',
50
- 'Thaine', 'Thane', 'Thanh', 'Thayne', 'Theone', 'Thin', 'Thorn', 'Thorne',
51
- 'Thun', 'Thynne', 'Tien', 'Tine', 'Tjon', 'Town', 'Towne', 'Turne', 'Tyne'
52
- ]
53
- }
2
+ require 'support/caverphone2_data'
54
3
 
55
4
  describe Phonetic::Caverphone2 do
56
5
  describe '.encode' do
57
6
  it 'should return Caverphone 2 value of string' do
58
- CAVERPHONE2_TEST_TABLE.each do |code, words|
7
+ Phonetic::CAVERPHONE2_TEST_TABLE.each do |code, words|
59
8
  words.each do |word|
60
9
  res = Phonetic::Caverphone2.encode(word)
61
10
  res.should eq(code), "expected: #{code}\ngot: #{res}\nword: #{word}"