soundcord 0.2.0 → 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +1 -1
- data/lib/soundcord.rb +3 -5
- data/lib/{algorithm.rb → soundcord/algorithm.rb} +19 -16
- data/lib/{config.rb → soundcord/config.rb} +5 -9
- data/soundcord.gemspec +1 -1
- data/test/{test_soundcord.rb → languages/en/test_soundcord.rb} +8 -52
- data/test/languages/pt_br/test_soundcord.rb +13 -8
- data/test/test_config.rb +6 -8
- metadata +41 -22
- data/lib/soundcord/version.rb +0 -8
data/Rakefile
CHANGED
@@ -2,7 +2,7 @@ require 'rake/testtask'
|
|
2
2
|
|
3
3
|
Rake::TestTask.new do |t|
|
4
4
|
t.libs << "test"
|
5
|
-
t.test_files = FileList['test/test*.rb']
|
5
|
+
t.test_files = FileList['test/test*.rb'] + FileList['test/languages/pt_br/test*.rb'] + FileList['test/languages/en/test*.rb']
|
6
6
|
t.verbose = true
|
7
7
|
end
|
8
8
|
|
data/lib/soundcord.rb
CHANGED
@@ -1,13 +1,11 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
|
3
|
-
require 'soundcord/
|
4
|
-
require 'soundcord/
|
5
|
-
require 'algorithm'
|
6
|
-
require 'config'
|
3
|
+
require 'soundcord/algorithm'
|
4
|
+
require 'soundcord/config'
|
7
5
|
|
8
6
|
class SoundCord
|
9
7
|
def self.phonetize text
|
10
|
-
process_text
|
8
|
+
process_text text
|
11
9
|
end
|
12
10
|
|
13
11
|
def self.compare term_1, term_2
|
@@ -1,29 +1,32 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
|
3
|
+
require 'soundcord/integrations/string'
|
4
|
+
require 'soundcord/integrations/array'
|
5
|
+
|
3
6
|
class SoundCord
|
4
|
-
|
7
|
+
private
|
5
8
|
def self.process_text text
|
6
9
|
load_language unless language
|
7
10
|
|
8
11
|
text = text.downcase
|
9
12
|
|
10
13
|
lang_yml.each do |key, values|
|
11
|
-
if key ==
|
14
|
+
if key == 'terminations'
|
12
15
|
text = process_group text, values, :terminations => true
|
13
|
-
elsif key ==
|
16
|
+
elsif key == 'initiations'
|
14
17
|
text = process_group text, values, :initiations => true
|
15
|
-
elsif key ==
|
18
|
+
elsif key == 'follow_ups'
|
16
19
|
text = process_follow_ups text, values, options
|
17
|
-
elsif key ==
|
20
|
+
elsif key == 'second_followed'
|
18
21
|
text = process_second_followed text, values, options
|
19
|
-
elsif key ==
|
20
|
-
text =
|
21
|
-
elsif !key.include?
|
22
|
+
elsif key == 'vowels_pronunciation_insignificance'
|
23
|
+
text = process_vowels_pronunciation_insignificance text, values, options
|
24
|
+
elsif !key.include? 'duplicate'
|
22
25
|
text = process_group text, values, options
|
23
26
|
end
|
24
27
|
end
|
25
28
|
|
26
|
-
text = remove_duplicity text, :duplicate_exceptions => (lang_yml[
|
29
|
+
text = remove_duplicity text, :duplicate_exceptions => (lang_yml['duplicate_exceptions'])
|
27
30
|
|
28
31
|
text.upcase
|
29
32
|
end
|
@@ -71,11 +74,11 @@ class SoundCord
|
|
71
74
|
return text
|
72
75
|
end
|
73
76
|
|
74
|
-
def
|
77
|
+
def self.process_vowels_pronunciation_insignificance text, group, options = {}
|
75
78
|
group.each do |key, value|
|
76
|
-
regexp =
|
79
|
+
regexp = mount_vowels_pronunciation_insignificance_regexp key
|
77
80
|
text =~ regexp
|
78
|
-
text = text.gsub regexp, $1
|
81
|
+
text = text.gsub regexp, ($1 || '')
|
79
82
|
end
|
80
83
|
return text
|
81
84
|
end
|
@@ -117,11 +120,11 @@ class SoundCord
|
|
117
120
|
|
118
121
|
def self.mount_second_followed_by_regexp char, group
|
119
122
|
regexp = "/" + not_first(char) + mount_follow_up_regexp(char, group, :not_eval => true) + "/"
|
120
|
-
eval
|
123
|
+
eval regexp
|
121
124
|
end
|
122
125
|
|
123
|
-
def self.
|
124
|
-
eval "/([aeiou])#{char}(
|
126
|
+
def self.mount_vowels_pronunciation_insignificance_regexp char
|
127
|
+
eval "/([aeiou])#{char}(?=\\b|[^aeiou])/"
|
125
128
|
end
|
126
129
|
|
127
130
|
def self.mount_followed_by_consonant_regexp char
|
@@ -131,4 +134,4 @@ class SoundCord
|
|
131
134
|
def self.not_first char
|
132
135
|
"([^#{char}]|^)"
|
133
136
|
end
|
134
|
-
end
|
137
|
+
end
|
@@ -4,21 +4,17 @@ class SoundCord
|
|
4
4
|
DEFAULT_LANGUAGE = 'pt-BR'
|
5
5
|
LANGUAGES_DIRECTORY = "#{Dir.pwd}/lib/soundcord/languages/"
|
6
6
|
|
7
|
+
class << self
|
8
|
+
attr_reader :language, :options
|
9
|
+
end
|
10
|
+
|
7
11
|
def self.load_language lang = DEFAULT_LANGUAGE
|
8
12
|
@language = lang
|
9
13
|
@lang_yml = YAML::load_file(LANGUAGES_DIRECTORY + "#{lang}.yml")[language]
|
10
14
|
@options = { :use_vowels => false }
|
11
15
|
end
|
12
16
|
|
13
|
-
|
14
|
-
@language
|
15
|
-
end
|
16
|
-
|
17
|
-
def self.options
|
18
|
-
@options
|
19
|
-
end
|
20
|
-
|
21
|
-
private
|
17
|
+
private
|
22
18
|
def self.lang_yml
|
23
19
|
@lang_yml
|
24
20
|
end
|
data/soundcord.gemspec
CHANGED
@@ -3,7 +3,7 @@ Gem::Specification.new do |s|
|
|
3
3
|
s.author = 'Lukas Alexandre'
|
4
4
|
s.email = 'lukeskytm@gmail.com'
|
5
5
|
s.homepage = 'http://lukasalexandre.github.com/soundcord'
|
6
|
-
s.version = "0.2.
|
6
|
+
s.version = "0.2.1"
|
7
7
|
s.date = Date.today
|
8
8
|
s.summary = %q{A phonetic algorithm for indexing of words by their pronunciation.}
|
9
9
|
s.description = %q{"Make comparisons of phonetically similar terms easier."}
|
@@ -4,50 +4,6 @@ require 'test/unit'
|
|
4
4
|
require 'soundcord'
|
5
5
|
|
6
6
|
class SoundCordTest < Test::Unit::TestCase
|
7
|
-
# pt-BR
|
8
|
-
def test_simple_words_pt_br
|
9
|
-
SoundCord.load_language 'pt-BR'
|
10
|
-
|
11
|
-
assert_equal "J", "João".phonetize
|
12
|
-
assert_equal "MR", "Maria".phonetize
|
13
|
-
assert_equal "LM", "Helena".phonetize
|
14
|
-
assert_equal "VLM", "Valmir".phonetize
|
15
|
-
assert_equal "VLM", "Walmir".phonetize
|
16
|
-
end
|
17
|
-
def test_simple_comparisons_pt_br
|
18
|
-
SoundCord.load_language 'pt-BR'
|
19
|
-
|
20
|
-
assert_equal true, "Joao".homophone?("João")
|
21
|
-
assert_equal true, "Helena".homophone?("Elena")
|
22
|
-
assert_equal true, "Walmir".homophone?("Valmir")
|
23
|
-
assert_equal true, "Marria".homophone?("Maria")
|
24
|
-
assert_equal true, "Wagner".homophone?("Vagner")
|
25
|
-
assert_equal true, "Mirela".homophone?("Mirella")
|
26
|
-
assert_equal true, "Artur".homophone?("Arthur")
|
27
|
-
assert_equal true, "Diego".homophone?("Dyego")
|
28
|
-
assert_equal true, "Felipe".homophone?("Phelipe")
|
29
|
-
assert_equal true, "Filipe".homophone?("Felipe")
|
30
|
-
assert_equal true, "Phelipe".homophone?("Filipe")
|
31
|
-
assert_equal true, "Philippe".homophone?("Felipe")
|
32
|
-
end
|
33
|
-
def test_special_chars_pt_br
|
34
|
-
SoundCord.load_language 'pt-BR'
|
35
|
-
|
36
|
-
assert_equal true, "Luçia".homophone?("lucia")
|
37
|
-
assert_equal true, "Lúcio".homophone?("lucio")
|
38
|
-
end
|
39
|
-
def test_find_in_collection_pt_br
|
40
|
-
SoundCord.load_language 'pt-BR'
|
41
|
-
|
42
|
-
list = %w( saola paulo saulo ricardo sallo )
|
43
|
-
expected = %w( saola saulo sallo )
|
44
|
-
assert_equal expected, list.homophones("saulo")
|
45
|
-
list = %w( leonardo lucene rodrigo luciana lussene )
|
46
|
-
expected = %w( lucene luciana lussene )
|
47
|
-
assert_equal expected, list.homophones("lucene")
|
48
|
-
end
|
49
|
-
|
50
|
-
# en
|
51
7
|
def test_initiations_en
|
52
8
|
SoundCord.load_language 'en'
|
53
9
|
|
@@ -59,7 +15,6 @@ class SoundCordTest < Test::Unit::TestCase
|
|
59
15
|
assert_equal "NF", "knife".phonetize
|
60
16
|
assert_equal "NMNK", "pneumonic".phonetize
|
61
17
|
end
|
62
|
-
|
63
18
|
def test_unusual_combinations_en
|
64
19
|
SoundCord.load_language 'en'
|
65
20
|
|
@@ -70,26 +25,22 @@ class SoundCordTest < Test::Unit::TestCase
|
|
70
25
|
assert_equal "FS", "phase".phonetize
|
71
26
|
assert_equal "BKR", "beggar".phonetize
|
72
27
|
end
|
73
|
-
|
74
28
|
def test_terminations_en
|
75
29
|
SoundCord.load_language 'en'
|
76
30
|
|
77
31
|
assert_equal "LM", "lmb".phonetize
|
78
32
|
end
|
79
|
-
|
80
33
|
def test_middle_en
|
81
34
|
SoundCord.load_language 'en'
|
82
35
|
|
83
36
|
# couldn't remember a better word with SCH in the middle
|
84
37
|
assert_equal "PRSK", "porsche".phonetize
|
85
38
|
end
|
86
|
-
|
87
39
|
def test_duplicate_exceptions_en
|
88
40
|
SoundCord.load_language 'en'
|
89
41
|
|
90
42
|
assert_equal "GKLS", "goggles".phonetize
|
91
43
|
end
|
92
|
-
|
93
44
|
def test_special_chars_en
|
94
45
|
SoundCord.load_language 'en'
|
95
46
|
|
@@ -99,17 +50,22 @@ class SoundCordTest < Test::Unit::TestCase
|
|
99
50
|
assert_equal true, "falue".homophone?("value")
|
100
51
|
assert_equal true, "data".homophone?("tada")
|
101
52
|
end
|
102
|
-
|
103
53
|
def test_second_follwed_by_en
|
104
54
|
SoundCord.load_language 'en'
|
105
55
|
|
106
56
|
assert_equal "JM", "ogema".phonetize
|
107
57
|
end
|
108
|
-
|
109
58
|
def test_vowels_pronunciation_insignificance_en
|
110
59
|
SoundCord.load_language 'en'
|
111
60
|
|
112
61
|
assert_equal "MSX", "messiah".phonetize
|
113
62
|
assert_equal "ML", "mehlia".phonetize
|
114
63
|
end
|
115
|
-
|
64
|
+
def test_find_in_collection_en
|
65
|
+
SoundCord.load_language 'en'
|
66
|
+
|
67
|
+
list = %w( mail male main Maine mane )
|
68
|
+
expected = %w( main Maine mane )
|
69
|
+
assert_equal expected, list.homophones("main")
|
70
|
+
end
|
71
|
+
end
|
@@ -4,14 +4,18 @@ require 'test/unit'
|
|
4
4
|
require 'soundcord'
|
5
5
|
|
6
6
|
class SoundCordTest < Test::Unit::TestCase
|
7
|
-
def
|
7
|
+
def test_simple_words_pt_br
|
8
|
+
SoundCord.load_language 'pt-BR'
|
9
|
+
|
8
10
|
assert_equal "J", "João".phonetize
|
9
11
|
assert_equal "MR", "Maria".phonetize
|
10
12
|
assert_equal "LM", "Helena".phonetize
|
11
13
|
assert_equal "VLM", "Valmir".phonetize
|
12
14
|
assert_equal "VLM", "Walmir".phonetize
|
13
15
|
end
|
14
|
-
def
|
16
|
+
def test_simple_comparisons_pt_br
|
17
|
+
SoundCord.load_language 'pt-BR'
|
18
|
+
|
15
19
|
assert_equal true, "Joao".homophone?("João")
|
16
20
|
assert_equal true, "Helena".homophone?("Elena")
|
17
21
|
assert_equal true, "Walmir".homophone?("Valmir")
|
@@ -25,14 +29,15 @@ class SoundCordTest < Test::Unit::TestCase
|
|
25
29
|
assert_equal true, "Phelipe".homophone?("Filipe")
|
26
30
|
assert_equal true, "Philippe".homophone?("Felipe")
|
27
31
|
end
|
28
|
-
def
|
29
|
-
|
30
|
-
|
31
|
-
def test_special_chars
|
32
|
+
def test_special_chars_pt_br
|
33
|
+
SoundCord.load_language 'pt-BR'
|
34
|
+
|
32
35
|
assert_equal true, "Luçia".homophone?("lucia")
|
33
36
|
assert_equal true, "Lúcio".homophone?("lucio")
|
34
37
|
end
|
35
|
-
def
|
38
|
+
def test_find_in_collection_pt_br
|
39
|
+
SoundCord.load_language 'pt-BR'
|
40
|
+
|
36
41
|
list = %w( saola paulo saulo ricardo sallo )
|
37
42
|
expected = %w( saola saulo sallo )
|
38
43
|
assert_equal expected, list.homophones("saulo")
|
@@ -40,4 +45,4 @@ class SoundCordTest < Test::Unit::TestCase
|
|
40
45
|
expected = %w( lucene luciana lussene )
|
41
46
|
assert_equal expected, list.homophones("lucene")
|
42
47
|
end
|
43
|
-
end
|
48
|
+
end
|
data/test/test_config.rb
CHANGED
@@ -1,15 +1,13 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
|
3
3
|
require 'test/unit'
|
4
|
-
require 'config'
|
4
|
+
require 'soundcord/config'
|
5
5
|
|
6
6
|
class SoundCordTest < Test::Unit::TestCase
|
7
7
|
def test_language_set_up
|
8
|
-
|
9
|
-
|
8
|
+
%w(pt-BR en).each do |lang|
|
9
|
+
SoundCord.load_language lang
|
10
|
+
assert_equal lang, SoundCord.language
|
11
|
+
end
|
10
12
|
end
|
11
|
-
|
12
|
-
SoundCord.load_language "en"
|
13
|
-
assert_equal "en", SoundCord.language
|
14
|
-
end
|
15
|
-
end
|
13
|
+
end
|
metadata
CHANGED
@@ -1,58 +1,77 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: soundcord
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 21
|
5
5
|
prerelease:
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 2
|
9
|
+
- 1
|
10
|
+
version: 0.2.1
|
6
11
|
platform: ruby
|
7
|
-
authors:
|
12
|
+
authors:
|
8
13
|
- Lukas Alexandre
|
9
14
|
autorequire:
|
10
15
|
bindir: bin
|
11
16
|
cert_chain: []
|
12
|
-
|
17
|
+
|
18
|
+
date: 2012-10-24 00:00:00 Z
|
13
19
|
dependencies: []
|
14
|
-
|
20
|
+
|
21
|
+
description: "\"Make comparisons of phonetically similar terms easier.\""
|
15
22
|
email: lukeskytm@gmail.com
|
16
23
|
executables: []
|
24
|
+
|
17
25
|
extensions: []
|
26
|
+
|
18
27
|
extra_rdoc_files: []
|
19
|
-
|
20
|
-
|
21
|
-
- lib/
|
28
|
+
|
29
|
+
files:
|
30
|
+
- lib/soundcord/algorithm.rb
|
31
|
+
- lib/soundcord/config.rb
|
22
32
|
- lib/soundcord/integrations/array.rb
|
23
33
|
- lib/soundcord/integrations/string.rb
|
24
|
-
- lib/soundcord/version.rb
|
25
34
|
- lib/soundcord.rb
|
35
|
+
- test/languages/en/test_soundcord.rb
|
26
36
|
- test/languages/pt_br/test_soundcord.rb
|
27
37
|
- test/test_array.rb
|
28
38
|
- test/test_config.rb
|
29
39
|
- test/test_performance.rb
|
30
|
-
- test/test_soundcord.rb
|
31
40
|
- test/test_string.rb
|
32
41
|
- Rakefile
|
33
42
|
- soundcord.gemspec
|
34
43
|
homepage: http://lukasalexandre.github.com/soundcord
|
35
44
|
licenses: []
|
45
|
+
|
36
46
|
post_install_message:
|
37
47
|
rdoc_options: []
|
38
|
-
|
48
|
+
|
49
|
+
require_paths:
|
39
50
|
- lib
|
40
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
51
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
41
52
|
none: false
|
42
|
-
requirements:
|
43
|
-
- -
|
44
|
-
- !ruby/object:Gem::Version
|
45
|
-
|
46
|
-
|
53
|
+
requirements:
|
54
|
+
- - ">="
|
55
|
+
- !ruby/object:Gem::Version
|
56
|
+
hash: 3
|
57
|
+
segments:
|
58
|
+
- 0
|
59
|
+
version: "0"
|
60
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
47
61
|
none: false
|
48
|
-
requirements:
|
49
|
-
- -
|
50
|
-
- !ruby/object:Gem::Version
|
51
|
-
|
62
|
+
requirements:
|
63
|
+
- - ">="
|
64
|
+
- !ruby/object:Gem::Version
|
65
|
+
hash: 3
|
66
|
+
segments:
|
67
|
+
- 0
|
68
|
+
version: "0"
|
52
69
|
requirements: []
|
70
|
+
|
53
71
|
rubyforge_project:
|
54
72
|
rubygems_version: 1.8.24
|
55
73
|
signing_key:
|
56
74
|
specification_version: 3
|
57
75
|
summary: A phonetic algorithm for indexing of words by their pronunciation.
|
58
76
|
test_files: []
|
77
|
+
|