soundcord 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +1 -1
- data/lib/soundcord.rb +3 -5
- data/lib/{algorithm.rb → soundcord/algorithm.rb} +19 -16
- data/lib/{config.rb → soundcord/config.rb} +5 -9
- data/soundcord.gemspec +1 -1
- data/test/{test_soundcord.rb → languages/en/test_soundcord.rb} +8 -52
- data/test/languages/pt_br/test_soundcord.rb +13 -8
- data/test/test_config.rb +6 -8
- metadata +41 -22
- data/lib/soundcord/version.rb +0 -8
data/Rakefile
CHANGED
@@ -2,7 +2,7 @@ require 'rake/testtask'
|
|
2
2
|
|
3
3
|
Rake::TestTask.new do |t|
|
4
4
|
t.libs << "test"
|
5
|
-
t.test_files = FileList['test/test*.rb']
|
5
|
+
t.test_files = FileList['test/test*.rb'] + FileList['test/languages/pt_br/test*.rb'] + FileList['test/languages/en/test*.rb']
|
6
6
|
t.verbose = true
|
7
7
|
end
|
8
8
|
|
data/lib/soundcord.rb
CHANGED
@@ -1,13 +1,11 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
|
3
|
-
require 'soundcord/
|
4
|
-
require 'soundcord/
|
5
|
-
require 'algorithm'
|
6
|
-
require 'config'
|
3
|
+
require 'soundcord/algorithm'
|
4
|
+
require 'soundcord/config'
|
7
5
|
|
8
6
|
class SoundCord
|
9
7
|
def self.phonetize text
|
10
|
-
process_text
|
8
|
+
process_text text
|
11
9
|
end
|
12
10
|
|
13
11
|
def self.compare term_1, term_2
|
@@ -1,29 +1,32 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
|
3
|
+
require 'soundcord/integrations/string'
|
4
|
+
require 'soundcord/integrations/array'
|
5
|
+
|
3
6
|
class SoundCord
|
4
|
-
|
7
|
+
private
|
5
8
|
def self.process_text text
|
6
9
|
load_language unless language
|
7
10
|
|
8
11
|
text = text.downcase
|
9
12
|
|
10
13
|
lang_yml.each do |key, values|
|
11
|
-
if key ==
|
14
|
+
if key == 'terminations'
|
12
15
|
text = process_group text, values, :terminations => true
|
13
|
-
elsif key ==
|
16
|
+
elsif key == 'initiations'
|
14
17
|
text = process_group text, values, :initiations => true
|
15
|
-
elsif key ==
|
18
|
+
elsif key == 'follow_ups'
|
16
19
|
text = process_follow_ups text, values, options
|
17
|
-
elsif key ==
|
20
|
+
elsif key == 'second_followed'
|
18
21
|
text = process_second_followed text, values, options
|
19
|
-
elsif key ==
|
20
|
-
text =
|
21
|
-
elsif !key.include?
|
22
|
+
elsif key == 'vowels_pronunciation_insignificance'
|
23
|
+
text = process_vowels_pronunciation_insignificance text, values, options
|
24
|
+
elsif !key.include? 'duplicate'
|
22
25
|
text = process_group text, values, options
|
23
26
|
end
|
24
27
|
end
|
25
28
|
|
26
|
-
text = remove_duplicity text, :duplicate_exceptions => (lang_yml[
|
29
|
+
text = remove_duplicity text, :duplicate_exceptions => (lang_yml['duplicate_exceptions'])
|
27
30
|
|
28
31
|
text.upcase
|
29
32
|
end
|
@@ -71,11 +74,11 @@ class SoundCord
|
|
71
74
|
return text
|
72
75
|
end
|
73
76
|
|
74
|
-
def
|
77
|
+
def self.process_vowels_pronunciation_insignificance text, group, options = {}
|
75
78
|
group.each do |key, value|
|
76
|
-
regexp =
|
79
|
+
regexp = mount_vowels_pronunciation_insignificance_regexp key
|
77
80
|
text =~ regexp
|
78
|
-
text = text.gsub regexp, $1
|
81
|
+
text = text.gsub regexp, ($1 || '')
|
79
82
|
end
|
80
83
|
return text
|
81
84
|
end
|
@@ -117,11 +120,11 @@ class SoundCord
|
|
117
120
|
|
118
121
|
def self.mount_second_followed_by_regexp char, group
|
119
122
|
regexp = "/" + not_first(char) + mount_follow_up_regexp(char, group, :not_eval => true) + "/"
|
120
|
-
eval
|
123
|
+
eval regexp
|
121
124
|
end
|
122
125
|
|
123
|
-
def self.
|
124
|
-
eval "/([aeiou])#{char}(
|
126
|
+
def self.mount_vowels_pronunciation_insignificance_regexp char
|
127
|
+
eval "/([aeiou])#{char}(?=\\b|[^aeiou])/"
|
125
128
|
end
|
126
129
|
|
127
130
|
def self.mount_followed_by_consonant_regexp char
|
@@ -131,4 +134,4 @@ class SoundCord
|
|
131
134
|
def self.not_first char
|
132
135
|
"([^#{char}]|^)"
|
133
136
|
end
|
134
|
-
end
|
137
|
+
end
|
@@ -4,21 +4,17 @@ class SoundCord
|
|
4
4
|
DEFAULT_LANGUAGE = 'pt-BR'
|
5
5
|
LANGUAGES_DIRECTORY = "#{Dir.pwd}/lib/soundcord/languages/"
|
6
6
|
|
7
|
+
class << self
|
8
|
+
attr_reader :language, :options
|
9
|
+
end
|
10
|
+
|
7
11
|
def self.load_language lang = DEFAULT_LANGUAGE
|
8
12
|
@language = lang
|
9
13
|
@lang_yml = YAML::load_file(LANGUAGES_DIRECTORY + "#{lang}.yml")[language]
|
10
14
|
@options = { :use_vowels => false }
|
11
15
|
end
|
12
16
|
|
13
|
-
|
14
|
-
@language
|
15
|
-
end
|
16
|
-
|
17
|
-
def self.options
|
18
|
-
@options
|
19
|
-
end
|
20
|
-
|
21
|
-
private
|
17
|
+
private
|
22
18
|
def self.lang_yml
|
23
19
|
@lang_yml
|
24
20
|
end
|
data/soundcord.gemspec
CHANGED
@@ -3,7 +3,7 @@ Gem::Specification.new do |s|
|
|
3
3
|
s.author = 'Lukas Alexandre'
|
4
4
|
s.email = 'lukeskytm@gmail.com'
|
5
5
|
s.homepage = 'http://lukasalexandre.github.com/soundcord'
|
6
|
-
s.version = "0.2.
|
6
|
+
s.version = "0.2.1"
|
7
7
|
s.date = Date.today
|
8
8
|
s.summary = %q{A phonetic algorithm for indexing of words by their pronunciation.}
|
9
9
|
s.description = %q{"Make comparisons of phonetically similar terms easier."}
|
@@ -4,50 +4,6 @@ require 'test/unit'
|
|
4
4
|
require 'soundcord'
|
5
5
|
|
6
6
|
class SoundCordTest < Test::Unit::TestCase
|
7
|
-
# pt-BR
|
8
|
-
def test_simple_words_pt_br
|
9
|
-
SoundCord.load_language 'pt-BR'
|
10
|
-
|
11
|
-
assert_equal "J", "João".phonetize
|
12
|
-
assert_equal "MR", "Maria".phonetize
|
13
|
-
assert_equal "LM", "Helena".phonetize
|
14
|
-
assert_equal "VLM", "Valmir".phonetize
|
15
|
-
assert_equal "VLM", "Walmir".phonetize
|
16
|
-
end
|
17
|
-
def test_simple_comparisons_pt_br
|
18
|
-
SoundCord.load_language 'pt-BR'
|
19
|
-
|
20
|
-
assert_equal true, "Joao".homophone?("João")
|
21
|
-
assert_equal true, "Helena".homophone?("Elena")
|
22
|
-
assert_equal true, "Walmir".homophone?("Valmir")
|
23
|
-
assert_equal true, "Marria".homophone?("Maria")
|
24
|
-
assert_equal true, "Wagner".homophone?("Vagner")
|
25
|
-
assert_equal true, "Mirela".homophone?("Mirella")
|
26
|
-
assert_equal true, "Artur".homophone?("Arthur")
|
27
|
-
assert_equal true, "Diego".homophone?("Dyego")
|
28
|
-
assert_equal true, "Felipe".homophone?("Phelipe")
|
29
|
-
assert_equal true, "Filipe".homophone?("Felipe")
|
30
|
-
assert_equal true, "Phelipe".homophone?("Filipe")
|
31
|
-
assert_equal true, "Philippe".homophone?("Felipe")
|
32
|
-
end
|
33
|
-
def test_special_chars_pt_br
|
34
|
-
SoundCord.load_language 'pt-BR'
|
35
|
-
|
36
|
-
assert_equal true, "Luçia".homophone?("lucia")
|
37
|
-
assert_equal true, "Lúcio".homophone?("lucio")
|
38
|
-
end
|
39
|
-
def test_find_in_collection_pt_br
|
40
|
-
SoundCord.load_language 'pt-BR'
|
41
|
-
|
42
|
-
list = %w( saola paulo saulo ricardo sallo )
|
43
|
-
expected = %w( saola saulo sallo )
|
44
|
-
assert_equal expected, list.homophones("saulo")
|
45
|
-
list = %w( leonardo lucene rodrigo luciana lussene )
|
46
|
-
expected = %w( lucene luciana lussene )
|
47
|
-
assert_equal expected, list.homophones("lucene")
|
48
|
-
end
|
49
|
-
|
50
|
-
# en
|
51
7
|
def test_initiations_en
|
52
8
|
SoundCord.load_language 'en'
|
53
9
|
|
@@ -59,7 +15,6 @@ class SoundCordTest < Test::Unit::TestCase
|
|
59
15
|
assert_equal "NF", "knife".phonetize
|
60
16
|
assert_equal "NMNK", "pneumonic".phonetize
|
61
17
|
end
|
62
|
-
|
63
18
|
def test_unusual_combinations_en
|
64
19
|
SoundCord.load_language 'en'
|
65
20
|
|
@@ -70,26 +25,22 @@ class SoundCordTest < Test::Unit::TestCase
|
|
70
25
|
assert_equal "FS", "phase".phonetize
|
71
26
|
assert_equal "BKR", "beggar".phonetize
|
72
27
|
end
|
73
|
-
|
74
28
|
def test_terminations_en
|
75
29
|
SoundCord.load_language 'en'
|
76
30
|
|
77
31
|
assert_equal "LM", "lmb".phonetize
|
78
32
|
end
|
79
|
-
|
80
33
|
def test_middle_en
|
81
34
|
SoundCord.load_language 'en'
|
82
35
|
|
83
36
|
# couldn't remember a better word with SCH in the middle
|
84
37
|
assert_equal "PRSK", "porsche".phonetize
|
85
38
|
end
|
86
|
-
|
87
39
|
def test_duplicate_exceptions_en
|
88
40
|
SoundCord.load_language 'en'
|
89
41
|
|
90
42
|
assert_equal "GKLS", "goggles".phonetize
|
91
43
|
end
|
92
|
-
|
93
44
|
def test_special_chars_en
|
94
45
|
SoundCord.load_language 'en'
|
95
46
|
|
@@ -99,17 +50,22 @@ class SoundCordTest < Test::Unit::TestCase
|
|
99
50
|
assert_equal true, "falue".homophone?("value")
|
100
51
|
assert_equal true, "data".homophone?("tada")
|
101
52
|
end
|
102
|
-
|
103
53
|
def test_second_follwed_by_en
|
104
54
|
SoundCord.load_language 'en'
|
105
55
|
|
106
56
|
assert_equal "JM", "ogema".phonetize
|
107
57
|
end
|
108
|
-
|
109
58
|
def test_vowels_pronunciation_insignificance_en
|
110
59
|
SoundCord.load_language 'en'
|
111
60
|
|
112
61
|
assert_equal "MSX", "messiah".phonetize
|
113
62
|
assert_equal "ML", "mehlia".phonetize
|
114
63
|
end
|
115
|
-
|
64
|
+
def test_find_in_collection_en
|
65
|
+
SoundCord.load_language 'en'
|
66
|
+
|
67
|
+
list = %w( mail male main Maine mane )
|
68
|
+
expected = %w( main Maine mane )
|
69
|
+
assert_equal expected, list.homophones("main")
|
70
|
+
end
|
71
|
+
end
|
@@ -4,14 +4,18 @@ require 'test/unit'
|
|
4
4
|
require 'soundcord'
|
5
5
|
|
6
6
|
class SoundCordTest < Test::Unit::TestCase
|
7
|
-
def
|
7
|
+
def test_simple_words_pt_br
|
8
|
+
SoundCord.load_language 'pt-BR'
|
9
|
+
|
8
10
|
assert_equal "J", "João".phonetize
|
9
11
|
assert_equal "MR", "Maria".phonetize
|
10
12
|
assert_equal "LM", "Helena".phonetize
|
11
13
|
assert_equal "VLM", "Valmir".phonetize
|
12
14
|
assert_equal "VLM", "Walmir".phonetize
|
13
15
|
end
|
14
|
-
def
|
16
|
+
def test_simple_comparisons_pt_br
|
17
|
+
SoundCord.load_language 'pt-BR'
|
18
|
+
|
15
19
|
assert_equal true, "Joao".homophone?("João")
|
16
20
|
assert_equal true, "Helena".homophone?("Elena")
|
17
21
|
assert_equal true, "Walmir".homophone?("Valmir")
|
@@ -25,14 +29,15 @@ class SoundCordTest < Test::Unit::TestCase
|
|
25
29
|
assert_equal true, "Phelipe".homophone?("Filipe")
|
26
30
|
assert_equal true, "Philippe".homophone?("Felipe")
|
27
31
|
end
|
28
|
-
def
|
29
|
-
|
30
|
-
|
31
|
-
def test_special_chars
|
32
|
+
def test_special_chars_pt_br
|
33
|
+
SoundCord.load_language 'pt-BR'
|
34
|
+
|
32
35
|
assert_equal true, "Luçia".homophone?("lucia")
|
33
36
|
assert_equal true, "Lúcio".homophone?("lucio")
|
34
37
|
end
|
35
|
-
def
|
38
|
+
def test_find_in_collection_pt_br
|
39
|
+
SoundCord.load_language 'pt-BR'
|
40
|
+
|
36
41
|
list = %w( saola paulo saulo ricardo sallo )
|
37
42
|
expected = %w( saola saulo sallo )
|
38
43
|
assert_equal expected, list.homophones("saulo")
|
@@ -40,4 +45,4 @@ class SoundCordTest < Test::Unit::TestCase
|
|
40
45
|
expected = %w( lucene luciana lussene )
|
41
46
|
assert_equal expected, list.homophones("lucene")
|
42
47
|
end
|
43
|
-
end
|
48
|
+
end
|
data/test/test_config.rb
CHANGED
@@ -1,15 +1,13 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
|
3
3
|
require 'test/unit'
|
4
|
-
require 'config'
|
4
|
+
require 'soundcord/config'
|
5
5
|
|
6
6
|
class SoundCordTest < Test::Unit::TestCase
|
7
7
|
def test_language_set_up
|
8
|
-
|
9
|
-
|
8
|
+
%w(pt-BR en).each do |lang|
|
9
|
+
SoundCord.load_language lang
|
10
|
+
assert_equal lang, SoundCord.language
|
11
|
+
end
|
10
12
|
end
|
11
|
-
|
12
|
-
SoundCord.load_language "en"
|
13
|
-
assert_equal "en", SoundCord.language
|
14
|
-
end
|
15
|
-
end
|
13
|
+
end
|
metadata
CHANGED
@@ -1,58 +1,77 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: soundcord
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 21
|
5
5
|
prerelease:
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 2
|
9
|
+
- 1
|
10
|
+
version: 0.2.1
|
6
11
|
platform: ruby
|
7
|
-
authors:
|
12
|
+
authors:
|
8
13
|
- Lukas Alexandre
|
9
14
|
autorequire:
|
10
15
|
bindir: bin
|
11
16
|
cert_chain: []
|
12
|
-
|
17
|
+
|
18
|
+
date: 2012-10-24 00:00:00 Z
|
13
19
|
dependencies: []
|
14
|
-
|
20
|
+
|
21
|
+
description: "\"Make comparisons of phonetically similar terms easier.\""
|
15
22
|
email: lukeskytm@gmail.com
|
16
23
|
executables: []
|
24
|
+
|
17
25
|
extensions: []
|
26
|
+
|
18
27
|
extra_rdoc_files: []
|
19
|
-
|
20
|
-
|
21
|
-
- lib/
|
28
|
+
|
29
|
+
files:
|
30
|
+
- lib/soundcord/algorithm.rb
|
31
|
+
- lib/soundcord/config.rb
|
22
32
|
- lib/soundcord/integrations/array.rb
|
23
33
|
- lib/soundcord/integrations/string.rb
|
24
|
-
- lib/soundcord/version.rb
|
25
34
|
- lib/soundcord.rb
|
35
|
+
- test/languages/en/test_soundcord.rb
|
26
36
|
- test/languages/pt_br/test_soundcord.rb
|
27
37
|
- test/test_array.rb
|
28
38
|
- test/test_config.rb
|
29
39
|
- test/test_performance.rb
|
30
|
-
- test/test_soundcord.rb
|
31
40
|
- test/test_string.rb
|
32
41
|
- Rakefile
|
33
42
|
- soundcord.gemspec
|
34
43
|
homepage: http://lukasalexandre.github.com/soundcord
|
35
44
|
licenses: []
|
45
|
+
|
36
46
|
post_install_message:
|
37
47
|
rdoc_options: []
|
38
|
-
|
48
|
+
|
49
|
+
require_paths:
|
39
50
|
- lib
|
40
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
51
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
41
52
|
none: false
|
42
|
-
requirements:
|
43
|
-
- -
|
44
|
-
- !ruby/object:Gem::Version
|
45
|
-
|
46
|
-
|
53
|
+
requirements:
|
54
|
+
- - ">="
|
55
|
+
- !ruby/object:Gem::Version
|
56
|
+
hash: 3
|
57
|
+
segments:
|
58
|
+
- 0
|
59
|
+
version: "0"
|
60
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
47
61
|
none: false
|
48
|
-
requirements:
|
49
|
-
- -
|
50
|
-
- !ruby/object:Gem::Version
|
51
|
-
|
62
|
+
requirements:
|
63
|
+
- - ">="
|
64
|
+
- !ruby/object:Gem::Version
|
65
|
+
hash: 3
|
66
|
+
segments:
|
67
|
+
- 0
|
68
|
+
version: "0"
|
52
69
|
requirements: []
|
70
|
+
|
53
71
|
rubyforge_project:
|
54
72
|
rubygems_version: 1.8.24
|
55
73
|
signing_key:
|
56
74
|
specification_version: 3
|
57
75
|
summary: A phonetic algorithm for indexing of words by their pronunciation.
|
58
76
|
test_files: []
|
77
|
+
|