soundcord 0.2.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/soundcord.rb +7 -3
- data/lib/soundcord/config.rb +2 -2
- data/lib/soundcord/regexable.rb +40 -0
- data/lib/soundcord/word.rb +96 -0
- data/soundcord.gemspec +3 -3
- metadata +20 -39
- data/lib/soundcord/algorithm.rb +0 -137
data/lib/soundcord.rb
CHANGED
@@ -1,11 +1,15 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
|
3
|
-
require 'soundcord/algorithm'
|
4
3
|
require 'soundcord/config'
|
5
4
|
|
6
|
-
|
5
|
+
require 'soundcord/integrations/string'
|
6
|
+
require 'soundcord/integrations/array'
|
7
|
+
|
8
|
+
require 'soundcord/word'
|
9
|
+
|
10
|
+
module SoundCord
|
7
11
|
def self.phonetize text
|
8
|
-
|
12
|
+
Word.new(text).to_s
|
9
13
|
end
|
10
14
|
|
11
15
|
def self.compare term_1, term_2
|
data/lib/soundcord/config.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
require 'yaml'
|
2
2
|
|
3
|
-
|
3
|
+
module SoundCord
|
4
4
|
DEFAULT_LANGUAGE = 'pt-BR'
|
5
5
|
LANGUAGES_DIRECTORY = "#{Dir.pwd}/lib/soundcord/languages/"
|
6
6
|
|
@@ -8,7 +8,7 @@ class SoundCord
|
|
8
8
|
attr_reader :language, :options
|
9
9
|
end
|
10
10
|
|
11
|
-
def self.load_language
|
11
|
+
def self.load_language(lang = DEFAULT_LANGUAGE)
|
12
12
|
@language = lang
|
13
13
|
@lang_yml = YAML::load_file(LANGUAGES_DIRECTORY + "#{lang}.yml")[language]
|
14
14
|
@options = { :use_vowels => false }
|
@@ -0,0 +1,40 @@
|
|
1
|
+
# Builders for the regular expressions used by the phonetic substitution rules.
#
# Patterns are assembled as plain strings and compiled with Regexp.new instead
# of eval-ing a hand-built "/.../" literal, so rule text is always treated as
# pattern text and can never be executed as Ruby code.
module Regexable
  # Builds a regexp with a single capture group matching any alternative.
  #
  # sentence - a pattern fragment, or an Array of fragments (joined with "|").
  # options  - Hash; a truthy :initiations prefixes the pattern with "^", a
  #            truthy :terminations appends "\b". Other keys are ignored.
  #
  # Returns a Regexp.
  def mount_regexp(sentence, options = { :terminations => false, :initiations => false })
    pattern = "(#{join_alternatives(sentence)})"
    pattern = "^#{pattern}" if options[:initiations]
    pattern = "#{pattern}\\b" if options[:terminations]
    Regexp.new(pattern)
  end

  # Builds a pattern matching +prefix+ only when it is immediately followed by
  # one of the +sufix+ alternatives (positive lookahead, so the suffix itself
  # is not consumed by the match).
  #
  # prefix  - pattern fragment to match.
  # sufix   - a pattern fragment, or an Array of fragments (joined with "|").
  # options - Hash; with a truthy :not_eval the raw pattern String is returned
  #           (no delimiters) so it can be embedded in a larger pattern.
  #
  # Returns a Regexp, or a String when :not_eval is set.
  def mount_follow_up_regexp(prefix, sufix, options = {})
    pattern = "#{prefix}(?=(#{join_alternatives(sufix)}))"
    options[:not_eval] ? pattern : Regexp.new(pattern)
  end

  # Like mount_follow_up_regexp, but first captures (as group 1) either one
  # character outside the +char+ class or the start of a line; see not_first.
  #
  # Returns a Regexp.
  def mount_second_followed_by_regexp(char, group)
    Regexp.new(not_first(char) + mount_follow_up_regexp(char, group, :not_eval => true))
  end

  # Builds a regexp matching an unaccented vowel (captured as group 1) followed
  # by +char+, where +char+ is in turn followed by a word boundary or by
  # anything other than an unaccented vowel.
  #
  # Returns a Regexp.
  def mount_vowels_pronunciation_insignificance_regexp(char)
    Regexp.new("([aeiou])#{char}(?=\\b|[^aeiou])")
  end

  # Builds a regexp matching any character of +char+ that is not followed by an
  # unaccented vowel (this includes the end of the string).
  #
  # The previous implementation eval-ed the pattern without "/" delimiters,
  # which raised SyntaxError on every call.
  #
  # Returns a Regexp.
  def mount_followed_by_consonant_regexp(char)
    Regexp.new("[#{char}](?![aeiou])")
  end

  # Returns the pattern source (a String) of a capture group matching either a
  # single character not in the +char+ class or the start of a line.
  def not_first(char)
    "([^#{char}]|^)"
  end

  private

  # Joins an Array of pattern fragments with "|"; any other value is returned
  # unchanged and interpolated into the pattern by the caller.
  def join_alternatives(fragments)
    fragments.kind_of?(Array) ? fragments.join("|") : fragments
  end
end
|
@@ -0,0 +1,96 @@
|
|
1
|
+
require_relative 'regexable'
|
2
|
+
|
3
|
+
# A word paired with its phonetic code.
#
# original  - the text handed to the constructor.
# homophone - the phonetic code; nil until #to_s computes it, unless the caller
#             supplies a value to the constructor.
class SoundCord::Word < Struct.new(:original, :homophone)
  include Regexable

  # Returns the phonetic code, computing it on first use. The computed value is
  # cached in +homophone+, so repeated calls return the same string.
  def to_s
    homophone || process_text
  end

protected

  # Runs every rule group of the loaded language definition over a downcased
  # copy of +original+, in the order the groups are enumerated, then collapses
  # repeated characters and upcases the result.
  #
  # Returns the upcased phonetic code, which is also stored in +homophone+.
  def process_text
    SoundCord.load_language unless SoundCord.language

    self.homophone = original.downcase

    SoundCord.lang_yml.each do |key, values|
      case key
      when 'terminations'
        process_group! values, :terminations => true
      when 'initiations'
        process_group! values, :initiations => true
      when 'follow_ups'
        process_follow_ups! values, SoundCord.options
      when 'second_followed'
        process_second_followed! values, SoundCord.options
      when 'vowels_pronunciation_insignificance'
        process_vowels_pronunciation_insignificance! values, SoundCord.options
      else
        # Keys mentioning "duplicate" configure remove_duplicity! below and are
        # not substitution groups themselves.
        process_group! values, SoundCord.options unless key.include?('duplicate')
      end
    end

    remove_duplicity! :duplicate_exceptions => SoundCord.lang_yml['duplicate_exceptions']

    # Store the upcased value: previously only an upcased copy was returned, so
    # a second #to_s call answered with the cached, non-upcased string.
    self.homophone = homophone.upcase
  end

  # Collapses runs of the same character in +homophone+ into one occurrence.
  #
  # options - Hash; :duplicate_exceptions lists characters that are allowed to
  #           repeat (nil is treated as no exceptions). The hash is not
  #           modified.
  def remove_duplicity!(options)
    exceptions = options[:duplicate_exceptions] || []

    kept = []
    homophone.each_char do |char|
      kept << char unless kept.last == char && !exceptions.include?(char)
    end

    self.homophone = kept.join
  end

  # Applies a plain substitution group. Each entry yields (key, values): when
  # +values+ is present its matches are replaced by +key+; when it is absent
  # the entry itself is the pattern and its matches are removed.
  # NOTE(review): presumably entries without values come from YAML lists -
  # confirm against the language files.
  def process_group!(group, options)
    group.each do |key, values|
      if values
        simple_replace! key, values, options
      else
        simple_replace! '', key, options
      end
    end
  end

  # Replaces each prefix with +key+ wherever it is followed by one of its
  # suffixes. +options+ is accepted for signature parity and is not used.
  def process_follow_ups!(group, options = {})
    group.each do |key, prefixes|
      prefixes.each do |prefix, sufixes|
        homophone.gsub! mount_follow_up_regexp(prefix, sufixes), key
      end
    end
  end

  # Replaces each prefix with +key+ wherever it is followed by one of its
  # suffixes, keeping the character captured in front of the prefix.
  # +options+ is accepted for signature parity and is not used.
  def process_second_followed!(group, options = {})
    group.each do |key, prefixes|
      prefixes.each do |prefix, sufixes|
        regexp = mount_second_followed_by_regexp prefix, sufixes
        # The block form reads the capture of the match being replaced. The old
        # code took $1 from one preliminary match and reused it everywhere,
        # overwriting the preceding character of every later match.
        homophone.gsub!(regexp) { "#{Regexp.last_match(1)}#{key}" }
      end
    end
  end

  # Drops each configured character when it follows a vowel and is not itself
  # followed by a vowel, keeping the vowel that precedes it.
  # +options+ is accepted for signature parity and is not used.
  def process_vowels_pronunciation_insignificance!(group, options = {})
    group.each do |key, _value|
      regexp = mount_vowels_pronunciation_insignificance_regexp key
      # Per-match capture, for the same reason as in process_second_followed!.
      homophone.gsub!(regexp) { Regexp.last_match(1) }
    end
  end

  # Removes every character of each group value that is not followed by a
  # vowel. Not called from process_text.
  def process_followed_by_consonant_regexp!(group)
    group.each do |_key, value|
      homophone.gsub! mount_followed_by_consonant_regexp(value), ''
    end
  end

  # Replaces every match of +values+ (a fragment or an Array of alternatives)
  # in +homophone+ with +key+.
  def simple_replace!(key, values, options)
    homophone.gsub! mount_regexp(values, options), key.to_s
  end
end
|
data/soundcord.gemspec
CHANGED
@@ -1,9 +1,9 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
s.name = %q{soundcord}
|
3
3
|
s.author = 'Lukas Alexandre'
|
4
|
-
s.email = '
|
5
|
-
s.homepage = 'http://
|
6
|
-
s.version = "0.2.1"
|
4
|
+
s.email = 'lukasalexandre@me.com'
|
5
|
+
s.homepage = 'http://lukelex.github.com/soundcord'
|
6
|
+
s.version = "0.3.0"
|
7
7
|
s.date = Date.today
|
8
8
|
s.summary = %q{A phonetic algorithm for indexing of words by their pronunciation.}
|
9
9
|
s.description = %q{"Make comparisons of phonetically similar terms easier."}
|
metadata
CHANGED
@@ -1,36 +1,27 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: soundcord
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.3.0
|
5
5
|
prerelease:
|
6
|
-
segments:
|
7
|
-
- 0
|
8
|
-
- 2
|
9
|
-
- 1
|
10
|
-
version: 0.2.1
|
11
6
|
platform: ruby
|
12
|
-
authors:
|
7
|
+
authors:
|
13
8
|
- Lukas Alexandre
|
14
9
|
autorequire:
|
15
10
|
bindir: bin
|
16
11
|
cert_chain: []
|
17
|
-
|
18
|
-
date: 2012-10-24 00:00:00 Z
|
12
|
+
date: 2013-02-05 00:00:00.000000000 Z
|
19
13
|
dependencies: []
|
20
|
-
|
21
14
|
description: "\"Make comparisons of phonetically similar terms easier.\""
|
22
|
-
email:
|
15
|
+
email: lukasalexandre@me.com
|
23
16
|
executables: []
|
24
|
-
|
25
17
|
extensions: []
|
26
|
-
|
27
18
|
extra_rdoc_files: []
|
28
|
-
|
29
|
-
files:
|
30
|
-
- lib/soundcord/algorithm.rb
|
19
|
+
files:
|
31
20
|
- lib/soundcord/config.rb
|
32
21
|
- lib/soundcord/integrations/array.rb
|
33
22
|
- lib/soundcord/integrations/string.rb
|
23
|
+
- lib/soundcord/regexable.rb
|
24
|
+
- lib/soundcord/word.rb
|
34
25
|
- lib/soundcord.rb
|
35
26
|
- test/languages/en/test_soundcord.rb
|
36
27
|
- test/languages/pt_br/test_soundcord.rb
|
@@ -40,38 +31,28 @@ files:
|
|
40
31
|
- test/test_string.rb
|
41
32
|
- Rakefile
|
42
33
|
- soundcord.gemspec
|
43
|
-
homepage: http://
|
34
|
+
homepage: http://lukelex.github.com/soundcord
|
44
35
|
licenses: []
|
45
|
-
|
46
36
|
post_install_message:
|
47
37
|
rdoc_options: []
|
48
|
-
|
49
|
-
require_paths:
|
38
|
+
require_paths:
|
50
39
|
- lib
|
51
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
40
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
52
41
|
none: false
|
53
|
-
requirements:
|
42
|
+
requirements:
|
54
43
|
- - ">="
|
55
|
-
- !ruby/object:Gem::Version
|
56
|
-
|
57
|
-
|
58
|
-
- 0
|
59
|
-
version: "0"
|
60
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0'
|
46
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
61
47
|
none: false
|
62
|
-
requirements:
|
48
|
+
requirements:
|
63
49
|
- - ">="
|
64
|
-
- !ruby/object:Gem::Version
|
65
|
-
|
66
|
-
segments:
|
67
|
-
- 0
|
68
|
-
version: "0"
|
50
|
+
- !ruby/object:Gem::Version
|
51
|
+
version: '0'
|
69
52
|
requirements: []
|
70
|
-
|
71
53
|
rubyforge_project:
|
72
|
-
rubygems_version: 1.8.
|
54
|
+
rubygems_version: 1.8.25
|
73
55
|
signing_key:
|
74
56
|
specification_version: 3
|
75
57
|
summary: A phonetic algorithm for indexing of words by their pronunciation.
|
76
58
|
test_files: []
|
77
|
-
|
data/lib/soundcord/algorithm.rb
DELETED
@@ -1,137 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
require 'soundcord/integrations/string'
|
4
|
-
require 'soundcord/integrations/array'
|
5
|
-
|
6
|
-
class SoundCord
|
7
|
-
private
|
8
|
-
def self.process_text text
|
9
|
-
load_language unless language
|
10
|
-
|
11
|
-
text = text.downcase
|
12
|
-
|
13
|
-
lang_yml.each do |key, values|
|
14
|
-
if key == 'terminations'
|
15
|
-
text = process_group text, values, :terminations => true
|
16
|
-
elsif key == 'initiations'
|
17
|
-
text = process_group text, values, :initiations => true
|
18
|
-
elsif key == 'follow_ups'
|
19
|
-
text = process_follow_ups text, values, options
|
20
|
-
elsif key == 'second_followed'
|
21
|
-
text = process_second_followed text, values, options
|
22
|
-
elsif key == 'vowels_pronunciation_insignificance'
|
23
|
-
text = process_vowels_pronunciation_insignificance text, values, options
|
24
|
-
elsif !key.include? 'duplicate'
|
25
|
-
text = process_group text, values, options
|
26
|
-
end
|
27
|
-
end
|
28
|
-
|
29
|
-
text = remove_duplicity text, :duplicate_exceptions => (lang_yml['duplicate_exceptions'])
|
30
|
-
|
31
|
-
text.upcase
|
32
|
-
end
|
33
|
-
|
34
|
-
def self.remove_duplicity text, options
|
35
|
-
options[:duplicate_exceptions] = [] unless options[:duplicate_exceptions]
|
36
|
-
|
37
|
-
text.split(//).inject("") do |s, n|
|
38
|
-
last_s_char = s[s.length-1..s.length-1]
|
39
|
-
s + ((last_s_char === n &&
|
40
|
-
!options[:duplicate_exceptions].include?(n)) ? '' : n )
|
41
|
-
end
|
42
|
-
end
|
43
|
-
|
44
|
-
def self.process_group text, group, options
|
45
|
-
group.each do |key, values|
|
46
|
-
if values
|
47
|
-
text = simple_replace text, key, values, options
|
48
|
-
else
|
49
|
-
text = simple_replace text, '', key, options
|
50
|
-
end
|
51
|
-
end
|
52
|
-
return text
|
53
|
-
end
|
54
|
-
|
55
|
-
def self.process_follow_ups text, group, options = {}
|
56
|
-
group.each do |key, prefixes|
|
57
|
-
prefixes.each do |prefix, sufixes|
|
58
|
-
regexp = mount_follow_up_regexp prefix, sufixes
|
59
|
-
text = text.gsub regexp, key
|
60
|
-
end
|
61
|
-
end
|
62
|
-
return text
|
63
|
-
end
|
64
|
-
|
65
|
-
def self.process_second_followed text, group, options = {}
|
66
|
-
group.each do |key, prefixes|
|
67
|
-
prefixes.each do |prefix, sufixes|
|
68
|
-
regexp = mount_second_followed_by_regexp prefix, sufixes
|
69
|
-
text =~ regexp
|
70
|
-
replacing = ($1 ? $1 : '') + key
|
71
|
-
text = text.gsub regexp, replacing
|
72
|
-
end
|
73
|
-
end
|
74
|
-
return text
|
75
|
-
end
|
76
|
-
|
77
|
-
def self.process_vowels_pronunciation_insignificance text, group, options = {}
|
78
|
-
group.each do |key, value|
|
79
|
-
regexp = mount_vowels_pronunciation_insignificance_regexp key
|
80
|
-
text =~ regexp
|
81
|
-
text = text.gsub regexp, ($1 || '')
|
82
|
-
end
|
83
|
-
return text
|
84
|
-
end
|
85
|
-
|
86
|
-
def self.process_followed_by_consonant_regexp text, group
|
87
|
-
group.each do |key, value|
|
88
|
-
regexp = mount_followed_by_consonant_regexp value
|
89
|
-
text = text.gsub regexp, ''
|
90
|
-
end
|
91
|
-
return text
|
92
|
-
end
|
93
|
-
|
94
|
-
def self.simple_replace text, key, values, options
|
95
|
-
regexp = mount_regexp values, options
|
96
|
-
text.gsub regexp, key.to_s
|
97
|
-
end
|
98
|
-
|
99
|
-
def self.mount_regexp sentence, options = { :terminations => false, :initiations => false }
|
100
|
-
regexp = "/"
|
101
|
-
regexp += "^" if options[:initiations]
|
102
|
-
regexp += "("
|
103
|
-
regexp += sentence.kind_of?(Array) ? sentence.join("|") : sentence
|
104
|
-
regexp += ")"
|
105
|
-
regexp += "\\b" if options[:terminations]
|
106
|
-
regexp += "/"
|
107
|
-
eval(regexp)
|
108
|
-
end
|
109
|
-
|
110
|
-
def self.mount_follow_up_regexp prefix, sufix, options = {}
|
111
|
-
regexp = options[:not_eval] ? "" : "/"
|
112
|
-
regexp += prefix
|
113
|
-
regexp += "(?="
|
114
|
-
regexp += "("
|
115
|
-
regexp += sufix.kind_of?(Array) ? sufix.join("|") : sufix
|
116
|
-
regexp += "))"
|
117
|
-
regexp += "/" unless options[:not_eval]
|
118
|
-
options[:not_eval] ? regexp : eval(regexp)
|
119
|
-
end
|
120
|
-
|
121
|
-
def self.mount_second_followed_by_regexp char, group
|
122
|
-
regexp = "/" + not_first(char) + mount_follow_up_regexp(char, group, :not_eval => true) + "/"
|
123
|
-
eval regexp
|
124
|
-
end
|
125
|
-
|
126
|
-
def self.mount_vowels_pronunciation_insignificance_regexp char
|
127
|
-
eval "/([aeiou])#{char}(?=\\b|[^aeiou])/"
|
128
|
-
end
|
129
|
-
|
130
|
-
def self.mount_followed_by_consonant_regexp char
|
131
|
-
eval "[#{char}](?![aeiou])"
|
132
|
-
end
|
133
|
-
|
134
|
-
def self.not_first char
|
135
|
-
"([^#{char}]|^)"
|
136
|
-
end
|
137
|
-
end
|