soundcord 0.2.1 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/soundcord.rb +7 -3
- data/lib/soundcord/config.rb +2 -2
- data/lib/soundcord/regexable.rb +40 -0
- data/lib/soundcord/word.rb +96 -0
- data/soundcord.gemspec +3 -3
- metadata +20 -39
- data/lib/soundcord/algorithm.rb +0 -137
data/lib/soundcord.rb
CHANGED
@@ -1,11 +1,15 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
|
3
|
-
require 'soundcord/algorithm'
|
4
3
|
require 'soundcord/config'
|
5
4
|
|
6
|
-
|
5
|
+
require 'soundcord/integrations/string'
|
6
|
+
require 'soundcord/integrations/array'
|
7
|
+
|
8
|
+
require 'soundcord/word'
|
9
|
+
|
10
|
+
module SoundCord
|
7
11
|
def self.phonetize text
|
8
|
-
|
12
|
+
Word.new(text).to_s
|
9
13
|
end
|
10
14
|
|
11
15
|
def self.compare term_1, term_2
|
data/lib/soundcord/config.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
require 'yaml'
|
2
2
|
|
3
|
-
|
3
|
+
module SoundCord
|
4
4
|
DEFAULT_LANGUAGE = 'pt-BR'
|
5
5
|
LANGUAGES_DIRECTORY = "#{Dir.pwd}/lib/soundcord/languages/"
|
6
6
|
|
@@ -8,7 +8,7 @@ class SoundCord
|
|
8
8
|
attr_reader :language, :options
|
9
9
|
end
|
10
10
|
|
11
|
-
def self.load_language
|
11
|
+
def self.load_language(lang = DEFAULT_LANGUAGE)
|
12
12
|
@language = lang
|
13
13
|
@lang_yml = YAML::load_file(LANGUAGES_DIRECTORY + "#{lang}.yml")[language]
|
14
14
|
@options = { :use_vowels => false }
|
data/lib/soundcord/regexable.rb
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
# Builders for the regular expressions used by the phonetic rules.
#
# Patterns are compiled with Regexp.new rather than by eval-ing a
# hand-assembled "/.../" literal, so a "/" or an interpolation sequence inside
# a rule can neither break the pattern nor execute arbitrary code.
module Regexable
  # Builds a regexp that matches +sentence+ inside a capture group.
  #
  # sentence - a pattern String, or an Array of patterns joined as alternatives.
  # options  - :initiations  => true prefixes the group with "^";
  #            :terminations => true appends a word boundary ("\b").
  #
  # Returns a Regexp.
  def mount_regexp sentence, options = { :terminations => false, :initiations => false }
    source = ""
    source += "^" if options[:initiations]
    source += "(" + join_alternatives(sentence) + ")"
    source += "\\b" if options[:terminations]
    Regexp.new(source)
  end

  # Builds a pattern matching +prefix+ only when it is followed by +sufix+
  # (positive lookahead; the follower is not consumed by the match).
  #
  # prefix  - pattern String to match.
  # sufix   - a pattern String, or an Array of patterns joined as alternatives.
  # options - :not_eval => true returns the pattern source as a String so it
  #           can be embedded in a larger pattern.
  #
  # Returns a Regexp, or a String when :not_eval is set.
  def mount_follow_up_regexp prefix, sufix, options = {}
    source = prefix + "(?=(" + join_alternatives(sufix) + "))"
    options[:not_eval] ? source : Regexp.new(source)
  end

  # Builds a regexp matching +char+ followed by one of +group+, preceded by
  # either a character other than +char+ or the start of a line. The preceding
  # character (possibly empty) is capture group 1.
  #
  # Returns a Regexp.
  def mount_second_followed_by_regexp char, group
    Regexp.new(not_first(char) + mount_follow_up_regexp(char, group, :not_eval => true))
  end

  # Builds a regexp matching a vowel (capture group 1) followed by +char+,
  # where +char+ is in turn followed by a word boundary or a non-vowel.
  #
  # Returns a Regexp.
  def mount_vowels_pronunciation_insignificance_regexp char
    Regexp.new("([aeiou])#{char}(?=\\b|[^aeiou])")
  end

  # Builds a regexp matching any character of +char+ that is not followed by
  # a vowel.
  #
  # The previous implementation eval-ed the pattern text without regexp
  # delimiters, so it could not produce a Regexp at all.
  #
  # Returns a Regexp.
  def mount_followed_by_consonant_regexp char
    Regexp.new("[#{char}](?![aeiou])")
  end

  # Returns the pattern source (a String) for "any character other than
  # +char+, or the start of a line", wrapped in a capture group.
  def not_first char
    "([^#{char}]|^)"
  end

  private

  # Joins an Array of patterns with "|"; any other value is returned as is.
  def join_alternatives patterns
    patterns.kind_of?(Array) ? patterns.join("|") : patterns
  end
end
|
data/lib/soundcord/word.rb
ADDED
@@ -0,0 +1,96 @@
|
|
1
|
+
require_relative 'regexable'
|
2
|
+
|
3
|
+
# A word paired with its phonetic code.
#
# original  - the text supplied by the caller.
# homophone - the phonetic code; when not supplied to the constructor it is
#             computed and stored by the first call to #to_s.
class SoundCord::Word < Struct.new(:original, :homophone)
  include Regexable

  # Returns the phonetic code, computing it on first use.
  def to_s
    homophone || process_text
  end

  protected

  # Runs every rule group of the loaded language over the down-cased original
  # text, collapses repeated characters and stores the upper-cased result in
  # +homophone+. The language is loaded with its default when none is set.
  #
  # The final value is written back to +homophone+ so that repeated #to_s
  # calls return the same upper-cased code (previously the second call
  # returned the lower-cased intermediate string).
  #
  # Returns the upper-cased phonetic code.
  def process_text
    SoundCord.load_language unless SoundCord.language

    self.homophone = original.downcase

    SoundCord.lang_yml.each do |key, values|
      case key
      when 'terminations'
        process_group! values, :terminations => true
      when 'initiations'
        process_group! values, :initiations => true
      when 'follow_ups'
        process_follow_ups! values, SoundCord.options
      when 'second_followed'
        process_second_followed! values, SoundCord.options
      when 'vowels_pronunciation_insignificance'
        process_vowels_pronunciation_insignificance! values, SoundCord.options
      else
        # Keys containing "duplicate" configure remove_duplicity! and are not
        # replacement groups themselves.
        process_group! values, SoundCord.options unless key.include? 'duplicate'
      end
    end

    remove_duplicity! :duplicate_exceptions => SoundCord.lang_yml['duplicate_exceptions']

    self.homophone = homophone.upcase
  end

  # Collapses runs of the same character in +homophone+ into one character,
  # except for characters listed in options[:duplicate_exceptions].
  def remove_duplicity!(options)
    exceptions = options[:duplicate_exceptions] || []

    self.homophone = homophone.each_char.inject("") do |kept, char|
      repeated = kept[-1, 1] == char
      (repeated && !exceptions.include?(char)) ? kept : kept + char
    end
  end

  # Applies a replacement group. Each entry is yielded as (key, values): the
  # patterns in +values+ are replaced by +key+. When an entry has no values,
  # the key itself is used as the pattern and is replaced by an empty string.
  def process_group! group, options
    group.each do |key, values|
      if values
        simple_replace! key, values, options
      else
        simple_replace! '', key, options
      end
    end
  end

  # For each replacement +key+, replaces every prefix that is followed by one
  # of its sufixes with +key+ (the sufix itself is left in place).
  def process_follow_ups! group, options = {}
    group.each do |key, prefixes|
      prefixes.each do |prefix, sufixes|
        homophone.gsub! mount_follow_up_regexp(prefix, sufixes), key
      end
    end
  end

  # Like process_follow_ups!, but only where the prefix is not preceded by
  # the same character; the preceding character is kept in the result.
  #
  # The block form of gsub! is used so the preceding character is taken from
  # each individual match (previously the capture of the first match was
  # reused for every replacement).
  def process_second_followed! group, options = {}
    group.each do |key, prefixes|
      prefixes.each do |prefix, sufixes|
        regexp = mount_second_followed_by_regexp prefix, sufixes
        homophone.gsub!(regexp) { "#{Regexp.last_match(1)}#{key}" }
      end
    end
  end

  # For each key of +group+, removes the key where it follows a vowel and
  # precedes a word boundary or non-vowel, keeping the vowel.
  #
  # The block form of gsub! keeps the vowel of each individual match
  # (previously every match was replaced by the vowel of the first match).
  def process_vowels_pronunciation_insignificance! group, options = {}
    group.each do |key, value|
      regexp = mount_vowels_pronunciation_insignificance_regexp key
      homophone.gsub!(regexp) { Regexp.last_match(1) }
    end
  end

  # For each value of +group+, removes its characters wherever they are not
  # followed by a vowel. Not invoked by process_text.
  def process_followed_by_consonant_regexp! group
    group.each do |key, value|
      homophone.gsub! mount_followed_by_consonant_regexp(value), ''
    end
  end

  # Replaces every occurrence of the patterns in +values+ with +key+,
  # honouring the :initiations / :terminations anchoring options.
  def simple_replace! key, values, options
    homophone.gsub! mount_regexp(values, options), key.to_s
  end
end
|
data/soundcord.gemspec
CHANGED
@@ -1,9 +1,9 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
s.name = %q{soundcord}
|
3
3
|
s.author = 'Lukas Alexandre'
|
4
|
-
s.email = '
|
5
|
-
s.homepage = 'http://
|
6
|
-
s.version = "0.2.1"
|
4
|
+
s.email = 'lukasalexandre@me.com'
|
5
|
+
s.homepage = 'http://lukelex.github.com/soundcord'
|
6
|
+
s.version = "0.3.0"
|
7
7
|
s.date = Date.today
|
8
8
|
s.summary = %q{A phonetic algorithm for indexing of words by their pronunciation.}
|
9
9
|
s.description = %q{"Make comparisons of phonetically similar terms easier."}
|
metadata
CHANGED
@@ -1,36 +1,27 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: soundcord
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.3.0
|
5
5
|
prerelease:
|
6
|
-
segments:
|
7
|
-
- 0
|
8
|
-
- 2
|
9
|
-
- 1
|
10
|
-
version: 0.2.1
|
11
6
|
platform: ruby
|
12
|
-
authors:
|
7
|
+
authors:
|
13
8
|
- Lukas Alexandre
|
14
9
|
autorequire:
|
15
10
|
bindir: bin
|
16
11
|
cert_chain: []
|
17
|
-
|
18
|
-
date: 2012-10-24 00:00:00 Z
|
12
|
+
date: 2013-02-05 00:00:00.000000000 Z
|
19
13
|
dependencies: []
|
20
|
-
|
21
14
|
description: "\"Make comparisons of phonetically similar terms easier.\""
|
22
|
-
email:
|
15
|
+
email: lukasalexandre@me.com
|
23
16
|
executables: []
|
24
|
-
|
25
17
|
extensions: []
|
26
|
-
|
27
18
|
extra_rdoc_files: []
|
28
|
-
|
29
|
-
files:
|
30
|
-
- lib/soundcord/algorithm.rb
|
19
|
+
files:
|
31
20
|
- lib/soundcord/config.rb
|
32
21
|
- lib/soundcord/integrations/array.rb
|
33
22
|
- lib/soundcord/integrations/string.rb
|
23
|
+
- lib/soundcord/regexable.rb
|
24
|
+
- lib/soundcord/word.rb
|
34
25
|
- lib/soundcord.rb
|
35
26
|
- test/languages/en/test_soundcord.rb
|
36
27
|
- test/languages/pt_br/test_soundcord.rb
|
@@ -40,38 +31,28 @@ files:
|
|
40
31
|
- test/test_string.rb
|
41
32
|
- Rakefile
|
42
33
|
- soundcord.gemspec
|
43
|
-
homepage: http://
|
34
|
+
homepage: http://lukelex.github.com/soundcord
|
44
35
|
licenses: []
|
45
|
-
|
46
36
|
post_install_message:
|
47
37
|
rdoc_options: []
|
48
|
-
|
49
|
-
require_paths:
|
38
|
+
require_paths:
|
50
39
|
- lib
|
51
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
40
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
52
41
|
none: false
|
53
|
-
requirements:
|
42
|
+
requirements:
|
54
43
|
- - ">="
|
55
|
-
- !ruby/object:Gem::Version
|
56
|
-
|
57
|
-
|
58
|
-
- 0
|
59
|
-
version: "0"
|
60
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0'
|
46
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
61
47
|
none: false
|
62
|
-
requirements:
|
48
|
+
requirements:
|
63
49
|
- - ">="
|
64
|
-
- !ruby/object:Gem::Version
|
65
|
-
|
66
|
-
segments:
|
67
|
-
- 0
|
68
|
-
version: "0"
|
50
|
+
- !ruby/object:Gem::Version
|
51
|
+
version: '0'
|
69
52
|
requirements: []
|
70
|
-
|
71
53
|
rubyforge_project:
|
72
|
-
rubygems_version: 1.8.
|
54
|
+
rubygems_version: 1.8.25
|
73
55
|
signing_key:
|
74
56
|
specification_version: 3
|
75
57
|
summary: A phonetic algorithm for indexing of words by their pronunciation.
|
76
58
|
test_files: []
|
77
|
-
|
data/lib/soundcord/algorithm.rb
DELETED
@@ -1,137 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
require 'soundcord/integrations/string'
|
4
|
-
require 'soundcord/integrations/array'
|
5
|
-
|
6
|
-
class SoundCord
|
7
|
-
private
|
8
|
-
def self.process_text text
|
9
|
-
load_language unless language
|
10
|
-
|
11
|
-
text = text.downcase
|
12
|
-
|
13
|
-
lang_yml.each do |key, values|
|
14
|
-
if key == 'terminations'
|
15
|
-
text = process_group text, values, :terminations => true
|
16
|
-
elsif key == 'initiations'
|
17
|
-
text = process_group text, values, :initiations => true
|
18
|
-
elsif key == 'follow_ups'
|
19
|
-
text = process_follow_ups text, values, options
|
20
|
-
elsif key == 'second_followed'
|
21
|
-
text = process_second_followed text, values, options
|
22
|
-
elsif key == 'vowels_pronunciation_insignificance'
|
23
|
-
text = process_vowels_pronunciation_insignificance text, values, options
|
24
|
-
elsif !key.include? 'duplicate'
|
25
|
-
text = process_group text, values, options
|
26
|
-
end
|
27
|
-
end
|
28
|
-
|
29
|
-
text = remove_duplicity text, :duplicate_exceptions => (lang_yml['duplicate_exceptions'])
|
30
|
-
|
31
|
-
text.upcase
|
32
|
-
end
|
33
|
-
|
34
|
-
def self.remove_duplicity text, options
|
35
|
-
options[:duplicate_exceptions] = [] unless options[:duplicate_exceptions]
|
36
|
-
|
37
|
-
text.split(//).inject("") do |s, n|
|
38
|
-
last_s_char = s[s.length-1..s.length-1]
|
39
|
-
s + ((last_s_char === n &&
|
40
|
-
!options[:duplicate_exceptions].include?(n)) ? '' : n )
|
41
|
-
end
|
42
|
-
end
|
43
|
-
|
44
|
-
def self.process_group text, group, options
|
45
|
-
group.each do |key, values|
|
46
|
-
if values
|
47
|
-
text = simple_replace text, key, values, options
|
48
|
-
else
|
49
|
-
text = simple_replace text, '', key, options
|
50
|
-
end
|
51
|
-
end
|
52
|
-
return text
|
53
|
-
end
|
54
|
-
|
55
|
-
def self.process_follow_ups text, group, options = {}
|
56
|
-
group.each do |key, prefixes|
|
57
|
-
prefixes.each do |prefix, sufixes|
|
58
|
-
regexp = mount_follow_up_regexp prefix, sufixes
|
59
|
-
text = text.gsub regexp, key
|
60
|
-
end
|
61
|
-
end
|
62
|
-
return text
|
63
|
-
end
|
64
|
-
|
65
|
-
def self.process_second_followed text, group, options = {}
|
66
|
-
group.each do |key, prefixes|
|
67
|
-
prefixes.each do |prefix, sufixes|
|
68
|
-
regexp = mount_second_followed_by_regexp prefix, sufixes
|
69
|
-
text =~ regexp
|
70
|
-
replacing = ($1 ? $1 : '') + key
|
71
|
-
text = text.gsub regexp, replacing
|
72
|
-
end
|
73
|
-
end
|
74
|
-
return text
|
75
|
-
end
|
76
|
-
|
77
|
-
def self.process_vowels_pronunciation_insignificance text, group, options = {}
|
78
|
-
group.each do |key, value|
|
79
|
-
regexp = mount_vowels_pronunciation_insignificance_regexp key
|
80
|
-
text =~ regexp
|
81
|
-
text = text.gsub regexp, ($1 || '')
|
82
|
-
end
|
83
|
-
return text
|
84
|
-
end
|
85
|
-
|
86
|
-
def self.process_followed_by_consonant_regexp text, group
|
87
|
-
group.each do |key, value|
|
88
|
-
regexp = mount_followed_by_consonant_regexp value
|
89
|
-
text = text.gsub regexp, ''
|
90
|
-
end
|
91
|
-
return text
|
92
|
-
end
|
93
|
-
|
94
|
-
def self.simple_replace text, key, values, options
|
95
|
-
regexp = mount_regexp values, options
|
96
|
-
text.gsub regexp, key.to_s
|
97
|
-
end
|
98
|
-
|
99
|
-
def self.mount_regexp sentence, options = { :terminations => false, :initiations => false }
|
100
|
-
regexp = "/"
|
101
|
-
regexp += "^" if options[:initiations]
|
102
|
-
regexp += "("
|
103
|
-
regexp += sentence.kind_of?(Array) ? sentence.join("|") : sentence
|
104
|
-
regexp += ")"
|
105
|
-
regexp += "\\b" if options[:terminations]
|
106
|
-
regexp += "/"
|
107
|
-
eval(regexp)
|
108
|
-
end
|
109
|
-
|
110
|
-
def self.mount_follow_up_regexp prefix, sufix, options = {}
|
111
|
-
regexp = options[:not_eval] ? "" : "/"
|
112
|
-
regexp += prefix
|
113
|
-
regexp += "(?="
|
114
|
-
regexp += "("
|
115
|
-
regexp += sufix.kind_of?(Array) ? sufix.join("|") : sufix
|
116
|
-
regexp += "))"
|
117
|
-
regexp += "/" unless options[:not_eval]
|
118
|
-
options[:not_eval] ? regexp : eval(regexp)
|
119
|
-
end
|
120
|
-
|
121
|
-
def self.mount_second_followed_by_regexp char, group
|
122
|
-
regexp = "/" + not_first(char) + mount_follow_up_regexp(char, group, :not_eval => true) + "/"
|
123
|
-
eval regexp
|
124
|
-
end
|
125
|
-
|
126
|
-
def self.mount_vowels_pronunciation_insignificance_regexp char
|
127
|
-
eval "/([aeiou])#{char}(?=\\b|[^aeiou])/"
|
128
|
-
end
|
129
|
-
|
130
|
-
def self.mount_followed_by_consonant_regexp char
|
131
|
-
eval "[#{char}](?![aeiou])"
|
132
|
-
end
|
133
|
-
|
134
|
-
def self.not_first char
|
135
|
-
"([^#{char}]|^)"
|
136
|
-
end
|
137
|
-
end
|