soundcord 0.2.1 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,11 +1,15 @@
1
1
  # encoding: utf-8
2
2
 
3
- require 'soundcord/algorithm'
4
3
  require 'soundcord/config'
5
4
 
6
- class SoundCord
5
+ require 'soundcord/integrations/string'
6
+ require 'soundcord/integrations/array'
7
+
8
+ require 'soundcord/word'
9
+
10
+ module SoundCord
7
11
  def self.phonetize text
8
- process_text text
12
+ Word.new(text).to_s
9
13
  end
10
14
 
11
15
  def self.compare term_1, term_2
@@ -1,6 +1,6 @@
1
1
  require 'yaml'
2
2
 
3
- class SoundCord
3
+ module SoundCord
4
4
  DEFAULT_LANGUAGE = 'pt-BR'
5
5
  LANGUAGES_DIRECTORY = "#{Dir.pwd}/lib/soundcord/languages/"
6
6
 
@@ -8,7 +8,7 @@ class SoundCord
8
8
  attr_reader :language, :options
9
9
  end
10
10
 
11
- def self.load_language lang = DEFAULT_LANGUAGE
11
+ def self.load_language(lang = DEFAULT_LANGUAGE)
12
12
  @language = lang
13
13
  @lang_yml = YAML::load_file(LANGUAGES_DIRECTORY + "#{lang}.yml")[language]
14
14
  @options = { :use_vowels => false }
@@ -0,0 +1,40 @@
1
# Builders for the regular expressions used by the phonetic replacement rules.
#
# Every builder assembles a pattern string and compiles it with Regexp.new.
# The pieces (+sentence+, +prefix+, +sufix+, +char+) are inserted verbatim, so
# they are treated as regexp source, not as literal text.
#
# NOTE(review): the previous implementation compiled the patterns with +eval+,
# which broke on patterns containing "/" and would execute any "#{...}" found
# in the rule text. Regexp.new yields the same Regexp without evaluating code.
module Regexable
  # Builds a regexp matching any of the given alternatives as one group.
  #
  # sentence - a String pattern or an Array of patterns (joined with "|").
  # options  - :initiations  => true anchors the group with "^";
  #            :terminations => true requires a word boundary after the group.
  #
  # Returns a Regexp.
  def mount_regexp(sentence, options = { :terminations => false, :initiations => false })
    head = options[:initiations] ? '^' : ''
    tail = options[:terminations] ? '\\b' : ''
    Regexp.new("#{head}(#{join_alternatives(sentence)})#{tail}")
  end

  # Builds a pattern matching +prefix+ only when it is followed by one of the
  # +sufix+ alternatives (positive lookahead, the suffix is not consumed).
  #
  # options - :not_eval => true returns the pattern source as a String instead
  #           of a compiled Regexp.
  #
  # Returns a Regexp, or a String when :not_eval is set.
  def mount_follow_up_regexp(prefix, sufix, options = {})
    pattern = "#{prefix}(?=(#{join_alternatives(sufix)}))"
    options[:not_eval] ? pattern : Regexp.new(pattern)
  end

  # Builds a regexp matching +char+ followed by one of +group+, preceded either
  # by a character that is not one of +char+ or by a line start. The preceding
  # part is captured as group 1.
  #
  # Returns a Regexp.
  def mount_second_followed_by_regexp(char, group)
    Regexp.new(not_first(char) + mount_follow_up_regexp(char, group, :not_eval => true))
  end

  # Builds a regexp matching a vowel (captured as group 1) followed by +char+,
  # where +char+ is in turn followed by a word boundary or a non-vowel.
  #
  # Returns a Regexp.
  def mount_vowels_pronunciation_insignificance_regexp(char)
    Regexp.new("([aeiou])#{char}(?=\\b|[^aeiou])")
  end

  # Builds a regexp matching one of the characters in +char+ when it is not
  # followed by a vowel.
  #
  # The original evaluated the bare pattern text without regexp delimiters, so
  # it could never produce a Regexp; it is now compiled like the other builders.
  #
  # Returns a Regexp.
  def mount_followed_by_consonant_regexp(char)
    Regexp.new("[#{char}](?![aeiou])")
  end

  # Pattern source for "a character not in +char+, or a line start".
  #
  # Returns a String.
  def not_first(char)
    "([^#{char}]|^)"
  end

  private

  # Joins an Array of alternatives with "|"; any other value is used as is.
  def join_alternatives(patterns)
    patterns.kind_of?(Array) ? patterns.join('|') : patterns.to_s
  end
end
@@ -0,0 +1,96 @@
1
+ require_relative 'regexable'
2
+
3
# A word (+original+) together with its phonetic key (+homophone+).
#
# The key is derived by applying, in order, every rule group returned by
# SoundCord.lang_yml to the downcased original, collapsing repeated characters
# and upcasing the result.
#
# NOTE(review): inheriting from an anonymous Struct.new is kept so the
# superclass (and therefore the public interface) stays unchanged.
class SoundCord::Word < Struct.new(:original, :homophone)
  include Regexable

  # Returns the phonetic key, computing and caching it on first use.
  # A +homophone+ supplied at construction time is returned untouched.
  def to_s
    homophone || process_text
  end

  protected

  # Computes the phonetic key for +original+ and stores it in +homophone+.
  #
  # The final upcased key is what gets cached. Previously the lowercase
  # intermediate stayed in +homophone+, so a second #to_s call returned a
  # different value than the first.
  #
  # Returns the upcased key (String).
  def process_text
    SoundCord.load_language unless SoundCord.language

    self.homophone = original.downcase

    SoundCord.lang_yml.each do |key, values|
      case key
      when 'terminations'
        process_group!(values, :terminations => true)
      when 'initiations'
        process_group!(values, :initiations => true)
      when 'follow_ups'
        process_follow_ups!(values, SoundCord.options)
      when 'second_followed'
        process_second_followed!(values, SoundCord.options)
      when 'vowels_pronunciation_insignificance'
        process_vowels_pronunciation_insignificance!(values, SoundCord.options)
      else
        # Keys mentioning "duplicate" configure remove_duplicity!, they are not rule groups.
        process_group!(values, SoundCord.options) unless key.include?('duplicate')
      end
    end

    remove_duplicity!(:duplicate_exceptions => SoundCord.lang_yml['duplicate_exceptions'])

    self.homophone = homophone.upcase
  end

  # Collapses runs of the same character into one, except for characters
  # listed in options[:duplicate_exceptions]. The options hash is not modified.
  def remove_duplicity!(options)
    exceptions = options[:duplicate_exceptions] || []

    self.homophone = homophone.each_char.inject('') do |result, char|
      repeated = result[-1, 1] == char
      repeated && !exceptions.include?(char) ? result : result + char
    end
  end

  # Applies every entry of +group+ as a plain replacement. An entry without a
  # value (e.g. an element of an Array) means "remove this pattern".
  def process_group!(group, options)
    group.each do |key, values|
      if values
        simple_replace!(key, values, options)
      else
        simple_replace!('', key, options)
      end
    end
  end

  # Replaces each prefix with +key+ wherever it is followed by one of its suffixes.
  def process_follow_ups!(group, options = {})
    group.each do |key, prefixes|
      prefixes.each do |prefix, sufixes|
        homophone.gsub!(mount_follow_up_regexp(prefix, sufixes), key)
      end
    end
  end

  # Replaces each non-leading prefix that is followed by one of its suffixes
  # with +key+, keeping the character captured in front of it.
  #
  # The block form of gsub! is used so the kept character comes from each
  # individual match; the previous version reused the first match's capture
  # for every occurrence.
  def process_second_followed!(group, options = {})
    group.each do |key, prefixes|
      prefixes.each do |prefix, sufixes|
        regexp = mount_second_followed_by_regexp(prefix, sufixes)
        homophone.gsub!(regexp) { "#{Regexp.last_match(1)}#{key}" }
      end
    end
  end

  # Drops each listed character when it follows a vowel and precedes a word
  # boundary or a non-vowel, keeping the vowel of that particular match.
  def process_vowels_pronunciation_insignificance!(group, options = {})
    group.each do |key, _value|
      regexp = mount_vowels_pronunciation_insignificance_regexp(key)
      homophone.gsub!(regexp) { Regexp.last_match(1) }
    end
  end

  # Removes every character of each group value that is not followed by a vowel.
  def process_followed_by_consonant_regexp!(group)
    group.each do |_key, value|
      homophone.gsub!(mount_followed_by_consonant_regexp(value), '')
    end
  end

  # Replaces every match of +values+ (see Regexable#mount_regexp) with +key+.
  def simple_replace!(key, values, options)
    homophone.gsub!(mount_regexp(values, options), key.to_s)
  end
end
@@ -1,9 +1,9 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = %q{soundcord}
3
3
  s.author = 'Lukas Alexandre'
4
- s.email = 'lukeskytm@gmail.com'
5
- s.homepage = 'http://lukasalexandre.github.com/soundcord'
6
- s.version = "0.2.1"
4
+ s.email = 'lukasalexandre@me.com'
5
+ s.homepage = 'http://lukelex.github.com/soundcord'
6
+ s.version = "0.3.0"
7
7
  s.date = Date.today
8
8
  s.summary = %q{A phonetic algorithm for indexing of words by their pronunciation.}
9
9
  s.description = %q{"Make comparisons of phonetically similar terms easier."}
metadata CHANGED
@@ -1,36 +1,27 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: soundcord
3
- version: !ruby/object:Gem::Version
4
- hash: 21
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.3.0
5
5
  prerelease:
6
- segments:
7
- - 0
8
- - 2
9
- - 1
10
- version: 0.2.1
11
6
  platform: ruby
12
- authors:
7
+ authors:
13
8
  - Lukas Alexandre
14
9
  autorequire:
15
10
  bindir: bin
16
11
  cert_chain: []
17
-
18
- date: 2012-10-24 00:00:00 Z
12
+ date: 2013-02-05 00:00:00.000000000 Z
19
13
  dependencies: []
20
-
21
14
  description: "\"Make comparisons of phonetically similar terms easier.\""
22
- email: lukeskytm@gmail.com
15
+ email: lukasalexandre@me.com
23
16
  executables: []
24
-
25
17
  extensions: []
26
-
27
18
  extra_rdoc_files: []
28
-
29
- files:
30
- - lib/soundcord/algorithm.rb
19
+ files:
31
20
  - lib/soundcord/config.rb
32
21
  - lib/soundcord/integrations/array.rb
33
22
  - lib/soundcord/integrations/string.rb
23
+ - lib/soundcord/regexable.rb
24
+ - lib/soundcord/word.rb
34
25
  - lib/soundcord.rb
35
26
  - test/languages/en/test_soundcord.rb
36
27
  - test/languages/pt_br/test_soundcord.rb
@@ -40,38 +31,28 @@ files:
40
31
  - test/test_string.rb
41
32
  - Rakefile
42
33
  - soundcord.gemspec
43
- homepage: http://lukasalexandre.github.com/soundcord
34
+ homepage: http://lukelex.github.com/soundcord
44
35
  licenses: []
45
-
46
36
  post_install_message:
47
37
  rdoc_options: []
48
-
49
- require_paths:
38
+ require_paths:
50
39
  - lib
51
- required_ruby_version: !ruby/object:Gem::Requirement
40
+ required_ruby_version: !ruby/object:Gem::Requirement
52
41
  none: false
53
- requirements:
42
+ requirements:
54
43
  - - ">="
55
- - !ruby/object:Gem::Version
56
- hash: 3
57
- segments:
58
- - 0
59
- version: "0"
60
- required_rubygems_version: !ruby/object:Gem::Requirement
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ required_rubygems_version: !ruby/object:Gem::Requirement
61
47
  none: false
62
- requirements:
48
+ requirements:
63
49
  - - ">="
64
- - !ruby/object:Gem::Version
65
- hash: 3
66
- segments:
67
- - 0
68
- version: "0"
50
+ - !ruby/object:Gem::Version
51
+ version: '0'
69
52
  requirements: []
70
-
71
53
  rubyforge_project:
72
- rubygems_version: 1.8.24
54
+ rubygems_version: 1.8.25
73
55
  signing_key:
74
56
  specification_version: 3
75
57
  summary: A phonetic algorithm for indexing of words by their pronunciation.
76
58
  test_files: []
77
-
@@ -1,137 +0,0 @@
1
- # encoding: utf-8
2
-
3
- require 'soundcord/integrations/string'
4
- require 'soundcord/integrations/array'
5
-
6
# Phonetic algorithm of SoundCord: turns a text into its phonetic key by
# applying the replacement rules of the loaded language.
#
# Relies on +load_language+, +language+, +lang_yml+ and +options+, which are
# not defined in this block (presumably provided by soundcord/config — verify).
#
# NOTE(review): the original placed a bare `private` before these methods. It
# has no effect on `def self.` methods, so they were public and remain public.
class SoundCord
  # Computes the phonetic key of +text+: downcase, apply every rule group of
  # the language in order, collapse repeated characters, upcase.
  #
  # Returns a String.
  def self.process_text(text)
    load_language unless language

    text = text.downcase

    lang_yml.each do |key, values|
      text =
        case key
        when 'terminations'
          process_group(text, values, :terminations => true)
        when 'initiations'
          process_group(text, values, :initiations => true)
        when 'follow_ups'
          process_follow_ups(text, values, options)
        when 'second_followed'
          process_second_followed(text, values, options)
        when 'vowels_pronunciation_insignificance'
          process_vowels_pronunciation_insignificance(text, values, options)
        else
          # Keys mentioning "duplicate" configure remove_duplicity, they are not rule groups.
          key.include?('duplicate') ? text : process_group(text, values, options)
        end
    end

    remove_duplicity(text, :duplicate_exceptions => lang_yml['duplicate_exceptions']).upcase
  end

  # Collapses runs of the same character into one, except for characters
  # listed in options[:duplicate_exceptions]. The options hash is not modified.
  #
  # Returns a new String.
  def self.remove_duplicity(text, options)
    exceptions = options[:duplicate_exceptions] || []

    text.each_char.inject('') do |result, char|
      repeated = result[-1, 1] == char
      repeated && !exceptions.include?(char) ? result : result + char
    end
  end

  # Applies every entry of +group+ as a plain replacement. An entry without a
  # value (e.g. an element of an Array) means "remove this pattern".
  #
  # Returns the rewritten String.
  def self.process_group(text, group, options)
    group.each do |key, values|
      text = values ? simple_replace(text, key, values, options) : simple_replace(text, '', key, options)
    end
    text
  end

  # Replaces each prefix with +key+ wherever it is followed by one of its suffixes.
  #
  # Returns the rewritten String.
  def self.process_follow_ups(text, group, options = {})
    group.each do |key, prefixes|
      prefixes.each do |prefix, sufixes|
        text = text.gsub(mount_follow_up_regexp(prefix, sufixes), key)
      end
    end
    text
  end

  # Replaces each non-leading prefix that is followed by one of its suffixes
  # with +key+, keeping the character captured in front of it.
  #
  # The block form of gsub is used so the kept character comes from each
  # individual match; the previous version reused the first match's capture
  # for every occurrence.
  #
  # Returns the rewritten String.
  def self.process_second_followed(text, group, options = {})
    group.each do |key, prefixes|
      prefixes.each do |prefix, sufixes|
        regexp = mount_second_followed_by_regexp(prefix, sufixes)
        text = text.gsub(regexp) { "#{Regexp.last_match(1)}#{key}" }
      end
    end
    text
  end

  # Drops each listed character when it follows a vowel and precedes a word
  # boundary or a non-vowel, keeping the vowel of that particular match.
  #
  # Returns the rewritten String.
  def self.process_vowels_pronunciation_insignificance(text, group, options = {})
    group.each do |key, _value|
      regexp = mount_vowels_pronunciation_insignificance_regexp(key)
      text = text.gsub(regexp) { Regexp.last_match(1) }
    end
    text
  end

  # Removes every character of each group value that is not followed by a vowel.
  #
  # Returns the rewritten String.
  def self.process_followed_by_consonant_regexp(text, group)
    group.each do |_key, value|
      text = text.gsub(mount_followed_by_consonant_regexp(value), '')
    end
    text
  end

  # Replaces every match of +values+ (see mount_regexp) with +key+.
  def self.simple_replace(text, key, values, options)
    text.gsub(mount_regexp(values, options), key.to_s)
  end

  # Builds a regexp matching any of the alternatives in +sentence+ (String or
  # Array) as one group; :initiations anchors it with "^", :terminations
  # requires a word boundary after it.
  #
  # Patterns are compiled with Regexp.new rather than eval, so rule text that
  # contains "/" works and "#{...}" in rule text is never executed.
  def self.mount_regexp(sentence, options = { :terminations => false, :initiations => false })
    alternatives = sentence.kind_of?(Array) ? sentence.join('|') : sentence
    head = options[:initiations] ? '^' : ''
    tail = options[:terminations] ? '\\b' : ''
    Regexp.new("#{head}(#{alternatives})#{tail}")
  end

  # Builds a pattern matching +prefix+ only when followed by one of +sufix+
  # (lookahead). Returns the pattern source String when options[:not_eval] is
  # set, otherwise a Regexp.
  def self.mount_follow_up_regexp(prefix, sufix, options = {})
    alternatives = sufix.kind_of?(Array) ? sufix.join('|') : sufix
    pattern = "#{prefix}(?=(#{alternatives}))"
    options[:not_eval] ? pattern : Regexp.new(pattern)
  end

  # Builds a regexp matching +char+ followed by one of +group+, preceded by a
  # character not in +char+ or by a line start (captured as group 1).
  def self.mount_second_followed_by_regexp(char, group)
    Regexp.new(not_first(char) + mount_follow_up_regexp(char, group, :not_eval => true))
  end

  # Builds a regexp matching a vowel (group 1) followed by +char+, itself
  # followed by a word boundary or a non-vowel.
  def self.mount_vowels_pronunciation_insignificance_regexp(char)
    Regexp.new("([aeiou])#{char}(?=\\b|[^aeiou])")
  end

  # Builds a regexp matching one of the characters in +char+ when it is not
  # followed by a vowel. The original evaluated the pattern text without
  # regexp delimiters and so could never return a Regexp.
  def self.mount_followed_by_consonant_regexp(char)
    Regexp.new("[#{char}](?![aeiou])")
  end

  # Pattern source for "a character not in +char+, or a line start".
  def self.not_first(char)
    "([^#{char}]|^)"
  end
end