soundcord 0.2.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,11 +1,15 @@
1
1
  # encoding: utf-8
2
2
 
3
- require 'soundcord/algorithm'
4
3
  require 'soundcord/config'
5
4
 
6
- class SoundCord
5
+ require 'soundcord/integrations/string'
6
+ require 'soundcord/integrations/array'
7
+
8
+ require 'soundcord/word'
9
+
10
+ module SoundCord
7
11
  def self.phonetize text
8
- process_text text
12
+ Word.new(text).to_s
9
13
  end
10
14
 
11
15
  def self.compare term_1, term_2
@@ -1,6 +1,6 @@
1
1
  require 'yaml'
2
2
 
3
- class SoundCord
3
+ module SoundCord
4
4
  DEFAULT_LANGUAGE = 'pt-BR'
5
5
  LANGUAGES_DIRECTORY = "#{Dir.pwd}/lib/soundcord/languages/"
6
6
 
@@ -8,7 +8,7 @@ class SoundCord
8
8
  attr_reader :language, :options
9
9
  end
10
10
 
11
- def self.load_language lang = DEFAULT_LANGUAGE
11
+ def self.load_language(lang = DEFAULT_LANGUAGE)
12
12
  @language = lang
13
13
  @lang_yml = YAML::load_file(LANGUAGES_DIRECTORY + "#{lang}.yml")[language]
14
14
  @options = { :use_vowels => false }
@@ -0,0 +1,40 @@
1
# Builders for the regular expressions used by the phonetic replacement rules.
#
# Every builder assembles the pattern text and compiles it with Regexp.new
# instead of eval-ing a "/.../" literal, so pattern text containing "/" or
# "#{}" is treated as regexp source and never executed as Ruby code.
module Regexable
  # Builds a regexp matching any of the given alternatives.
  #
  # sentence - a String, or an Array of Strings joined with "|".
  # options  - :initiations  => true anchors the match with "^";
  #            :terminations => true requires a word boundary ("\b") after it.
  #
  # Returns a Regexp whose first capture group is the matched alternative.
  def mount_regexp(sentence, options = { :terminations => false, :initiations => false })
    pattern = "(#{alternatives(sentence)})"
    pattern = "^#{pattern}" if options[:initiations]
    pattern = "#{pattern}\\b" if options[:terminations]
    Regexp.new(pattern)
  end

  # Builds a pattern matching +prefix+ only when it is followed by one of
  # the +sufix+ alternatives (positive lookahead; the suffix is not consumed).
  #
  # options - :not_eval => true returns the raw pattern String instead of a
  #           compiled Regexp, so it can be embedded in a larger pattern.
  def mount_follow_up_regexp(prefix, sufix, options = {})
    pattern = "#{prefix}(?=(#{alternatives(sufix)}))"
    options[:not_eval] ? pattern : Regexp.new(pattern)
  end

  # Builds a regexp matching +char+ followed by one of +group+, where the
  # position before +char+ is either the start of a line or a character
  # other than +char+. That preceding character is captured as group 1.
  def mount_second_followed_by_regexp(char, group)
    Regexp.new(not_first(char) + mount_follow_up_regexp(char, group, :not_eval => true))
  end

  # Builds a regexp matching a vowel (captured as group 1) followed by +char+,
  # when +char+ is itself followed by a word boundary or a non-vowel.
  def mount_vowels_pronunciation_insignificance_regexp(char)
    Regexp.new("([aeiou])#{char}(?=\\b|[^aeiou])")
  end

  # Builds a regexp matching any character of +char+ that is not followed by
  # a vowel. The previous implementation eval-ed the pattern text without
  # "/" delimiters, so it never produced a Regexp.
  def mount_followed_by_consonant_regexp(char)
    Regexp.new("[#{char}](?![aeiou])")
  end

  # Returns the pattern fragment "start of line, or any character except
  # +char+", wrapped in a capture group.
  def not_first(char)
    "([^#{char}]|^)"
  end

  private

  # Joins an Array of alternatives with "|"; any other value is used as is.
  def alternatives(value)
    value.kind_of?(Array) ? value.join('|') : value
  end
end
@@ -0,0 +1,96 @@
1
+ require_relative 'regexable'
2
+
3
# Phonetic representation of a single term.
#
# +original+ holds the text as given. +homophone+ holds the phonetic form:
# it is filled in by the first call to #to_s, or it can be supplied to the
# constructor, in which case #to_s returns it untouched.
class SoundCord::Word < Struct.new(:original, :homophone)
  include Regexable

  # Returns the phonetic form, computing and caching it on first use.
  def to_s
    homophone || process_text
  end

  protected

  # Runs every rule group of the loaded language file over the lower-cased
  # original text, collapses repeated characters and stores the upper-cased
  # result in +homophone+.
  #
  # Returns the upper-cased phonetic String.
  def process_text
    SoundCord.load_language unless SoundCord.language

    self.homophone = original.downcase

    SoundCord.lang_yml.each do |key, values|
      case key
      when 'terminations'
        process_group! values, :terminations => true
      when 'initiations'
        process_group! values, :initiations => true
      when 'follow_ups'
        process_follow_ups! values, SoundCord.options
      when 'second_followed'
        process_second_followed! values, SoundCord.options
      when 'vowels_pronunciation_insignificance'
        process_vowels_pronunciation_insignificance! values, SoundCord.options
      else
        # Keys containing "duplicate" configure remove_duplicity! below
        # and are not replacement groups.
        process_group! values, SoundCord.options unless key.include? 'duplicate'
      end
    end

    remove_duplicity! :duplicate_exceptions => SoundCord.lang_yml['duplicate_exceptions']

    # Cache the final upper-case form so that repeated #to_s calls return
    # the same value as the first one (previously the lower-case working
    # copy stayed cached and leaked out on the second call).
    self.homophone = homophone.upcase
  end

  # Collapses runs of the same character into a single occurrence.
  #
  # options - :duplicate_exceptions lists characters that may stay repeated
  #           (anything responding to #include?); nil means no exceptions.
  def remove_duplicity!(options)
    # Read the option without writing a default back into the caller's hash.
    exceptions = options[:duplicate_exceptions] || []

    self.homophone = homophone.split(//).inject('') do |kept, char|
      repeated = kept[-1, 1] == char && !exceptions.include?(char)
      repeated ? kept : kept + char
    end
  end

  # Applies a group of simple replacements. Each entry is either
  # "replacement => patterns" or, when the value is nil, a bare pattern
  # that is replaced with the empty string.
  def process_group!(group, options)
    group.each do |key, values|
      if values
        simple_replace! key, values, options
      else
        simple_replace! '', key, options
      end
    end
  end

  # Replaces each prefix with +key+ wherever it is followed by one of its
  # suffixes. +options+ is accepted for signature parity and is not used.
  def process_follow_ups!(group, options = {})
    group.each do |key, prefixes|
      prefixes.each do |prefix, sufixes|
        homophone.gsub! mount_follow_up_regexp(prefix, sufixes), key
      end
    end
  end

  # Replaces each prefix with +key+ wherever it is followed by one of its
  # suffixes and is not preceded by the same character.
  # +options+ is accepted for signature parity and is not used.
  def process_second_followed!(group, options = {})
    group.each do |key, prefixes|
      prefixes.each do |prefix, sufixes|
        regexp = mount_second_followed_by_regexp prefix, sufixes
        # Block form: the captured preceding character is re-read for each
        # match instead of reusing the capture of the first match only.
        homophone.gsub!(regexp) { "#{Regexp.last_match(1)}#{key}" }
      end
    end
  end

  # Drops each +key+ character that follows a vowel and precedes a word
  # boundary or a non-vowel, keeping the vowel itself.
  # +options+ is accepted for signature parity and is not used.
  def process_vowels_pronunciation_insignificance!(group, options = {})
    group.each do |key, _value|
      regexp = mount_vowels_pronunciation_insignificance_regexp key
      # Block form keeps the vowel captured by each individual match.
      homophone.gsub!(regexp) { Regexp.last_match(1) }
    end
  end

  # Removes the characters given as the group's values when they are not
  # followed by a vowel.
  def process_followed_by_consonant_regexp!(group)
    group.each do |_key, value|
      homophone.gsub! mount_followed_by_consonant_regexp(value), ''
    end
  end

  # Replaces every match of +values+ (a String or Array of alternatives)
  # with +key+; +options+ is forwarded to Regexable#mount_regexp.
  def simple_replace!(key, values, options)
    homophone.gsub! mount_regexp(values, options), key.to_s
  end
end
@@ -1,9 +1,9 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = %q{soundcord}
3
3
  s.author = 'Lukas Alexandre'
4
- s.email = 'lukeskytm@gmail.com'
5
- s.homepage = 'http://lukasalexandre.github.com/soundcord'
6
- s.version = "0.2.1"
4
+ s.email = 'lukasalexandre@me.com'
5
+ s.homepage = 'http://lukelex.github.com/soundcord'
6
+ s.version = "0.3.0"
7
7
  s.date = Date.today
8
8
  s.summary = %q{A phonetic algorithm for indexing of words by their pronunciation.}
9
9
  s.description = %q{"Make comparisons of phonetically similar terms easier."}
metadata CHANGED
@@ -1,36 +1,27 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: soundcord
3
- version: !ruby/object:Gem::Version
4
- hash: 21
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.3.0
5
5
  prerelease:
6
- segments:
7
- - 0
8
- - 2
9
- - 1
10
- version: 0.2.1
11
6
  platform: ruby
12
- authors:
7
+ authors:
13
8
  - Lukas Alexandre
14
9
  autorequire:
15
10
  bindir: bin
16
11
  cert_chain: []
17
-
18
- date: 2012-10-24 00:00:00 Z
12
+ date: 2013-02-05 00:00:00.000000000 Z
19
13
  dependencies: []
20
-
21
14
  description: "\"Make comparisons of phonetically similar terms easier.\""
22
- email: lukeskytm@gmail.com
15
+ email: lukasalexandre@me.com
23
16
  executables: []
24
-
25
17
  extensions: []
26
-
27
18
  extra_rdoc_files: []
28
-
29
- files:
30
- - lib/soundcord/algorithm.rb
19
+ files:
31
20
  - lib/soundcord/config.rb
32
21
  - lib/soundcord/integrations/array.rb
33
22
  - lib/soundcord/integrations/string.rb
23
+ - lib/soundcord/regexable.rb
24
+ - lib/soundcord/word.rb
34
25
  - lib/soundcord.rb
35
26
  - test/languages/en/test_soundcord.rb
36
27
  - test/languages/pt_br/test_soundcord.rb
@@ -40,38 +31,28 @@ files:
40
31
  - test/test_string.rb
41
32
  - Rakefile
42
33
  - soundcord.gemspec
43
- homepage: http://lukasalexandre.github.com/soundcord
34
+ homepage: http://lukelex.github.com/soundcord
44
35
  licenses: []
45
-
46
36
  post_install_message:
47
37
  rdoc_options: []
48
-
49
- require_paths:
38
+ require_paths:
50
39
  - lib
51
- required_ruby_version: !ruby/object:Gem::Requirement
40
+ required_ruby_version: !ruby/object:Gem::Requirement
52
41
  none: false
53
- requirements:
42
+ requirements:
54
43
  - - ">="
55
- - !ruby/object:Gem::Version
56
- hash: 3
57
- segments:
58
- - 0
59
- version: "0"
60
- required_rubygems_version: !ruby/object:Gem::Requirement
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ required_rubygems_version: !ruby/object:Gem::Requirement
61
47
  none: false
62
- requirements:
48
+ requirements:
63
49
  - - ">="
64
- - !ruby/object:Gem::Version
65
- hash: 3
66
- segments:
67
- - 0
68
- version: "0"
50
+ - !ruby/object:Gem::Version
51
+ version: '0'
69
52
  requirements: []
70
-
71
53
  rubyforge_project:
72
- rubygems_version: 1.8.24
54
+ rubygems_version: 1.8.25
73
55
  signing_key:
74
56
  specification_version: 3
75
57
  summary: A phonetic algorithm for indexing of words by their pronunciation.
76
58
  test_files: []
77
-
@@ -1,137 +0,0 @@
1
- # encoding: utf-8
2
-
3
- require 'soundcord/integrations/string'
4
- require 'soundcord/integrations/array'
5
-
6
- class SoundCord
7
- private
8
- def self.process_text text
9
- load_language unless language
10
-
11
- text = text.downcase
12
-
13
- lang_yml.each do |key, values|
14
- if key == 'terminations'
15
- text = process_group text, values, :terminations => true
16
- elsif key == 'initiations'
17
- text = process_group text, values, :initiations => true
18
- elsif key == 'follow_ups'
19
- text = process_follow_ups text, values, options
20
- elsif key == 'second_followed'
21
- text = process_second_followed text, values, options
22
- elsif key == 'vowels_pronunciation_insignificance'
23
- text = process_vowels_pronunciation_insignificance text, values, options
24
- elsif !key.include? 'duplicate'
25
- text = process_group text, values, options
26
- end
27
- end
28
-
29
- text = remove_duplicity text, :duplicate_exceptions => (lang_yml['duplicate_exceptions'])
30
-
31
- text.upcase
32
- end
33
-
34
- def self.remove_duplicity text, options
35
- options[:duplicate_exceptions] = [] unless options[:duplicate_exceptions]
36
-
37
- text.split(//).inject("") do |s, n|
38
- last_s_char = s[s.length-1..s.length-1]
39
- s + ((last_s_char === n &&
40
- !options[:duplicate_exceptions].include?(n)) ? '' : n )
41
- end
42
- end
43
-
44
- def self.process_group text, group, options
45
- group.each do |key, values|
46
- if values
47
- text = simple_replace text, key, values, options
48
- else
49
- text = simple_replace text, '', key, options
50
- end
51
- end
52
- return text
53
- end
54
-
55
- def self.process_follow_ups text, group, options = {}
56
- group.each do |key, prefixes|
57
- prefixes.each do |prefix, sufixes|
58
- regexp = mount_follow_up_regexp prefix, sufixes
59
- text = text.gsub regexp, key
60
- end
61
- end
62
- return text
63
- end
64
-
65
- def self.process_second_followed text, group, options = {}
66
- group.each do |key, prefixes|
67
- prefixes.each do |prefix, sufixes|
68
- regexp = mount_second_followed_by_regexp prefix, sufixes
69
- text =~ regexp
70
- replacing = ($1 ? $1 : '') + key
71
- text = text.gsub regexp, replacing
72
- end
73
- end
74
- return text
75
- end
76
-
77
- def self.process_vowels_pronunciation_insignificance text, group, options = {}
78
- group.each do |key, value|
79
- regexp = mount_vowels_pronunciation_insignificance_regexp key
80
- text =~ regexp
81
- text = text.gsub regexp, ($1 || '')
82
- end
83
- return text
84
- end
85
-
86
- def self.process_followed_by_consonant_regexp text, group
87
- group.each do |key, value|
88
- regexp = mount_followed_by_consonant_regexp value
89
- text = text.gsub regexp, ''
90
- end
91
- return text
92
- end
93
-
94
- def self.simple_replace text, key, values, options
95
- regexp = mount_regexp values, options
96
- text.gsub regexp, key.to_s
97
- end
98
-
99
- def self.mount_regexp sentence, options = { :terminations => false, :initiations => false }
100
- regexp = "/"
101
- regexp += "^" if options[:initiations]
102
- regexp += "("
103
- regexp += sentence.kind_of?(Array) ? sentence.join("|") : sentence
104
- regexp += ")"
105
- regexp += "\\b" if options[:terminations]
106
- regexp += "/"
107
- eval(regexp)
108
- end
109
-
110
- def self.mount_follow_up_regexp prefix, sufix, options = {}
111
- regexp = options[:not_eval] ? "" : "/"
112
- regexp += prefix
113
- regexp += "(?="
114
- regexp += "("
115
- regexp += sufix.kind_of?(Array) ? sufix.join("|") : sufix
116
- regexp += "))"
117
- regexp += "/" unless options[:not_eval]
118
- options[:not_eval] ? regexp : eval(regexp)
119
- end
120
-
121
- def self.mount_second_followed_by_regexp char, group
122
- regexp = "/" + not_first(char) + mount_follow_up_regexp(char, group, :not_eval => true) + "/"
123
- eval regexp
124
- end
125
-
126
- def self.mount_vowels_pronunciation_insignificance_regexp char
127
- eval "/([aeiou])#{char}(?=\\b|[^aeiou])/"
128
- end
129
-
130
- def self.mount_followed_by_consonant_regexp char
131
- eval "[#{char}](?![aeiou])"
132
- end
133
-
134
- def self.not_first char
135
- "([^#{char}]|^)"
136
- end
137
- end