fonemas 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ NGY2NWJiYTI0ZGM3NjM3MmIzN2FjMzhiZTNmYjM1YjRhMzUxMzNlYw==
5
+ data.tar.gz: !binary |-
6
+ OTAxZWJmYTkzMzVhYWE4OTcxNzMxMTE1ODdhOTc0NWNlNGJhMjkzOA==
7
+ !binary "U0hBNTEy":
8
+ metadata.gz: !binary |-
9
+ N2Y3MmQ0YjI4ZTlkNjBhMjNmOTVjYjkxNmZlNDMwOWEyMzRhNmEyZmNhNThi
10
+ MGEyM2Y1N2MwYTU1NDE5MmE2ZDRjMjc5OTZkYWM0Mzg5NjhkMGM1NDNkZTE5
11
+ YjE5N2E3YjQ2ZjMzNmY5ZmZiYjhlOTc3Y2M4NWU2MjEwZjc1NzU=
12
+ data.tar.gz: !binary |-
13
+ NDE5NjU0ZjU4NmIyNTQ0ZWFiZTYwYjE0ZDM3YmRhMmU4MDM4Yjk4ZGRjZjM0
14
+ M2Q0YzIzM2ZkZjc2OTQ0ZDE0MjMzYjMyOTdlZWFmZTZiZWNlN2IzZjI1ZDU5
15
+ ZjIwZWIzMTk3NzQxM2M2ZTFhMWI2NjY5NTdiYWE3NGNlYzk4NTg=
data/.gitignore ADDED
@@ -0,0 +1,18 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
18
+ .idea*
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in fonemas.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 Manuel Bahamondez Honores
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,29 @@
1
+ # Fonemas
2
+
3
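+ Fonemas generates phoneme transcriptions for use in speech recognition with CMU Sphinx: it lists the possible pronunciations of a word in Chilean Spanish.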
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'fonemas'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install fonemas
18
+
19
+ ## Usage
20
+
21
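+ Load the gem with `require 'fonemas'` and call `Fonemas.fonemas(word)`; it returns an array with the possible pronunciations of `word`, each one a string of space-separated phonemes.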
22
+
23
+ ## Contributing
24
+
25
+ 1. Fork it
26
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
27
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
28
+ 4. Push to the branch (`git push origin my-new-feature`)
29
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
data/fonemas.gemspec ADDED
@@ -0,0 +1,23 @@
1
# coding: utf-8
# Gem specification for fonemas.
#
# Puts the local lib/ directory on the load path so the version constant can
# be read straight from the source tree when the gem is built.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'fonemas/version'

Gem::Specification.new do |spec|
  spec.name          = "fonemas"
  spec.version       = Fonemas::VERSION
  spec.authors       = ["Manuel Bahamondez Honores"]
  spec.email         = ["manuel@bahamondez.com"]
  spec.description   = %q{Creación de fonemas para ser utilizadas en el reconocimiento de voz con cmu sphinx}
  spec.summary       = %q{Lista todas las pronunciaciones posibles para una palabra en Chileno}
  spec.homepage      = "http://www.b9.cl"
  spec.license       = "MIT"

  # Package every file tracked by git, except the IDE project settings under
  # .idea/: they are editor metadata, not part of the library, and should not
  # be shipped inside the gem.
  spec.files         = `git ls-files`.split($/).reject { |f| f.start_with?('.idea/') }
  # Executables are the basenames of whatever is tracked under bin/.
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler", "~> 1.3"
  spec.add_development_dependency "rake"
end
data/lib/fonemas.rb ADDED
@@ -0,0 +1,300 @@
1
+ # coding: utf-8
2
+ require "fonemas/version"
3
+
4
# Grapheme-to-phoneme conversion that lists the possible pronunciations of a
# word. Words are first split into letters (treating "ch", "ll" and "rr" as
# single letters); each letter then yields either one phoneme, a list of
# alternative phonemes, or nothing when it is silent.
module Fonemas
  # Vowels that carry a written accent.
  TILDES = %w(á é í ó ú).freeze
  # Every letter considered a vowel.
  VOCALES = (%w(a e i o u) + TILDES).freeze
  # Open and closed vowels, used to detect diphthongs.
  ABIERTAS = %w(a e o).freeze
  CERRADAS = %w(i u).freeze
  # Letters treated as fricatives when they precede a "b".
  FRICATIVAS = %w(f s c z j ll y g b w v m x d).freeze
  # Vowels that soften a preceding "c" or "g" and silence the "u" of "gu".
  VOCALES_ANTERIORES = %w(e i é í).freeze
  # Phoneme emitted for each accented vowel.
  FONEMA_TONICO = {
    'á' => 'aa', 'é' => 'ee', 'í' => 'ii', 'ó' => 'oo', 'ú' => 'uu'
  }.freeze

  # Normalizes free text: commas become spaces, runs of whitespace collapse to
  # a single space, surrounding whitespace is removed and the result is
  # lower-cased.
  #
  # @param text [String]
  # @return [String]
  def self.clean(text)
    text.gsub(/,/, ' ').gsub(/\s+/, ' ').strip.downcase
  end

  # Tells whether the letter at position +i+ is a stressed vowel.
  #
  # TODO: stressed vowels that carry no written accent are not detected yet,
  # so this is only true for á, é, í, ó and ú.
  #
  # @param word [String, Array<String>] word or list of letters
  # @param i [Integer] position of the letter
  # @return [Boolean]
  def self.isTonica(word, i)
    TILDES.include?(word[i])
  end

  # Tells whether +i+ is the position of the last letter of +word+.
  #
  # @param word [String, Array<String>]
  # @param i [Integer]
  # @return [Boolean]
  def self.isFinal(word, i)
    i == word.size - 1
  end

  # Tells whether the letter at position +i+ belongs to FRICATIVAS.
  #
  # @param word [String, Array<String>]
  # @param i [Integer]
  # @return [Boolean]
  def self.isFricativa(word, i)
    FRICATIVAS.include?(word[i])
  end

  # Tells whether the letter at position +i+ has a vowel on both sides.
  # The first and last letters have only one neighbour, so they never qualify.
  #
  # @param word [String, Array<String>]
  # @param i [Integer]
  # @return [Boolean]
  def self.entreVocales(word, i)
    return false if i == 0 || i == word.size - 1
    isVocal(word, i - 1) && isVocal(word, i + 1)
  end

  # Tells whether exactly one of the two neighbours of position +i+ is a
  # vowel. A missing neighbour (before the first letter or after the last one)
  # counts as a non-vowel; position 0 never looks at the end of the word.
  #
  # @param word [String, Array<String>]
  # @param i [Integer]
  # @return [Boolean]
  def self.entreVocalyConsonante(word, i)
    # Guard i > 0: a negative index would wrap around to the end of the word.
    antes = i > 0 && isVocal(word, i - 1)
    despues = isVocal(word, i + 1)
    antes != despues
  end

  # Tells whether the letter at position +i+ is a vowel (accented or not).
  #
  # @param word [String, Array<String>]
  # @param i [Integer]
  # @return [Boolean]
  def self.isVocal(word, i)
    VOCALES.include?(word[i])
  end

  # Tells whether the letters at positions +first+ and +second+ are one open
  # vowel (a, e, o) and one closed vowel (i, u), in either order.
  #
  # @param word [String, Array<String>]
  # @param first [Integer]
  # @param second [Integer]
  # @return [Boolean]
  def self.isDiptongo(word, first, second)
    f = word[first]
    s = word[second]
    (ABIERTAS.include?(f) && CERRADAS.include?(s)) ||
      (ABIERTAS.include?(s) && CERRADAS.include?(f))
  end

  # Splits a word into lower-case letters, keeping the digraphs "ch", "ll"
  # and "rr" together as single letters.
  #
  # @param word [String]
  # @return [Array<String>]
  def self.separar(word)
    # Alternatives are tried left to right, so digraphs win over single chars;
    # /m lets "." match newlines too, so no character is ever dropped.
    word.downcase.scan(/ch|ll|rr|./m)
  end

  # Lists the possible pronunciations of +word+.
  #
  # @param word [String]
  # @return [Array<String>] one string of space-separated phonemes per
  #   pronunciation
  def self.fonemas(word)
    letras = separar(word)
    partes = []
    letras.each_index do |i|
      parte = fonemaDeLetra(letras, i)
      partes << parte unless parte.nil? # nil marks a silent letter
    end
    generateFonemas(partes)
  end

  # Phoneme(s) for the letter at position +i+ of the already separated word.
  #
  # @param word [Array<String>] letters as returned by separar
  # @param i [Integer] position of the letter
  # @return [String, Array<String>, nil] a single phoneme, a list of
  #   alternatives (an empty string means "may be omitted"), or nil when the
  #   letter is silent
  def self.fonemaDeLetra(word, i)
    letra = word[i]
    # There is no previous letter at position 0; never wrap around to the end.
    anterior = i > 0 ? word[i - 1] : nil
    siguiente = word[i + 1]
    ultima = isFinal(word, i)

    case letra
    when 'á', 'é', 'í', 'ó', 'ú'
      FONEMA_TONICO[letra]
    when 'a', 'e', 'i', 'o'
      isTonica(word, i) ? letra * 2 : letra
    when 'b'
      if siguiente == 'u' && isDiptongo(word, i + 1, i + 2)
        entreVocales(word, i) ? ['b', 'g', ''] : ['B', 'g']
      elsif (i > 0 && isFricativa(word, i - 1)) || ultima
        'b'
      elsif entreVocales(word, i)
        ['b', '']
      else
        'B'
      end
    when 'c'
      VOCALES_ANTERIORES.include?(siguiente) ? 's' : 'k'
    when 'ch'
      entreVocales(word, i) ? ['ch', 'sh', 'tch', 'j'] : ['ch', 'sh', 'tch']
    when 'd'
      if entreVocales(word, i) || ultima
        ['d', '']
      elsif entreVocalyConsonante(word, i)
        ['D', 'd']
      else
        'd'
      end
    when 'f', 'j', 'k', 'l', 'm', 'n', 'p', 't'
      letra
    when 'g'
      if VOCALES_ANTERIORES.include?(siguiente)
        'j'
      else
        entreVocales(word, i) ? 'g' : 'G'
      end
    when 'h'
      nil
    when 'll'
      ['ll', 'lli', 'i']
    when 'ñ'
      'nh'
    when 'q'
      'k'
    when 'r'
      i == 0 ? 'R' : 'r'
    when 'rr'
      'R'
    when 's'
      if anterior == 'r' || anterior == 'd' || ultima
        ['s', '', 'h']
      elsif entreVocalyConsonante(word, i)
        ['s', 'h']
      elsif anterior == 'b' && siguiente == 't'
        ['s', 'h']
      elsif anterior == 'b'
        ['s', '']
      else
        's'
      end
    when 'ü'
      'u'
    when 'u'
      # "qu" always hides the u; "gu" hides it only before e/i (as in
      # "guitarra"), while it is pronounced before a/o (as in "agua").
      if anterior == 'q' || (anterior == 'g' && VOCALES_ANTERIORES.include?(siguiente))
        nil
      elsif anterior == 'h' || i == 0
        'gu'
      elsif isTonica(word, i)
        'uu'
      else
        'u'
      end
    when 'v'
      'b'
    when 'w'
      if i == 0
        ['b', 'B']
      elsif anterior == 'o' || siguiente == 'i'
        'u'
      elsif entreVocales(word, i)
        'gu'
      else
        'Gu'
      end
    when 'x'
      ['ks', 'k', 'h']
    when 'y'
      ultima ? 'i' : ['ll', 'lli', 'i']
    when 'z'
      ultima ? ['s', 'h', ''] : 's'
    else
      # Unknown characters are passed through unchanged.
      letra
    end
  end
  private_class_method :fonemaDeLetra

  # Expands a sequence of phoneme slots into every possible pronunciation.
  #
  # Each slot is either a single phoneme (String) or a list of alternative
  # phonemes (Array). The result is the cartesian product of all slots, with
  # the alternatives of the last slot varying fastest. Empty phonemes are
  # left out of the joined string.
  #
  # @param fonema [Array<String, Array<String>>]
  # @return [Array<String>] phonemes of each pronunciation joined by spaces
  def self.generateFonemas(fonema)
    opciones = fonema.map { |parte| parte.is_a?(Array) ? parte : [parte] }
    combinaciones =
      if opciones.empty?
        [[]] # no slots: a single, empty pronunciation
      else
        opciones.first.product(*opciones.drop(1))
      end
    combinaciones.map do |combinacion|
      combinacion.reject { |f| f.empty? }.join(' ')
    end
  end
end
@@ -0,0 +1,3 @@
1
module Fonemas
  # Version of the fonemas gem; the gemspec reads it as Fonemas::VERSION.
  # Frozen so the shared constant cannot be mutated in place.
  VERSION = "0.0.2".freeze
end
metadata ADDED
@@ -0,0 +1,87 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: fonemas
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ platform: ruby
6
+ authors:
7
+ - Manuel Bahamondez Honores
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2013-06-10 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: '1.3'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: '1.3'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ! '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ! '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ description: Creación de fonemas para ser utilizadas en el reconocimiento de voz con
42
+ cmu sphinx
43
+ email:
44
+ - manuel@bahamondez.com
45
+ executables: []
46
+ extensions: []
47
+ extra_rdoc_files: []
48
+ files:
49
+ - .gitignore
50
+ - .idea/encodings.xml
51
+ - .idea/fonemas.iml
52
+ - .idea/misc.xml
53
+ - .idea/modules.xml
54
+ - .idea/scopes/scope_settings.xml
55
+ - .idea/vcs.xml
56
+ - Gemfile
57
+ - LICENSE.txt
58
+ - README.md
59
+ - Rakefile
60
+ - fonemas.gemspec
61
+ - lib/fonemas.rb
62
+ - lib/fonemas/version.rb
63
+ homepage: http://www.b9.cl
64
+ licenses:
65
+ - MIT
66
+ metadata: {}
67
+ post_install_message:
68
+ rdoc_options: []
69
+ require_paths:
70
+ - lib
71
+ required_ruby_version: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ! '>='
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ required_rubygems_version: !ruby/object:Gem::Requirement
77
+ requirements:
78
+ - - ! '>='
79
+ - !ruby/object:Gem::Version
80
+ version: '0'
81
+ requirements: []
82
+ rubyforge_project:
83
+ rubygems_version: 2.0.3
84
+ signing_key:
85
+ specification_version: 4
86
+ summary: Lista todas las pronunciaciones posibles para una palabra en Chileno
87
+ test_files: []