fonemas 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +15 -0
- data/.gitignore +18 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +29 -0
- data/Rakefile +1 -0
- data/fonemas.gemspec +23 -0
- data/lib/fonemas.rb +300 -0
- data/lib/fonemas/version.rb +3 -0
- metadata +87 -0
checksums.yaml
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
---
|
2
|
+
!binary "U0hBMQ==":
|
3
|
+
metadata.gz: !binary |-
|
4
|
+
NGY2NWJiYTI0ZGM3NjM3MmIzN2FjMzhiZTNmYjM1YjRhMzUxMzNlYw==
|
5
|
+
data.tar.gz: !binary |-
|
6
|
+
OTAxZWJmYTkzMzVhYWE4OTcxNzMxMTE1ODdhOTc0NWNlNGJhMjkzOA==
|
7
|
+
!binary "U0hBNTEy":
|
8
|
+
metadata.gz: !binary |-
|
9
|
+
N2Y3MmQ0YjI4ZTlkNjBhMjNmOTVjYjkxNmZlNDMwOWEyMzRhNmEyZmNhNThi
|
10
|
+
MGEyM2Y1N2MwYTU1NDE5MmE2ZDRjMjc5OTZkYWM0Mzg5NjhkMGM1NDNkZTE5
|
11
|
+
YjE5N2E3YjQ2ZjMzNmY5ZmZiYjhlOTc3Y2M4NWU2MjEwZjc1NzU=
|
12
|
+
data.tar.gz: !binary |-
|
13
|
+
NDE5NjU0ZjU4NmIyNTQ0ZWFiZTYwYjE0ZDM3YmRhMmU4MDM4Yjk4ZGRjZjM0
|
14
|
+
M2Q0YzIzM2ZkZjc2OTQ0ZDE0MjMzYjMyOTdlZWFmZTZiZWNlN2IzZjI1ZDU5
|
15
|
+
ZjIwZWIzMTk3NzQxM2M2ZTFhMWI2NjY5NTdiYWE3NGNlYzk4NTg=
|
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2013 Manuel Bahamondez Honores
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
# Fonemas
|
2
|
+
|
3
|
+
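Fonemas lists every possible Chilean Spanish pronunciation of a word as phoneme sequences, intended for building pronunciation dictionaries for speech recognition with CMU Sphinx.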
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
Add this line to your application's Gemfile:
|
8
|
+
|
9
|
+
gem 'fonemas'
|
10
|
+
|
11
|
+
And then execute:
|
12
|
+
|
13
|
+
$ bundle
|
14
|
+
|
15
|
+
Or install it yourself as:
|
16
|
+
|
17
|
+
$ gem install fonemas
|
18
|
+
|
19
|
+
## Usage
|
20
|
+
|
21
|
+
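Require the gem with `require 'fonemas'` and call `Fonemas.fonemas` with a word; it returns an array of space-separated phoneme strings, one per possible pronunciation, e.g. `Fonemas.fonemas('casa') # => ["k a s a"]`.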
|
22
|
+
|
23
|
+
## Contributing
|
24
|
+
|
25
|
+
1. Fork it
|
26
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
27
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
28
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
29
|
+
5. Create new Pull Request
|
data/Rakefile
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require "bundler/gem_tasks"
|
data/fonemas.gemspec
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'fonemas/version'
|
5
|
+
|
6
|
+
# Gem packaging definition for fonemas.
Gem::Specification.new do |spec|
  spec.name          = "fonemas"
  spec.version       = Fonemas::VERSION
  spec.authors       = ["Manuel Bahamondez Honores"]
  spec.email         = ["manuel@bahamondez.com"]
  spec.description   = %q{Creación de fonemas para ser utilizadas en el reconocimiento de voz con cmu sphinx}
  spec.summary       = %q{Lista todas las pronunciaciones posibles para una palabra en Chileno}
  spec.homepage      = "http://www.b9.cl"
  spec.license       = "MIT"

  # Package every git-tracked file except IDE project settings (.idea/),
  # which are tracked in the repository but are of no use to gem consumers.
  spec.files         = `git ls-files`.split($/).reject { |path| path.start_with?('.idea/') }
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler", "~> 1.3"
  spec.add_development_dependency "rake"
end
|
data/lib/fonemas.rb
ADDED
@@ -0,0 +1,300 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
require "fonemas/version"
|
3
|
+
|
4
|
+
# Produces the possible phoneme transcriptions of a Spanish word.
#
# A word is first split into graphemes (see +separar+), each grapheme is mapped
# to one phoneme or to a list of alternative phonemes (an empty string meaning
# "the sound may be dropped"), and finally every combination of alternatives is
# expanded into a space-separated phoneme string (see +generateFonemas+).
#
# All helpers that take +word+ and an index only use +word[i]+ and +word.size+,
# so they accept either a String or the grapheme Array returned by +separar+.
module Fonemas
  # Vowels carrying a written accent.
  TILDES = %w(á é í ó ú).freeze
  # Every grapheme treated as a vowel.
  VOCALES = %w(a e i o u á é í ó ú).freeze
  # Open and closed vowels used by the diphthong test.
  ABIERTAS = %w(a e o).freeze
  CERRADAS = %w(i u).freeze
  # Graphemes that +isFricativa+ accepts (duplicates of the original list removed).
  FRICATIVAS = %w(f s c z j ll y g b w v m x d).freeze
  # Vowels in front of which 'c' sounds like 's' and 'g' sounds like 'j'.
  VOCALES_SUAVES = %w(e i é í).freeze
  # Graphemes whose phoneme does not depend on the surrounding letters.
  FONEMAS_FIJOS = {
    'á' => 'aa', 'é' => 'ee', 'í' => 'ii', 'ó' => 'oo', 'ú' => 'uu',
    'f' => 'f', 'j' => 'j', 'k' => 'k', 'l' => 'l', 'm' => 'm', 'n' => 'n',
    'ñ' => 'nh', 'p' => 'p', 'q' => 'k', 'rr' => 'R', 't' => 't',
    'ü' => 'u', 'v' => 'b',
    'll' => %w(ll lli i).freeze,
    'x' => %w(ks k h).freeze
  }.freeze

  # Normalizes free text: commas become spaces, whitespace runs collapse to a
  # single space, surrounding whitespace is removed and the text is downcased.
  #
  # @param text [String]
  # @return [String]
  def self.clean(text)
    text.tr(',', ' ').gsub(/\s+/, ' ').strip.downcase
  end

  # Tells whether the letter at +i+ is a vowel with a written accent.
  #
  # TODO: words that are stressed but carry no written accent are not handled.
  #
  # @return [Boolean]
  def self.isTonica(word, i)
    TILDES.include?(word[i])
  end

  # Tells whether +i+ is the index of the last letter of +word+.
  # (The original compared +word.size+ with +i - 1+, which never matched.)
  #
  # @return [Boolean]
  def self.isFinal(word, i)
    i == word.size - 1
  end

  # Tells whether the letter at +i+ belongs to FRICATIVAS.
  #
  # @return [Boolean]
  def self.isFricativa(word, i)
    FRICATIVAS.include?(word[i])
  end

  # Tells whether the letter at +i+ has a vowel on both sides. The first and
  # last positions are never "between vowels".
  #
  # @return [Boolean]
  def self.entreVocales(word, i)
    return false if i == 0 || i == word.size - 1

    isVocal(word, i - 1) && isVocal(word, i + 1)
  end

  # Tells whether exactly one neighbour of the letter at +i+ is a vowel.
  # A missing neighbour (before the first or after the last letter) counts as
  # "not a vowel"; in particular index 0 no longer wraps around to the last
  # letter of the word.
  #
  # @return [Boolean]
  def self.entreVocalyConsonante(word, i)
    antes = i > 0 && isVocal(word, i - 1)
    despues = isVocal(word, i + 1)
    antes != despues
  end

  # Tells whether the letter at +i+ is a vowel (accented or not).
  #
  # @return [Boolean]
  def self.isVocal(word, i)
    VOCALES.include?(word[i])
  end

  # Tells whether the letters at +first+ and +second+ are one open vowel
  # (a, e, o) and one closed vowel (i, u), in either order.
  #
  # @return [Boolean]
  def self.isDiptongo(word, first, second)
    f = word[first]
    s = word[second]
    (ABIERTAS.include?(f) && CERRADAS.include?(s)) ||
      (ABIERTAS.include?(s) && CERRADAS.include?(f))
  end

  # Splits a word into lower-case graphemes, keeping the digraphs "ch", "ll"
  # and "rr" together.
  #
  # @param word [String]
  # @return [Array<String>]
  def self.separar(word)
    # Alternation order makes digraphs win over single characters; /m lets
    # "." match newlines so no character is ever skipped.
    word.downcase.scan(/ch|ll|rr|./m)
  end

  # Returns every pronunciation of +word+ as space-separated phoneme strings.
  #
  # @param word [String]
  # @return [Array<String>]
  def self.fonemas(word)
    letras = separar(word)
    secuencia = []
    letras.each_index do |i|
      sonido = FONEMAS_FIJOS.fetch(letras[i]) { fonema_contextual(letras, i) }
      # nil marks a silent letter that contributes no phoneme at all.
      secuencia << sonido unless sonido.nil?
    end
    generateFonemas(secuencia)
  end

  # Maps the context-dependent grapheme at +i+ to a phoneme (String), a list of
  # alternative phonemes (Array, '' meaning "may be dropped") or nil when the
  # letter is silent. Graphemes without a rule are returned unchanged.
  def self.fonema_contextual(word, i)
    letra = word[i]
    # The first letter has no predecessor; word[-1] would be the LAST letter.
    anterior = i > 0 ? word[i - 1] : nil
    siguiente = word[i + 1]
    ultima = (i == word.size - 1)

    case letra
    when 'a', 'e', 'i', 'o'
      isTonica(word, i) ? letra * 2 : letra
    when 'b'
      if siguiente == 'u' && isDiptongo(word, i + 1, i + 2)
        entreVocales(word, i) ? ['b', 'g', ''] : %w(B g)
      elsif (i > 0 && isFricativa(word, i - 1)) || isFinal(word, i)
        'b'
      elsif entreVocales(word, i)
        ['b', '']
      else
        'B'
      end
    when 'c'
      VOCALES_SUAVES.include?(siguiente) ? 's' : 'k'
    when 'ch'
      entreVocales(word, i) ? %w(ch sh tch j) : %w(ch sh tch)
    when 'd'
      if entreVocales(word, i) || ultima
        ['d', '']
      elsif entreVocalyConsonante(word, i)
        %w(D d)
      else
        'd'
      end
    when 'g'
      if VOCALES_SUAVES.include?(siguiente)
        'j'
      elsif entreVocales(word, i)
        'g'
      else
        'G'
      end
    when 'h'
      nil
    when 'r'
      i == 0 ? 'R' : 'r'
    when 's'
      if anterior == 'r' || anterior == 'd' || ultima
        ['s', '', 'h']
      elsif entreVocalyConsonante(word, i) || (anterior == 'b' && siguiente == 't')
        %w(s h)
      elsif anterior == 'b'
        ['s', '']
      else
        's'
      end
    when 'u'
      if anterior == 'q' || anterior == 'g'
        nil
      elsif anterior == 'h' || i == 0
        'gu'
      elsif isTonica(word, i)
        'uu'
      else
        'u'
      end
    when 'w'
      if i == 0
        %w(b B)
      elsif anterior == 'o' || siguiente == 'i'
        'u'
      elsif entreVocales(word, i)
        'gu'
      else
        'Gu'
      end
    when 'y'
      ultima ? 'i' : %w(ll lli i)
    when 'z'
      ultima ? ['s', 'h', ''] : 's'
    else
      letra
    end
  end
  private_class_method :fonema_contextual

  # Expands a phoneme sequence into every concrete pronunciation.
  #
  # Each element of +fonema+ is either a String (fixed phoneme) or an Array of
  # alternatives. The result contains one string per combination of
  # alternatives (the last alternative list varies fastest), with empty
  # alternatives omitted, phonemes joined by single spaces and duplicates
  # removed. The original picked alternative +j % size+ from every list, so
  # lists of equal size moved in lock-step and combinations were lost.
  #
  # @param fonema [Array<String, Array<String>>]
  # @return [Array<String>]
  def self.generateFonemas(fonema)
    combinaciones = fonema.reduce([[]]) do |parciales, opciones|
      parciales.flat_map do |parcial|
        Array(opciones).map { |opcion| parcial + [opcion] }
      end
    end
    combinaciones.map { |combo| combo.reject(&:empty?).join(' ') }.uniq
  end
end
|
metadata
ADDED
@@ -0,0 +1,87 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: fonemas
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.2
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Manuel Bahamondez Honores
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2013-06-10 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ~>
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.3'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ~>
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.3'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ! '>='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ! '>='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
description: Creación de fonemas para ser utilizadas en el reconocimiento de voz con
|
42
|
+
cmu sphinx
|
43
|
+
email:
|
44
|
+
- manuel@bahamondez.com
|
45
|
+
executables: []
|
46
|
+
extensions: []
|
47
|
+
extra_rdoc_files: []
|
48
|
+
files:
|
49
|
+
- .gitignore
|
50
|
+
- .idea/encodings.xml
|
51
|
+
- .idea/fonemas.iml
|
52
|
+
- .idea/misc.xml
|
53
|
+
- .idea/modules.xml
|
54
|
+
- .idea/scopes/scope_settings.xml
|
55
|
+
- .idea/vcs.xml
|
56
|
+
- Gemfile
|
57
|
+
- LICENSE.txt
|
58
|
+
- README.md
|
59
|
+
- Rakefile
|
60
|
+
- fonemas.gemspec
|
61
|
+
- lib/fonemas.rb
|
62
|
+
- lib/fonemas/version.rb
|
63
|
+
homepage: http://www.b9.cl
|
64
|
+
licenses:
|
65
|
+
- MIT
|
66
|
+
metadata: {}
|
67
|
+
post_install_message:
|
68
|
+
rdoc_options: []
|
69
|
+
require_paths:
|
70
|
+
- lib
|
71
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ! '>='
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
77
|
+
requirements:
|
78
|
+
- - ! '>='
|
79
|
+
- !ruby/object:Gem::Version
|
80
|
+
version: '0'
|
81
|
+
requirements: []
|
82
|
+
rubyforge_project:
|
83
|
+
rubygems_version: 2.0.3
|
84
|
+
signing_key:
|
85
|
+
specification_version: 4
|
86
|
+
summary: Lista todas las pronunciaciones posibles para una palabra en Chileno
|
87
|
+
test_files: []
|