fonemas 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ NGY2NWJiYTI0ZGM3NjM3MmIzN2FjMzhiZTNmYjM1YjRhMzUxMzNlYw==
5
+ data.tar.gz: !binary |-
6
+ OTAxZWJmYTkzMzVhYWE4OTcxNzMxMTE1ODdhOTc0NWNlNGJhMjkzOA==
7
+ !binary "U0hBNTEy":
8
+ metadata.gz: !binary |-
9
+ N2Y3MmQ0YjI4ZTlkNjBhMjNmOTVjYjkxNmZlNDMwOWEyMzRhNmEyZmNhNThi
10
+ MGEyM2Y1N2MwYTU1NDE5MmE2ZDRjMjc5OTZkYWM0Mzg5NjhkMGM1NDNkZTE5
11
+ YjE5N2E3YjQ2ZjMzNmY5ZmZiYjhlOTc3Y2M4NWU2MjEwZjc1NzU=
12
+ data.tar.gz: !binary |-
13
+ NDE5NjU0ZjU4NmIyNTQ0ZWFiZTYwYjE0ZDM3YmRhMmU4MDM4Yjk4ZGRjZjM0
14
+ M2Q0YzIzM2ZkZjc2OTQ0ZDE0MjMzYjMyOTdlZWFmZTZiZWNlN2IzZjI1ZDU5
15
+ ZjIwZWIzMTk3NzQxM2M2ZTFhMWI2NjY5NTdiYWE3NGNlYzk4NTg=
data/.gitignore ADDED
@@ -0,0 +1,18 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
18
+ .idea*
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in fonemas.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 Manuel Bahamondez Honores
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,29 @@
1
+ # Fonemas
2
+
3
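+ Fonemas lists every possible pronunciation of a word in Chilean Spanish as a sequence of phonemes, for use in speech recognition with CMU Sphinx.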
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'fonemas'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install fonemas
18
+
19
+ ## Usage
20
+
21
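+ Call `Fonemas.fonemas(word)` with a single word; it returns an array of strings, each one a space-separated phoneme sequence for one possible pronunciation.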
22
+
23
+ ## Contributing
24
+
25
+ 1. Fork it
26
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
27
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
28
+ 4. Push to the branch (`git push origin my-new-feature`)
29
+ 5. Create a new Pull Request
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
data/fonemas.gemspec ADDED
@@ -0,0 +1,23 @@
1
# coding: utf-8
# Gem specification for fonemas. The lib/ directory is put on the load path
# first so that the version constant can be required below.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'fonemas/version'

Gem::Specification.new do |gem|
  # Identity and authorship.
  gem.name        = 'fonemas'
  gem.version     = Fonemas::VERSION
  gem.authors     = ['Manuel Bahamondez Honores']
  gem.email       = ['manuel@bahamondez.com']
  gem.description = 'Creación de fonemas para ser utilizadas en el reconocimiento de voz con cmu sphinx'
  gem.summary     = 'Lista todas las pronunciaciones posibles para una palabra en Chileno'
  gem.homepage    = 'http://www.b9.cl'
  gem.license     = 'MIT'

  # Package every file tracked by git, then derive the executables and the
  # test files from that list by path prefix.
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}).map { |path| File.basename(path) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ['lib']

  # Tooling needed only while developing the gem.
  gem.add_development_dependency 'bundler', '~> 1.3'
  gem.add_development_dependency 'rake'
end
data/lib/fonemas.rb ADDED
@@ -0,0 +1,300 @@
1
+ # coding: utf-8
2
+ require "fonemas/version"
3
+
4
# Converts a written Spanish word into the list of phoneme sequences it may be
# pronounced as. A word is first split into graphemes (see .separar), each
# grapheme is mapped to one phoneme symbol or to a list of alternative symbols
# (see .fonemas), and finally every combination of alternatives is expanded
# into its own output string (see .generateFonemas).
#
# All index-based predicates treat an index outside the word as "no letter",
# so negative indexes never wrap around to the end of the word.
module Fonemas
  # Vowels carrying a written accent mark.
  TILDES = %w[á é í ó ú].freeze
  # Every letter treated as a vowel.
  VOCALES = (%w[a e i o u] + TILDES).freeze
  # Open and closed vowels, used to detect diphthongs.
  ABIERTAS = %w[a e o].freeze
  CERRADAS = %w[i u].freeze
  # Graphemes reported by .isFricativa.
  FRICATIVAS = %w[f s c z j ll y g b w v m x d].freeze
  # Vowels in front of which "c" and "g" take their soft sound and the "u"
  # of "gu" is silent.
  SUAVIZANTES = %w[e i é í].freeze
  # Graphemes whose output never depends on the surrounding letters.
  FIJOS = {
    'á' => 'aa', 'é' => 'ee', 'í' => 'ii', 'ó' => 'oo', 'ú' => 'uu',
    'f' => 'f', 'j' => 'j', 'k' => 'k', 'l' => 'l', 'm' => 'm', 'n' => 'n',
    'ñ' => 'nh', 'p' => 'p', 'q' => 'k', 'rr' => 'R', 't' => 't',
    'ü' => 'u', 'v' => 'b',
    'll' => %w[ll lli i].freeze,
    'x' => %w[ks k h].freeze
  }.freeze
  private_constant :TILDES, :VOCALES, :ABIERTAS, :CERRADAS, :FRICATIVAS,
                   :SUAVIZANTES, :FIJOS

  # Normalises free text: commas become spaces, runs of whitespace collapse to
  # a single space, surrounding whitespace is removed and the result is
  # lower-cased.
  #
  # @param text [String]
  # @return [String]
  def self.clean(text)
    text.tr(',', ' ').gsub(/\s+/, ' ').strip.downcase
  end

  # Tells whether the letter at position +i+ is a vowel with a written accent.
  # TODO: words that are stressed but carry no written accent are not detected.
  #
  # @param word [Array<String>, String] graphemes (or characters) of the word
  # @param i [Integer] position to inspect
  # @return [Boolean]
  def self.isTonica(word, i)
    TILDES.include?(vecino(word, i))
  end

  # Tells whether +i+ is the last position of +word+.
  #
  # @return [Boolean]
  def self.isFinal(word, i)
    i == word.size - 1
  end

  # Tells whether the grapheme at position +i+ belongs to the FRICATIVAS list.
  # Positions outside the word yield false.
  #
  # @return [Boolean]
  def self.isFricativa(word, i)
    FRICATIVAS.include?(vecino(word, i))
  end

  # Tells whether position +i+ has a vowel on both sides. The first and last
  # positions are never "between" anything.
  #
  # @return [Boolean]
  def self.entreVocales(word, i)
    return false if i <= 0 || i >= word.size - 1

    isVocal(word, i - 1) && isVocal(word, i + 1)
  end

  # Tells whether position +i+ has a vowel on exactly one side and a
  # non-vowel on the other. Like .entreVocales, it is false at both ends of
  # the word, where one of the two neighbours does not exist.
  #
  # @return [Boolean]
  def self.entreVocalyConsonante(word, i)
    return false if i <= 0 || i >= word.size - 1

    isVocal(word, i - 1) != isVocal(word, i + 1)
  end

  # Tells whether the letter at position +i+ is a vowel (accented or not).
  # Positions outside the word yield false.
  #
  # @return [Boolean]
  def self.isVocal(word, i)
    VOCALES.include?(vecino(word, i))
  end

  # Tells whether the letters at +first+ and +second+ are one open vowel
  # (a, e, o) and one closed vowel (i, u), in either order.
  #
  # @return [Boolean]
  def self.isDiptongo(word, first, second)
    f = vecino(word, first)
    s = vecino(word, second)
    (ABIERTAS.include?(f) && CERRADAS.include?(s)) ||
      (ABIERTAS.include?(s) && CERRADAS.include?(f))
  end

  # Splits a word into lower-case graphemes, keeping the digraphs "ch", "ll"
  # and "rr" together as single elements.
  #
  # @param word [String]
  # @return [Array<String>]
  def self.separar(word)
    word.downcase.scan(/ch|ll|rr|./m)
  end

  # Lists the pronunciations of +word+.
  #
  # @param word [String] a single word
  # @return [Array<String>] one string per pronunciation, phoneme symbols
  #   separated by single spaces
  def self.fonemas(word)
    letras = separar(word)
    fonema = []
    letras.each_index do |i|
      sonido = traducir(letras, i)
      # nil marks a silent grapheme, which contributes nothing.
      fonema << sonido unless sonido.nil?
    end
    generateFonemas(fonema)
  end

  # Expands a sequence of phoneme slots into every possible pronunciation.
  #
  # @param fonema [Array<String, Array<String>>] one entry per sounded
  #   grapheme: either a single symbol or a list of alternative symbols, where
  #   an empty string means "may be dropped"
  # @return [Array<String>] the distinct combinations (Cartesian product of
  #   all alternatives, last slot varying fastest), symbols joined by spaces
  def self.generateFonemas(fonema)
    combinaciones = fonema.reduce([[]]) do |parciales, opcion|
      parciales.product(Array(opcion)).map do |parcial, sonido|
        # An empty alternative elides the sound instead of adding a blank.
        sonido.empty? ? parcial : parcial + [sonido]
      end
    end
    combinaciones.map { |sonidos| sonidos.join(' ') }.uniq
  end

  # Returns the element at +i+, or nil when +i+ lies outside the word. Plain
  # word[-1] would silently return the last letter instead.
  def self.vecino(word, i)
    return nil if i < 0 || i >= word.size

    word[i]
  end

  # Maps the grapheme at position +i+ to its phoneme symbol, to a list of
  # alternative symbols, or to nil when the grapheme is silent. Graphemes
  # without a rule are passed through unchanged.
  def self.traducir(word, i)
    letra = word[i]
    return FIJOS[letra] if FIJOS.key?(letra)

    previa = vecino(word, i - 1)
    siguiente = vecino(word, i + 1)
    final = isFinal(word, i)

    case letra
    when 'a', 'e', 'i', 'o'
      # isTonica only recognises written accents, so an unaccented vowel
      # currently always yields the single form.
      isTonica(word, i) ? letra * 2 : letra
    when 'b'
      if siguiente == 'u' && isDiptongo(word, i + 1, i + 2)
        entreVocales(word, i) ? ['b', 'g', ''] : %w[B g]
      elsif isFricativa(word, i - 1) || final
        'b'
      elsif entreVocales(word, i)
        ['b', '']
      else
        'B'
      end
    when 'c'
      SUAVIZANTES.include?(siguiente) ? 's' : 'k'
    when 'ch'
      entreVocales(word, i) ? %w[ch sh tch j] : %w[ch sh tch]
    when 'd'
      if entreVocales(word, i) || final
        ['d', '']
      elsif entreVocalyConsonante(word, i)
        %w[D d]
      else
        'd'
      end
    when 'g'
      if SUAVIZANTES.include?(siguiente)
        'j'
      else
        entreVocales(word, i) ? 'g' : 'G'
      end
    when 'h'
      nil # silent
    when 'r'
      i.zero? ? 'R' : 'r'
    when 's'
      if previa == 'r' || previa == 'd' || final
        ['s', '', 'h']
      elsif entreVocalyConsonante(word, i)
        %w[s h]
      elsif previa == 'b' && siguiente == 't'
        %w[s h]
      elsif previa == 'b'
        ['s', '']
      else
        's'
      end
    when 'u'
      if previa == 'q' || (previa == 'g' && SUAVIZANTES.include?(siguiente))
        nil # silent in "que/qui" and "gue/gui"
      elsif previa == 'h' || i.zero?
        'gu'
      elsif isTonica(word, i)
        'uu'
      else
        'u'
      end
    when 'w'
      if i.zero?
        %w[b B]
      elsif previa == 'o' || siguiente == 'i'
        'u'
      elsif entreVocales(word, i)
        'gu'
      else
        'Gu'
      end
    when 'y'
      final ? 'i' : %w[ll lli i]
    when 'z'
      final ? ['s', 'h', ''] : 's'
    else
      letra
    end
  end

  private_class_method :vecino, :traducir
end
@@ -0,0 +1,3 @@
1
module Fonemas
  # Release number of the gem. Frozen so the shared constant cannot be
  # modified in place by accident.
  VERSION = '0.0.2'.freeze
end
metadata ADDED
@@ -0,0 +1,87 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: fonemas
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ platform: ruby
6
+ authors:
7
+ - Manuel Bahamondez Honores
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2013-06-10 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: '1.3'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: '1.3'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ! '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ! '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ description: Creación de fonemas para ser utilizadas en el reconocimiento de voz con
42
+ cmu sphinx
43
+ email:
44
+ - manuel@bahamondez.com
45
+ executables: []
46
+ extensions: []
47
+ extra_rdoc_files: []
48
+ files:
49
+ - .gitignore
50
+ - .idea/encodings.xml
51
+ - .idea/fonemas.iml
52
+ - .idea/misc.xml
53
+ - .idea/modules.xml
54
+ - .idea/scopes/scope_settings.xml
55
+ - .idea/vcs.xml
56
+ - Gemfile
57
+ - LICENSE.txt
58
+ - README.md
59
+ - Rakefile
60
+ - fonemas.gemspec
61
+ - lib/fonemas.rb
62
+ - lib/fonemas/version.rb
63
+ homepage: http://www.b9.cl
64
+ licenses:
65
+ - MIT
66
+ metadata: {}
67
+ post_install_message:
68
+ rdoc_options: []
69
+ require_paths:
70
+ - lib
71
+ required_ruby_version: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ! '>='
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ required_rubygems_version: !ruby/object:Gem::Requirement
77
+ requirements:
78
+ - - ! '>='
79
+ - !ruby/object:Gem::Version
80
+ version: '0'
81
+ requirements: []
82
+ rubyforge_project:
83
+ rubygems_version: 2.0.3
84
+ signing_key:
85
+ specification_version: 4
86
+ summary: Lista todas las pronunciaciones posibles para una palabra en Chileno
87
+ test_files: []