fonemas 0.4.16 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +8 -8
- data/bin/audioupload +2 -2
- data/fonemas.gemspec +0 -1
- data/lib/fonemas.rb +165 -11
- data/lib/fonemas/version.rb +1 -1
- data/spec/fonemas/fonema_spec.rb +29 -2
- data/spec/spec_helper.rb +6 -1
- metadata +2 -16
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
ZjlhNGZlOWUyZjhlMDg4ZmM5OGZjZTRiYzY0NGRiYjVlMDk1NDVjOQ==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
YTRhMWRmZGQyNTFlYzllNWI0MzNiNDFiYmIyMzNmZmI0NjQ5NzA1ZA==
|
7
7
|
!binary "U0hBNTEy":
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
MGU1NmZmMzc0NDJhNGNlNjBjMzVmYzViOTljYmI2MWFiYjgyMzBlOWYxYThi
|
10
|
+
NWQwODk1YTViMmFjMThlMDU1YWEzYzQ4ZmUxOThkOGI5NGEwMmJkMzNiMmNh
|
11
|
+
ZWU1Y2VjNWZkYmE2MWU4OWQ3NWRlOWI3MDkwMWQyOTQ2OGRmMDI=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
ZDU1YWI4YzZkNDkwYWNjMjE3MWVmNGFiNjk0MzFkMWE5MmY2MjM5ZWE4ZTBh
|
14
|
+
Zjc4ODcwMWVhN2NhMzA1NzBhNWQ2ODc0NzNiM2IyN2M3Y2I3YjMyMjM5MGM0
|
15
|
+
MTMzZDg1ZGY1MWM5OTk0NDc4N2M5OTgzNDg2OWJjYTdhZTJjMWM=
|
data/bin/audioupload
CHANGED
@@ -51,7 +51,7 @@ end
|
|
51
51
|
|
52
52
|
|
53
53
|
|
54
|
-
if
|
54
|
+
if f[3].nil?
|
55
55
|
puts "uploading..."
|
56
56
|
upload_audio(filename,ARGV[0],public,nil,ARGV[4])
|
57
57
|
else
|
@@ -60,7 +60,7 @@ else
|
|
60
60
|
lr = `ffmpeg -i "#{filename}" 2>&1 | grep 'Duration'| cut -d ' ' -f 4 | cut -d ',' -f 1`
|
61
61
|
lr = lr.split(':')
|
62
62
|
length = lr[0].to_i*3600 + lr[1].to_i*60 + lr[2].to_f
|
63
|
-
puts "largo original archivo: #{
|
63
|
+
puts "largo original archivo: #{length}"
|
64
64
|
start_time = 0
|
65
65
|
counter = 0
|
66
66
|
max_parts = (length/limit).ceil
|
data/fonemas.gemspec
CHANGED
@@ -21,7 +21,6 @@ Gem::Specification.new do |spec|
|
|
21
21
|
spec.add_development_dependency "bundler", "~> 1.3"
|
22
22
|
spec.add_development_dependency "rake"
|
23
23
|
spec.add_development_dependency "rspec"
|
24
|
-
spec.add_runtime_dependency "text-hyphen", '~> 1.4.1'
|
25
24
|
spec.add_runtime_dependency "unicode_utils", '~> 1.4.0'
|
26
25
|
spec.add_runtime_dependency "rest-client", "~> 1.6.7"
|
27
26
|
spec.add_runtime_dependency 'mime-types'
|
data/lib/fonemas.rb
CHANGED
@@ -2,7 +2,6 @@
|
|
2
2
|
require "fonemas/version"
|
3
3
|
|
4
4
|
module Fonemas
|
5
|
-
require 'text/hyphen'
|
6
5
|
require 'unicode_utils'
|
7
6
|
|
8
7
|
def self.version
|
@@ -33,18 +32,134 @@ module Fonemas
|
|
33
32
|
return false
|
34
33
|
end
|
35
34
|
|
35
|
+
# Splits a word into syllables and returns them joined by "-"
# (e.g. "holanda" -> "ho-lan-da").
#
# Algorithm adapted from the Python implementation at
# https://github.com/xergio/silabas/blob/master/home/silabea.py
#
# Every pass of the loop consumes one syllable: an onset (a single
# consonant or an inseparable pair), a nucleus (vowel group) and the
# number of closing consonants reported by coda().
#
# @param palabra [String] word to split.
#   NOTE(review): presumably already lowercased by the caller -- confirm.
# @return [String] the syllables joined by "-"; "" for an empty word.
def self.silabar(palabra)
  silabas = []
  letra = 0

  # letra is the index of the first character not yet assigned to a syllable.
  while letra < palabra.length
    # Onset: nothing for a vowel, one consonant, or an inseparable pair.
    salto = 0
    if isConsonante(palabra[letra])
      salto = isInseparables(palabra[letra..letra + 1]) ? 2 : 1
    end

    # Nucleus. The triphthong test must run before the diphthong test:
    # every triphthong starts with a diphthong, so the shorter match
    # would otherwise always win and split words such as "miau".
    inicio = letra + salto
    if isDiptongoConH(palabra, inicio, inicio + 2)
      salto += 3
    elsif isTriptongo(palabra, inicio, inicio + 2)
      salto += 3
    elsif isDiptongo(palabra, inicio, inicio + 1)
      salto += 2
    elsif isDieresis(palabra, inicio, inicio + 1)
      salto += 2
    else
      salto += 1
    end

    # A trailing onset with no vowel after it (e.g. the word "y") would
    # push the cursor past the end; slicing there yields nil and used to
    # crash inside coda(). Clamp to the characters actually available.
    salto = [salto, palabra.length - letra].min

    salto += coda(palabra[letra + salto, palabra.length])

    silabas << palabra[letra, salto]
    letra += salto
  end

  silabas.join("-")
end
|
89
|
+
|
90
|
+
# Tells whether a fragment is one of the two-letter groups that the
# syllabifier keeps together inside a single syllable.
#
# @param trozo [String, nil] fragment to look up.
# @return [Boolean] true only when trozo equals one of the listed pairs.
def self.isInseparables(trozo)
  %w(br bl cr cl dr fr fl gr gl kr ll pr pl tr rr ch).any? { |par| par == trozo }
end
|
95
|
+
|
96
|
+
# Returns how many leading consonants of trozo close the current syllable
# instead of opening the next one.
#
# @param trozo [String, nil] the part of the word that follows the nucleus
#   of the syllable being built; nil is treated like "".
# @return [Integer] number of characters to append to the current syllable.
def self.coda(trozo)
  return 0 if trozo.nil? || trozo.empty?

  l = trozo.length

  # No vowel left: these consonants cannot form a syllable of their own,
  # so all of them close the current one ("robots" -> "ro-bots").
  return l if (0...l).none? { |k| isVocal(trozo, k) }

  if l > 1 && isInseparables(trozo[0, 2])
    # An inseparable pair opens the next syllable.
    0
  elsif l > 1 && isConsonante(trozo, 0) && isVocal(trozo, 1)
    # Consonant + vowel: the consonant is the next onset.
    0
  elsif l > 2 && isConsonante(trozo, 0) && isConsonante(trozo, 1) && isVocal(trozo, 2)
    # Two consonants + vowel: split them, one each side.
    1
  elsif l > 3 && isConsonante(trozo, 0) && isInseparables(trozo[1, 2]) && isVocal(trozo, 3)
    # Consonant + inseparable pair + vowel: only the first one stays.
    1
  elsif l > 3 && isConsonante(trozo, 0) && isConsonante(trozo, 1) && isConsonante(trozo, 2) && isVocal(trozo, 3)
    2
  elsif l > 3 && isConsonante(trozo, 0) && isConsonante(trozo, 1) && isConsonante(trozo, 2) && isConsonante(trozo, 3)
    2
  else
    0
  end
end
|
119
|
+
|
120
|
+
|
121
|
+
# Computes, for a hyphen-separated word, the index in the hyphen-free word
# at which each syllable after the first one starts.
#
# Example: "ho-lan-da" -> [2, 5]
#
# @param silabada [String] word with syllables separated by "-".
# @return [Array<Integer>] one index per hyphen, in order of appearance;
#   empty when there is no hyphen.
def self.calcularPosicionSilabas(silabada)
  posiciones = []
  silabada.each_char.with_index do |caracter, indice|
    next unless caracter == "-"
    # Each hyphen already seen shifts the following characters one place
    # to the left once the hyphens are removed.
    posiciones << (indice - posiciones.size)
  end
  posiciones
end
|
132
|
+
|
133
|
+
|
36
134
|
# Reports whether position i of word is stressed according to _isTonica,
# rejecting the position when the following one (i + 1) also tests as
# stressed.
#
# @param word  the word, passed through unchanged to _isTonica.
# @param i [Integer] position to test.
# @return the truthy result of _isTonica(word, i), or false.
def self.isTonica(word,i)
  candidata = _isTonica(word, i)
  return false unless candidata
  return false if _isTonica(word, i + 1)
  candidata
end
|
146
|
+
|
147
|
+
|
148
|
+
def self._isTonica(word,i)
|
149
|
+
return false if isConsonante(word,i)
|
37
150
|
#falta considerar las palabras que poseen acento pero no tilde
|
38
|
-
return true if word.size == 1
|
39
151
|
tildes = %w(á é í ó ú ã ä ë)
|
40
152
|
w = word.join
|
41
|
-
|
153
|
+
#puts "isTonica? #{w}: #{i}"
|
154
|
+
return true if w.size == 1
|
155
|
+
|
156
|
+
|
157
|
+
if tildes.include? w[i]
|
42
158
|
return true
|
43
159
|
else
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
hh = es.visualize(w).split("-")
|
160
|
+
g = silabar(w)
|
161
|
+
hh = g.split("-")
|
162
|
+
p = calcularPosicionSilabas(g)
|
48
163
|
|
49
164
|
if hh.size == 1 and w.size > 4 and w.include? 'h' and w[0] != 'h'
|
50
165
|
#caso johan
|
@@ -90,7 +205,8 @@ module Fonemas
|
|
90
205
|
end
|
91
206
|
elsif hh.size >= 3
|
92
207
|
#puts hh.join("-")
|
93
|
-
|
208
|
+
#puts "hhsize3 i: #{i}, p:#{p}"
|
209
|
+
if i >= p[p.size-1]
|
94
210
|
if w =~ /[nsaeiou]$/
|
95
211
|
return false
|
96
212
|
else
|
@@ -132,20 +248,58 @@ module Fonemas
|
|
132
248
|
|
133
249
|
end
|
134
250
|
|
135
|
-
def self.isVocal(word,i)
|
251
|
+
# Tells whether the element at index i of word is one of the listed
# vowels (plain or with acute accent).
#
# @param word [String, Array<String>] anything indexable with [i].
# @param i [Integer] position to inspect, 0 by default.
# @return [Boolean] false as well when the index is out of range.
def self.isVocal(word,i=0)
  letra = word[i]
  %w(a e i o u á é í ó ú).any? { |vocal| vocal == letra }
end
|
139
255
|
|
256
|
+
# Negation of isVocal: true for every element at index i that isVocal
# does not recognise as a vowel.
#
# @param word [String, Array<String>] anything indexable with [i].
# @param i [Integer] position to inspect, 0 by default.
# @return [Boolean]
def self.isConsonante(word,i=0)
  es_vocal = isVocal(word, i)
  es_vocal ? false : true
end
|
259
|
+
|
260
|
+
# Tells whether the characters of palabra from index first to index third
# (both inclusive) spell one of the listed triphthongs.
#
# third is an index, not a length, so the slice is taken with a range;
# this matches how isDiptongo slices its input.
#
# @param palabra [String] word being analysed.
# @param first [Integer] index of the first vowel.
# @param third [Integer] index of the third vowel.
# @return [Boolean] false as well when the range falls outside the word.
def self.isTriptongo(palabra,first,third)
  t = palabra[first..third]
  # A range starting past the end yields nil; a short one cannot match.
  return false if t.nil? || t.length < 3
  %w(iai iei uai uei uau iau uay uey).include?(t)
end
|
266
|
+
|
267
|
+
# Tells whether the characters of palabra from index first to index second
# (both inclusive) are "ue" or "ui".
#
# second is an index, not a length, so the slice is taken with a range.
# NOTE(review): the method name suggests the list may have been meant to
# hold "üe"/"üi"; the literals are kept exactly as found -- confirm.
#
# @param palabra [String] word being analysed.
# @param first [Integer] index of the first character of the pair.
# @param second [Integer] index of the second character of the pair.
# @return [Boolean] false as well when the range falls outside the word.
def self.isDieresis(palabra,first,second)
  t = palabra[first..second]
  # A range starting past the end yields nil; a short one cannot match.
  return false if t.nil? || t.length < 2
  %w(ue ui).include?(t)
end
|
274
|
+
|
140
275
|
# Tells whether the elements at indexes first and second of word form a
# diphthong: an open vowel next to a closed one (in either order) or two
# closed vowels. Accented forms are listed alongside the plain ones.
#
# @param word [String, Array<String>] word, or anything sliceable with a
#   range and indexable with an integer.
# @param first [Integer] index of the first vowel.
# @param second [Integer] index of the second vowel.
# @return [Boolean] false when first..second does not cover exactly two
#   elements or starts beyond the end of word.
def self.isDiptongo(word,first,second)
  trozo = word[first..second]
  # A range starting past the end yields nil instead of an empty slice.
  return false if trozo.nil? || trozo.length != 2

  f = word[first]
  s = word[second]
  abiertas = %w(a e o á é ó)
  cerradas = %w(i u í ú)

  (abiertas.include?(f) && cerradas.include?(s)) ||
    (abiertas.include?(s) && cerradas.include?(f)) ||
    (cerradas.include?(f) && cerradas.include?(s))
end
|
148
287
|
|
288
|
+
# Tells whether the characters of word from index first to index third
# are "vowel, h, vowel" where the two vowels form a diphthong once the h
# is dropped ("ahu" in "ahuyentar").
#
# The group is rejected when the h is followed by "ue". That check needs
# two characters starting at first + 2, so it reads them from word; the
# three-character slice can never contain them.
#
# @param word [String] word being analysed.
# @param first [Integer] index of the first vowel.
# @param third [Integer] index of the vowel after the h (first + 2 in the
#   call made by silabar).
# @return [Boolean]
def self.isDiptongoConH(word,first,third)
  test = word[first..third]
  # A range starting past the end yields nil.
  return false if test.nil? || test[1] != 'h'
  # vowel + h + "ue": the h opens the next syllable ("ca-ca-hue-te").
  return false if word[first + 2, 2] == 'ue'

  isDiptongo(test.gsub(/h/, ''), 0, 1)
end
|
302
|
+
|
149
303
|
def self.separar(word)
|
150
304
|
word = downcase(word)
|
151
305
|
output = []
|
data/lib/fonemas/version.rb
CHANGED
data/spec/fonemas/fonema_spec.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
require 'spec_helper'
|
3
3
|
describe Fonemas do
|
4
|
-
it 'test acentos'
|
4
|
+
it 'test acentos' do
|
5
5
|
Fonemas.fonemas('hasta').should include("aa s t a")
|
6
6
|
Fonemas.fonemas('torta').should include("t oo r t a")
|
7
7
|
Fonemas.fonemas('ungüento').should include("u n g u ee n t o")
|
@@ -29,7 +29,6 @@ describe Fonemas do
|
|
29
29
|
Fonemas.fonemas('guatón').should include('g u a t oo n')
|
30
30
|
Fonemas.fonemas('gu').should include('gg u')
|
31
31
|
Fonemas.fonemas('guagua').should include('gu aa g u a')
|
32
|
-
Fonemas.fonemas('joão').should include('ll o aa o')
|
33
32
|
Fonemas.fonemas('johan').should include('ll oo j a n')
|
34
33
|
Fonemas.fonemas('adquirir').should include('a d k i r ii r')
|
35
34
|
for i in Fonemas.fonemas('adskribir')
|
@@ -108,8 +107,36 @@ describe Fonemas do
|
|
108
107
|
output.should include('a b e')
|
109
108
|
output.should include('a c d')
|
110
109
|
output.should include('a c e')
|
110
|
+
end
|
111
|
+
|
112
|
+
it 'debe saber separar silabas' do
|
113
|
+
Fonemas.silabar('áfrica').should eql('á-fri-ca')
|
114
|
+
Fonemas.silabar('abstraer').should eql('abs-tra-er')
|
115
|
+
Fonemas.silabar('ahuyentar').should eql('ahu-yen-tar')
|
116
|
+
Fonemas.silabar('acaban').should eql('a-ca-ban')
|
117
|
+
Fonemas.silabar('pino').should eql('pi-no')
|
118
|
+
Fonemas.silabar('camión').should eql('ca-mión')
|
119
|
+
Fonemas.silabar('holanda').should eql('ho-lan-da')
|
120
|
+
Fonemas.silabar('abuela').should eql('a-bue-la')
|
121
|
+
|
122
|
+
end
|
111
123
|
|
124
|
+
it 'marcar inicios de cada silaba' do
|
125
|
+
Fonemas.calcularPosicionSilabas('ho-lan-da').should eql([2,5])
|
126
|
+
end
|
127
|
+
|
128
|
+
it 'identificar sílaba tónica' do
|
129
|
+
word = Fonemas.separar('acaban')
|
130
|
+
Fonemas.isTonica(word,0).should be_false
|
131
|
+
Fonemas.isTonica(word,2).should be_true
|
132
|
+
Fonemas.isTonica(word,4).should be_false
|
133
|
+
|
134
|
+
end
|
112
135
|
|
136
|
+
it 'sólo debe existir una sílaba acentuada' do
|
137
|
+
fonemas = Fonemas.fonemas('acaban')
|
138
|
+
fonemas.should_not include('aa k aa b a n')
|
139
|
+
fonemas.should include('a k aa b a n')
|
113
140
|
end
|
114
141
|
|
115
142
|
end
|
data/spec/spec_helper.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fonemas
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Manuel Bahamondez Honores
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-09-
|
11
|
+
date: 2013-09-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -52,20 +52,6 @@ dependencies:
|
|
52
52
|
- - ! '>='
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '0'
|
55
|
-
- !ruby/object:Gem::Dependency
|
56
|
-
name: text-hyphen
|
57
|
-
requirement: !ruby/object:Gem::Requirement
|
58
|
-
requirements:
|
59
|
-
- - ~>
|
60
|
-
- !ruby/object:Gem::Version
|
61
|
-
version: 1.4.1
|
62
|
-
type: :runtime
|
63
|
-
prerelease: false
|
64
|
-
version_requirements: !ruby/object:Gem::Requirement
|
65
|
-
requirements:
|
66
|
-
- - ~>
|
67
|
-
- !ruby/object:Gem::Version
|
68
|
-
version: 1.4.1
|
69
55
|
- !ruby/object:Gem::Dependency
|
70
56
|
name: unicode_utils
|
71
57
|
requirement: !ruby/object:Gem::Requirement
|