fonemas 0.4.16 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +8 -8
- data/bin/audioupload +2 -2
- data/fonemas.gemspec +0 -1
- data/lib/fonemas.rb +165 -11
- data/lib/fonemas/version.rb +1 -1
- data/spec/fonemas/fonema_spec.rb +29 -2
- data/spec/spec_helper.rb +6 -1
- metadata +2 -16
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
ZjlhNGZlOWUyZjhlMDg4ZmM5OGZjZTRiYzY0NGRiYjVlMDk1NDVjOQ==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
YTRhMWRmZGQyNTFlYzllNWI0MzNiNDFiYmIyMzNmZmI0NjQ5NzA1ZA==
|
7
7
|
!binary "U0hBNTEy":
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
MGU1NmZmMzc0NDJhNGNlNjBjMzVmYzViOTljYmI2MWFiYjgyMzBlOWYxYThi
|
10
|
+
NWQwODk1YTViMmFjMThlMDU1YWEzYzQ4ZmUxOThkOGI5NGEwMmJkMzNiMmNh
|
11
|
+
ZWU1Y2VjNWZkYmE2MWU4OWQ3NWRlOWI3MDkwMWQyOTQ2OGRmMDI=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
ZDU1YWI4YzZkNDkwYWNjMjE3MWVmNGFiNjk0MzFkMWE5MmY2MjM5ZWE4ZTBh
|
14
|
+
Zjc4ODcwMWVhN2NhMzA1NzBhNWQ2ODc0NzNiM2IyN2M3Y2I3YjMyMjM5MGM0
|
15
|
+
MTMzZDg1ZGY1MWM5OTk0NDc4N2M5OTgzNDg2OWJjYTdhZTJjMWM=
|
data/bin/audioupload
CHANGED
@@ -51,7 +51,7 @@ end
|
|
51
51
|
|
52
52
|
|
53
53
|
|
54
|
-
if
|
54
|
+
if f[3].nil?
|
55
55
|
puts "uploading..."
|
56
56
|
upload_audio(filename,ARGV[0],public,nil,ARGV[4])
|
57
57
|
else
|
@@ -60,7 +60,7 @@ else
|
|
60
60
|
lr = `ffmpeg -i "#{filename}" 2>&1 | grep 'Duration'| cut -d ' ' -f 4 | cut -d ',' -f 1`
|
61
61
|
lr = lr.split(':')
|
62
62
|
length = lr[0].to_i*3600 + lr[1].to_i*60 + lr[2].to_f
|
63
|
-
puts "largo original archivo: #{
|
63
|
+
puts "largo original archivo: #{length}"
|
64
64
|
start_time = 0
|
65
65
|
counter = 0
|
66
66
|
max_parts = (length/limit).ceil
|
data/fonemas.gemspec
CHANGED
@@ -21,7 +21,6 @@ Gem::Specification.new do |spec|
|
|
21
21
|
spec.add_development_dependency "bundler", "~> 1.3"
|
22
22
|
spec.add_development_dependency "rake"
|
23
23
|
spec.add_development_dependency "rspec"
|
24
|
-
spec.add_runtime_dependency "text-hyphen", '~> 1.4.1'
|
25
24
|
spec.add_runtime_dependency "unicode_utils", '~> 1.4.0'
|
26
25
|
spec.add_runtime_dependency "rest-client", "~> 1.6.7"
|
27
26
|
spec.add_runtime_dependency 'mime-types'
|
data/lib/fonemas.rb
CHANGED
@@ -2,7 +2,6 @@
|
|
2
2
|
require "fonemas/version"
|
3
3
|
|
4
4
|
module Fonemas
|
5
|
-
require 'text/hyphen'
|
6
5
|
require 'unicode_utils'
|
7
6
|
|
8
7
|
def self.version
|
@@ -33,18 +32,134 @@ module Fonemas
|
|
33
32
|
return false
|
34
33
|
end
|
35
34
|
|
35
|
+
def self.silabar(palabra)
|
36
|
+
#puts "silabar: #{palabra}"
|
37
|
+
#algoritmo adaptado desde python
|
38
|
+
#codigo original extraido desde:
|
39
|
+
#https://github.com/xergio/silabas/blob/master/home/silabea.py
|
40
|
+
silabas = []
|
41
|
+
letra = 0
|
42
|
+
salto = 0
|
43
|
+
while silabas.join('').length < palabra.length
|
44
|
+
#puts "silabas antes: #{silabas}"
|
45
|
+
#puts "letra: #{letra}"
|
46
|
+
#puts "palabra length: #{palabra.length}"
|
47
|
+
silaba = ''
|
48
|
+
salto = 0
|
49
|
+
if isConsonante(palabra[letra])
|
50
|
+
if isInseparables(palabra[letra..letra+1])
|
51
|
+
salto += 2
|
52
|
+
else
|
53
|
+
salto += 1
|
54
|
+
end
|
55
|
+
else
|
56
|
+
salto += 0
|
57
|
+
end
|
58
|
+
|
59
|
+
#puts "salto: #{salto}"
|
60
|
+
if isDiptongoConH(palabra,letra+salto,letra+salto+2)
|
61
|
+
#puts "diptongo con h"
|
62
|
+
salto += 3
|
63
|
+
elsif isDiptongo(palabra,letra+salto,letra+salto+1)
|
64
|
+
salto += 2
|
65
|
+
elsif isTriptongo(palabra,letra+salto,letra+salto+2)
|
66
|
+
salto += 3
|
67
|
+
elsif isDieresis(palabra,letra+salto,letra+salto+1)
|
68
|
+
salto += 2
|
69
|
+
else
|
70
|
+
salto += 1
|
71
|
+
end
|
72
|
+
#puts "acoda silaba: #{palabra[letra,letra+salto]} letra: #{letra} salto: #{salto}"
|
73
|
+
|
74
|
+
salto += coda(palabra[letra+salto,palabra.length])
|
75
|
+
|
76
|
+
#puts "dcoda silaba: #{palabra[letra,letra+salto]} letra: #{letra} salto: #{salto}"
|
77
|
+
|
78
|
+
|
79
|
+
silaba = palabra[letra,salto]
|
80
|
+
letra += salto
|
81
|
+
silabas << silaba
|
82
|
+
|
83
|
+
#puts "Dletra: #{letra}"
|
84
|
+
#puts "Dsalto: #{salto}"
|
85
|
+
|
86
|
+
end
|
87
|
+
return silabas.join("-")
|
88
|
+
end
|
89
|
+
|
90
|
+
def self.isInseparables(trozo)
|
91
|
+
#puts "isInspearable? #{trozo}"
|
92
|
+
inseparables = %w(br bl cr cl dr fr fl gr gl kr ll pr pl tr rr ch)
|
93
|
+
return inseparables.include? trozo
|
94
|
+
end
|
95
|
+
|
96
|
+
def self.coda(trozo)
|
97
|
+
#puts "coda: #{trozo}"
|
98
|
+
l = trozo.length
|
99
|
+
if l == 0
|
100
|
+
return 0
|
101
|
+
elsif l == 1 and isConsonante(trozo)
|
102
|
+
return 1
|
103
|
+
elsif l > 1 and isInseparables(trozo[0,2])
|
104
|
+
return 0
|
105
|
+
elsif l > 1 and isConsonante(trozo,0) and isVocal(trozo,1)
|
106
|
+
return 0
|
107
|
+
elsif l > 2 and isConsonante(trozo,0) and isConsonante(trozo,1) and isVocal(trozo,2)
|
108
|
+
return 1
|
109
|
+
elsif l > 3 and isConsonante(trozo,0) and isInseparables(trozo[1,2]) and isVocal(trozo[3])
|
110
|
+
return 1
|
111
|
+
elsif l > 3 and isConsonante(trozo,0) and isConsonante(trozo,1) and isConsonante(trozo,2) and isVocal(trozo,3)
|
112
|
+
return 2
|
113
|
+
elsif l > 3 and isConsonante(trozo,0) and isConsonante(trozo,1) and isConsonante(trozo,2) and isConsonante(trozo,3)
|
114
|
+
return 2
|
115
|
+
else
|
116
|
+
return 0
|
117
|
+
end
|
118
|
+
end
|
119
|
+
|
120
|
+
|
121
|
+
def self.calcularPosicionSilabas(silabada)
|
122
|
+
#puts "calcular posicion #{silabada}."
|
123
|
+
output = []
|
124
|
+
text = silabada
|
125
|
+
while(!text.index("-").nil?)
|
126
|
+
i = text.index("-")
|
127
|
+
text = text.slice(0,i) + text.slice(i+1,text.length)
|
128
|
+
output << i
|
129
|
+
end
|
130
|
+
return output
|
131
|
+
end
|
132
|
+
|
133
|
+
|
36
134
|
def self.isTonica(word,i)
|
135
|
+
test = _isTonica(word,i)
|
136
|
+
if test
|
137
|
+
if _isTonica(word,i+1)
|
138
|
+
return false
|
139
|
+
else
|
140
|
+
return test
|
141
|
+
end
|
142
|
+
else
|
143
|
+
return false
|
144
|
+
end
|
145
|
+
end
|
146
|
+
|
147
|
+
|
148
|
+
def self._isTonica(word,i)
|
149
|
+
return false if isConsonante(word,i)
|
37
150
|
#falta considerar las palabras que poseen acento pero no tilde
|
38
|
-
return true if word.size == 1
|
39
151
|
tildes = %w(á é í ó ú ã ä ë)
|
40
152
|
w = word.join
|
41
|
-
|
153
|
+
#puts "isTonica? #{w}: #{i}"
|
154
|
+
return true if w.size == 1
|
155
|
+
|
156
|
+
|
157
|
+
if tildes.include? w[i]
|
42
158
|
return true
|
43
159
|
else
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
hh = es.visualize(w).split("-")
|
160
|
+
g = silabar(w)
|
161
|
+
hh = g.split("-")
|
162
|
+
p = calcularPosicionSilabas(g)
|
48
163
|
|
49
164
|
if hh.size == 1 and w.size > 4 and w.include? 'h' and w[0] != 'h'
|
50
165
|
#caso johan
|
@@ -90,7 +205,8 @@ module Fonemas
|
|
90
205
|
end
|
91
206
|
elsif hh.size >= 3
|
92
207
|
#puts hh.join("-")
|
93
|
-
|
208
|
+
#puts "hhsize3 i: #{i}, p:#{p}"
|
209
|
+
if i >= p[p.size-1]
|
94
210
|
if w =~ /[nsaeiou]$/
|
95
211
|
return false
|
96
212
|
else
|
@@ -132,20 +248,58 @@ module Fonemas
|
|
132
248
|
|
133
249
|
end
|
134
250
|
|
135
|
-
def self.isVocal(word,i)
|
251
|
+
def self.isVocal(word,i=0)
|
136
252
|
vocales = %w(a e i o u á é í ó ú)
|
137
253
|
return vocales.include? word[i]
|
138
254
|
end
|
139
255
|
|
256
|
+
def self.isConsonante(word,i=0)
|
257
|
+
return !isVocal(word,i)
|
258
|
+
end
|
259
|
+
|
260
|
+
def self.isTriptongo(palabra,first,third)
|
261
|
+
t = palabra[first,third]
|
262
|
+
return false if t.length < 3
|
263
|
+
triptongos = %w(iai iei uai uei uau iau uay uey)
|
264
|
+
return triptongos.include? t
|
265
|
+
end
|
266
|
+
|
267
|
+
def self.isDieresis(palabra,first,second)
|
268
|
+
t = palabra[first,second]
|
269
|
+
return false if t.length < 2
|
270
|
+
dieresis = %w(ue ui)
|
271
|
+
return dieresis.include? t
|
272
|
+
|
273
|
+
end
|
274
|
+
|
140
275
|
def self.isDiptongo(word,first,second)
|
276
|
+
trozo = word[first..second]
|
277
|
+
return false if trozo.length != 2
|
278
|
+
#puts "diptongo word #{word}, first: #{first}, second: #{second}"
|
279
|
+
#puts "test diptongo #{word[first] + word[second]}"
|
141
280
|
f = word[first]
|
142
281
|
s = word[second]
|
143
|
-
abiertas = %w(a e o)
|
144
|
-
cerradas = %w(i u)
|
282
|
+
abiertas = %w(a e o á é ó)
|
283
|
+
cerradas = %w(i u í ú)
|
145
284
|
return ((abiertas.include? f and cerradas.include? s) or (abiertas.include? s and cerradas.include? f) or (cerradas.include? f and cerradas.include? s))
|
146
285
|
|
147
286
|
end
|
148
287
|
|
288
|
+
def self.isDiptongoConH(word,first,third)
|
289
|
+
test = word[first..third]
|
290
|
+
#puts "test diptongo con h: #{test}"
|
291
|
+
if test[1] == 'h'
|
292
|
+
if test[2,2] == 'ue'
|
293
|
+
return false
|
294
|
+
else
|
295
|
+
test = test.gsub(/h/,'')
|
296
|
+
end
|
297
|
+
else
|
298
|
+
return false
|
299
|
+
end
|
300
|
+
return isDiptongo(test,0,1)
|
301
|
+
end
|
302
|
+
|
149
303
|
def self.separar(word)
|
150
304
|
word = downcase(word)
|
151
305
|
output = []
|
data/lib/fonemas/version.rb
CHANGED
data/spec/fonemas/fonema_spec.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
require 'spec_helper'
|
3
3
|
describe Fonemas do
|
4
|
-
it 'test acentos'
|
4
|
+
it 'test acentos' do
|
5
5
|
Fonemas.fonemas('hasta').should include("aa s t a")
|
6
6
|
Fonemas.fonemas('torta').should include("t oo r t a")
|
7
7
|
Fonemas.fonemas('ungüento').should include("u n g u ee n t o")
|
@@ -29,7 +29,6 @@ describe Fonemas do
|
|
29
29
|
Fonemas.fonemas('guatón').should include('g u a t oo n')
|
30
30
|
Fonemas.fonemas('gu').should include('gg u')
|
31
31
|
Fonemas.fonemas('guagua').should include('gu aa g u a')
|
32
|
-
Fonemas.fonemas('joão').should include('ll o aa o')
|
33
32
|
Fonemas.fonemas('johan').should include('ll oo j a n')
|
34
33
|
Fonemas.fonemas('adquirir').should include('a d k i r ii r')
|
35
34
|
for i in Fonemas.fonemas('adskribir')
|
@@ -108,8 +107,36 @@ describe Fonemas do
|
|
108
107
|
output.should include('a b e')
|
109
108
|
output.should include('a c d')
|
110
109
|
output.should include('a c e')
|
110
|
+
end
|
111
|
+
|
112
|
+
it 'debe saber separar silabas' do
|
113
|
+
Fonemas.silabar('áfrica').should eql('á-fri-ca')
|
114
|
+
Fonemas.silabar('abstraer').should eql('abs-tra-er')
|
115
|
+
Fonemas.silabar('ahuyentar').should eql('ahu-yen-tar')
|
116
|
+
Fonemas.silabar('acaban').should eql('a-ca-ban')
|
117
|
+
Fonemas.silabar('pino').should eql('pi-no')
|
118
|
+
Fonemas.silabar('camión').should eql('ca-mión')
|
119
|
+
Fonemas.silabar('holanda').should eql('ho-lan-da')
|
120
|
+
Fonemas.silabar('abuela').should eql('a-bue-la')
|
121
|
+
|
122
|
+
end
|
111
123
|
|
124
|
+
it 'marcar inicios de cada silaba' do
|
125
|
+
Fonemas.calcularPosicionSilabas('ho-lan-da').should eql([2,5])
|
126
|
+
end
|
127
|
+
|
128
|
+
it 'identificar sílaba tónica' do
|
129
|
+
word = Fonemas.separar('acaban')
|
130
|
+
Fonemas.isTonica(word,0).should be_false
|
131
|
+
Fonemas.isTonica(word,2).should be_true
|
132
|
+
Fonemas.isTonica(word,4).should be_false
|
133
|
+
|
134
|
+
end
|
112
135
|
|
136
|
+
it 'sólo debe existir una sílaba acentuada' do
|
137
|
+
fonemas = Fonemas.fonemas('acaban')
|
138
|
+
fonemas.should_not include('aa k aa b a n')
|
139
|
+
fonemas.should include('a k aa b a n')
|
113
140
|
end
|
114
141
|
|
115
142
|
end
|
data/spec/spec_helper.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fonemas
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Manuel Bahamondez Honores
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-09-
|
11
|
+
date: 2013-09-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -52,20 +52,6 @@ dependencies:
|
|
52
52
|
- - ! '>='
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '0'
|
55
|
-
- !ruby/object:Gem::Dependency
|
56
|
-
name: text-hyphen
|
57
|
-
requirement: !ruby/object:Gem::Requirement
|
58
|
-
requirements:
|
59
|
-
- - ~>
|
60
|
-
- !ruby/object:Gem::Version
|
61
|
-
version: 1.4.1
|
62
|
-
type: :runtime
|
63
|
-
prerelease: false
|
64
|
-
version_requirements: !ruby/object:Gem::Requirement
|
65
|
-
requirements:
|
66
|
-
- - ~>
|
67
|
-
- !ruby/object:Gem::Version
|
68
|
-
version: 1.4.1
|
69
55
|
- !ruby/object:Gem::Dependency
|
70
56
|
name: unicode_utils
|
71
57
|
requirement: !ruby/object:Gem::Requirement
|