fonemas 0.4.16 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- M2JiMjdhZGRkYjI5ZmRkNzM2ZWI1MmRjMTMwMjAwZTlmNmIzMzc3Yg==
4
+ ZjlhNGZlOWUyZjhlMDg4ZmM5OGZjZTRiYzY0NGRiYjVlMDk1NDVjOQ==
5
5
  data.tar.gz: !binary |-
6
- YWI3ZTMwOTFkYmMxYzIwNzdjNDEzNzI1YzliMjEwYzk1M2M4MDQxNA==
6
+ YTRhMWRmZGQyNTFlYzllNWI0MzNiNDFiYmIyMzNmZmI0NjQ5NzA1ZA==
7
7
  !binary "U0hBNTEy":
8
8
  metadata.gz: !binary |-
9
- OGU0Mzg4NGRkMzhkM2Y1ODJkOGVmNmY0MzQ2MTQyYmViY2M3OGQ0NWJlNmZl
10
- NjdjMGI1ZmIzODJmMGY1NzcxYTU4MTIxYzk0Nzg3ZWYwNWI5ZjQ1YmQyNGM4
11
- ZWI4NGE1ZDUxZDc3ZjQwNGRhMjBjYzcyMWUwZWExZGE3MzEwYWQ=
9
+ MGU1NmZmMzc0NDJhNGNlNjBjMzVmYzViOTljYmI2MWFiYjgyMzBlOWYxYThi
10
+ NWQwODk1YTViMmFjMThlMDU1YWEzYzQ4ZmUxOThkOGI5NGEwMmJkMzNiMmNh
11
+ ZWU1Y2VjNWZkYmE2MWU4OWQ3NWRlOWI3MDkwMWQyOTQ2OGRmMDI=
12
12
  data.tar.gz: !binary |-
13
- ZGRlYmVkNmMxYTQ3NWIzZjE2MDI1MThmYTkwMzg4YjNmOGFmZjUxMzNlYThi
14
- MGE5NzJlY2UyZWQ4NzQ5OWVmZTk2MDFlNzBjNzkxOTIxMTVjYjM4NjIyNDhm
15
- NWJkOTNlNDcxMGYwNDQyMWYzNmEwNTcxYzVhZWVmMjcyNTdjYzA=
13
+ ZDU1YWI4YzZkNDkwYWNjMjE3MWVmNGFiNjk0MzFkMWE5MmY2MjM5ZWE4ZTBh
14
+ Zjc4ODcwMWVhN2NhMzA1NzBhNWQ2ODc0NzNiM2IyN2M3Y2I3YjMyMjM5MGM0
15
+ MTMzZDg1ZGY1MWM5OTk0NDc4N2M5OTgzNDg2OWJjYTdhZTJjMWM=
@@ -51,7 +51,7 @@ end
51
51
 
52
52
 
53
53
 
54
- if ARGV[3].nil?
54
+ if f[3].nil?
55
55
  puts "uploading..."
56
56
  upload_audio(filename,ARGV[0],public,nil,ARGV[4])
57
57
  else
@@ -60,7 +60,7 @@ else
60
60
  lr = `ffmpeg -i "#{filename}" 2>&1 | grep 'Duration'| cut -d ' ' -f 4 | cut -d ',' -f 1`
61
61
  lr = lr.split(':')
62
62
  length = lr[0].to_i*3600 + lr[1].to_i*60 + lr[2].to_f
63
- puts "largo original archivo: #{lr}"
63
+ puts "largo original archivo: #{length}"
64
64
  start_time = 0
65
65
  counter = 0
66
66
  max_parts = (length/limit).ceil
@@ -21,7 +21,6 @@ Gem::Specification.new do |spec|
21
21
  spec.add_development_dependency "bundler", "~> 1.3"
22
22
  spec.add_development_dependency "rake"
23
23
  spec.add_development_dependency "rspec"
24
- spec.add_runtime_dependency "text-hyphen", '~> 1.4.1'
25
24
  spec.add_runtime_dependency "unicode_utils", '~> 1.4.0'
26
25
  spec.add_runtime_dependency "rest-client", "~> 1.6.7"
27
26
  spec.add_runtime_dependency 'mime-types'
@@ -2,7 +2,6 @@
2
2
  require "fonemas/version"
3
3
 
4
4
  module Fonemas
5
- require 'text/hyphen'
6
5
  require 'unicode_utils'
7
6
 
8
7
  def self.version
@@ -33,18 +32,134 @@ module Fonemas
33
32
  return false
34
33
  end
35
34
 
35
+ def self.silabar(palabra)
36
+ #puts "silabar: #{palabra}"
37
+ #algoritmo adaptado desde python
38
+ #codigo original extraido desde:
39
+ #https://github.com/xergio/silabas/blob/master/home/silabea.py
40
+ silabas = []
41
+ letra = 0
42
+ salto = 0
43
+ while silabas.join('').length < palabra.length
44
+ #puts "silabas antes: #{silabas}"
45
+ #puts "letra: #{letra}"
46
+ #puts "palabra length: #{palabra.length}"
47
+ silaba = ''
48
+ salto = 0
49
+ if isConsonante(palabra[letra])
50
+ if isInseparables(palabra[letra..letra+1])
51
+ salto += 2
52
+ else
53
+ salto += 1
54
+ end
55
+ else
56
+ salto += 0
57
+ end
58
+
59
+ #puts "salto: #{salto}"
60
+ if isDiptongoConH(palabra,letra+salto,letra+salto+2)
61
+ #puts "diptongo con h"
62
+ salto += 3
63
+ elsif isDiptongo(palabra,letra+salto,letra+salto+1)
64
+ salto += 2
65
+ elsif isTriptongo(palabra,letra+salto,letra+salto+2)
66
+ salto += 3
67
+ elsif isDieresis(palabra,letra+salto,letra+salto+1)
68
+ salto += 2
69
+ else
70
+ salto += 1
71
+ end
72
+ #puts "acoda silaba: #{palabra[letra,letra+salto]} letra: #{letra} salto: #{salto}"
73
+
74
+ salto += coda(palabra[letra+salto,palabra.length])
75
+
76
+ #puts "dcoda silaba: #{palabra[letra,letra+salto]} letra: #{letra} salto: #{salto}"
77
+
78
+
79
+ silaba = palabra[letra,salto]
80
+ letra += salto
81
+ silabas << silaba
82
+
83
+ #puts "Dletra: #{letra}"
84
+ #puts "Dsalto: #{salto}"
85
+
86
+ end
87
+ return silabas.join("-")
88
+ end
89
+
90
+ def self.isInseparables(trozo)
91
+ #puts "isInspearable? #{trozo}"
92
+ inseparables = %w(br bl cr cl dr fr fl gr gl kr ll pr pl tr rr ch)
93
+ return inseparables.include? trozo
94
+ end
95
+
96
+ def self.coda(trozo)
97
+ #puts "coda: #{trozo}"
98
+ l = trozo.length
99
+ if l == 0
100
+ return 0
101
+ elsif l == 1 and isConsonante(trozo)
102
+ return 1
103
+ elsif l > 1 and isInseparables(trozo[0,2])
104
+ return 0
105
+ elsif l > 1 and isConsonante(trozo,0) and isVocal(trozo,1)
106
+ return 0
107
+ elsif l > 2 and isConsonante(trozo,0) and isConsonante(trozo,1) and isVocal(trozo,2)
108
+ return 1
109
+ elsif l > 3 and isConsonante(trozo,0) and isInseparables(trozo[1,2]) and isVocal(trozo[3])
110
+ return 1
111
+ elsif l > 3 and isConsonante(trozo,0) and isConsonante(trozo,1) and isConsonante(trozo,2) and isVocal(trozo,3)
112
+ return 2
113
+ elsif l > 3 and isConsonante(trozo,0) and isConsonante(trozo,1) and isConsonante(trozo,2) and isConsonante(trozo,3)
114
+ return 2
115
+ else
116
+ return 0
117
+ end
118
+ end
119
+
120
+
121
+ def self.calcularPosicionSilabas(silabada)
122
+ #puts "calcular posicion #{silabada}."
123
+ output = []
124
+ text = silabada
125
+ while(!text.index("-").nil?)
126
+ i = text.index("-")
127
+ text = text.slice(0,i) + text.slice(i+1,text.length)
128
+ output << i
129
+ end
130
+ return output
131
+ end
132
+
133
+
36
134
  def self.isTonica(word,i)
135
+ test = _isTonica(word,i)
136
+ if test
137
+ if _isTonica(word,i+1)
138
+ return false
139
+ else
140
+ return test
141
+ end
142
+ else
143
+ return false
144
+ end
145
+ end
146
+
147
+
148
+ def self._isTonica(word,i)
149
+ return false if isConsonante(word,i)
37
150
  #falta considerar las palabras que poseen acento pero no tilde
38
- return true if word.size == 1
39
151
  tildes = %w(á é í ó ú ã ä ë)
40
152
  w = word.join
41
- if tildes.include? word[i]
153
+ #puts "isTonica? #{w}: #{i}"
154
+ return true if w.size == 1
155
+
156
+
157
+ if tildes.include? w[i]
42
158
  return true
43
159
  else
44
- es = Text::Hyphen.new(:language => "es", :left => 0, :right => 1)
45
- p = es.hyphenate(w)
46
- #puts es.visualize(w)
47
- hh = es.visualize(w).split("-")
160
+ g = silabar(w)
161
+ hh = g.split("-")
162
+ p = calcularPosicionSilabas(g)
48
163
 
49
164
  if hh.size == 1 and w.size > 4 and w.include? 'h' and w[0] != 'h'
50
165
  #caso johan
@@ -90,7 +205,8 @@ module Fonemas
90
205
  end
91
206
  elsif hh.size >= 3
92
207
  #puts hh.join("-")
93
- if i > p[p.size-1]
208
+ #puts "hhsize3 i: #{i}, p:#{p}"
209
+ if i >= p[p.size-1]
94
210
  if w =~ /[nsaeiou]$/
95
211
  return false
96
212
  else
@@ -132,20 +248,58 @@ module Fonemas
132
248
 
133
249
  end
134
250
 
135
- def self.isVocal(word,i)
251
+ def self.isVocal(word,i=0)
136
252
  vocales = %w(a e i o u á é í ó ú)
137
253
  return vocales.include? word[i]
138
254
  end
139
255
 
256
+ def self.isConsonante(word,i=0)
257
+ return !isVocal(word,i)
258
+ end
259
+
260
+ def self.isTriptongo(palabra,first,third)
261
+ t = palabra[first,third]
262
+ return false if t.length < 3
263
+ triptongos = %w(iai iei uai uei uau iau uay uey)
264
+ return triptongos.include? t
265
+ end
266
+
267
+ def self.isDieresis(palabra,first,second)
268
+ t = palabra[first,second]
269
+ return false if t.length < 2
270
+ dieresis = %w(ue ui)
271
+ return dieresis.include? t
272
+
273
+ end
274
+
140
275
  def self.isDiptongo(word,first,second)
276
+ trozo = word[first..second]
277
+ return false if trozo.length != 2
278
+ #puts "diptongo word #{word}, first: #{first}, second: #{second}"
279
+ #puts "test diptongo #{word[first] + word[second]}"
141
280
  f = word[first]
142
281
  s = word[second]
143
- abiertas = %w(a e o)
144
- cerradas = %w(i u)
282
+ abiertas = %w(a e o á é ó)
283
+ cerradas = %w(i u í ú)
145
284
  return ((abiertas.include? f and cerradas.include? s) or (abiertas.include? s and cerradas.include? f) or (cerradas.include? f and cerradas.include? s))
146
285
 
147
286
  end
148
287
 
288
+ def self.isDiptongoConH(word,first,third)
289
+ test = word[first..third]
290
+ #puts "test diptongo con h: #{test}"
291
+ if test[1] == 'h'
292
+ if test[2,2] == 'ue'
293
+ return false
294
+ else
295
+ test = test.gsub(/h/,'')
296
+ end
297
+ else
298
+ return false
299
+ end
300
+ return isDiptongo(test,0,1)
301
+ end
302
+
149
303
  def self.separar(word)
150
304
  word = downcase(word)
151
305
  output = []
@@ -1,3 +1,3 @@
1
1
  module Fonemas
2
- VERSION = '0.4.16'
2
+ VERSION = '0.5.0'
3
3
  end
@@ -1,7 +1,7 @@
1
1
  # encoding: utf-8
2
2
  require 'spec_helper'
3
3
  describe Fonemas do
4
- it 'test acentos' do
4
+ it 'test acentos' do
5
5
  Fonemas.fonemas('hasta').should include("aa s t a")
6
6
  Fonemas.fonemas('torta').should include("t oo r t a")
7
7
  Fonemas.fonemas('ungüento').should include("u n g u ee n t o")
@@ -29,7 +29,6 @@ describe Fonemas do
29
29
  Fonemas.fonemas('guatón').should include('g u a t oo n')
30
30
  Fonemas.fonemas('gu').should include('gg u')
31
31
  Fonemas.fonemas('guagua').should include('gu aa g u a')
32
- Fonemas.fonemas('joão').should include('ll o aa o')
33
32
  Fonemas.fonemas('johan').should include('ll oo j a n')
34
33
  Fonemas.fonemas('adquirir').should include('a d k i r ii r')
35
34
  for i in Fonemas.fonemas('adskribir')
@@ -108,8 +107,36 @@ describe Fonemas do
108
107
  output.should include('a b e')
109
108
  output.should include('a c d')
110
109
  output.should include('a c e')
110
+ end
111
+
112
+ it 'debe saber separar silabas' do
113
+ Fonemas.silabar('áfrica').should eql('á-fri-ca')
114
+ Fonemas.silabar('abstraer').should eql('abs-tra-er')
115
+ Fonemas.silabar('ahuyentar').should eql('ahu-yen-tar')
116
+ Fonemas.silabar('acaban').should eql('a-ca-ban')
117
+ Fonemas.silabar('pino').should eql('pi-no')
118
+ Fonemas.silabar('camión').should eql('ca-mión')
119
+ Fonemas.silabar('holanda').should eql('ho-lan-da')
120
+ Fonemas.silabar('abuela').should eql('a-bue-la')
121
+
122
+ end
111
123
 
124
+ it 'marcar inicios de cada silaba' do
125
+ Fonemas.calcularPosicionSilabas('ho-lan-da').should eql([2,5])
126
+ end
127
+
128
+ it 'identificar sílaba tónica' do
129
+ word = Fonemas.separar('acaban')
130
+ Fonemas.isTonica(word,0).should be_false
131
+ Fonemas.isTonica(word,2).should be_true
132
+ Fonemas.isTonica(word,4).should be_false
133
+
134
+ end
112
135
 
136
+ it 'sólo debe existir una sílaba acentuada' do
137
+ fonemas = Fonemas.fonemas('acaban')
138
+ fonemas.should_not include('aa k aa b a n')
139
+ fonemas.should include('a k aa b a n')
113
140
  end
114
141
 
115
142
  end
@@ -1 +1,6 @@
1
- require 'fonemas'
1
+ require 'fonemas'
2
+
3
+ RSpec.configure do |config|
4
+ config.filter_run focus: true
5
+ config.run_all_when_everything_filtered = true
6
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fonemas
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.16
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Manuel Bahamondez Honores
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-09-02 00:00:00.000000000 Z
11
+ date: 2013-09-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -52,20 +52,6 @@ dependencies:
52
52
  - - ! '>='
53
53
  - !ruby/object:Gem::Version
54
54
  version: '0'
55
- - !ruby/object:Gem::Dependency
56
- name: text-hyphen
57
- requirement: !ruby/object:Gem::Requirement
58
- requirements:
59
- - - ~>
60
- - !ruby/object:Gem::Version
61
- version: 1.4.1
62
- type: :runtime
63
- prerelease: false
64
- version_requirements: !ruby/object:Gem::Requirement
65
- requirements:
66
- - - ~>
67
- - !ruby/object:Gem::Version
68
- version: 1.4.1
69
55
  - !ruby/object:Gem::Dependency
70
56
  name: unicode_utils
71
57
  requirement: !ruby/object:Gem::Requirement