fonemas 0.4.16 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- M2JiMjdhZGRkYjI5ZmRkNzM2ZWI1MmRjMTMwMjAwZTlmNmIzMzc3Yg==
4
+ ZjlhNGZlOWUyZjhlMDg4ZmM5OGZjZTRiYzY0NGRiYjVlMDk1NDVjOQ==
5
5
  data.tar.gz: !binary |-
6
- YWI3ZTMwOTFkYmMxYzIwNzdjNDEzNzI1YzliMjEwYzk1M2M4MDQxNA==
6
+ YTRhMWRmZGQyNTFlYzllNWI0MzNiNDFiYmIyMzNmZmI0NjQ5NzA1ZA==
7
7
  !binary "U0hBNTEy":
8
8
  metadata.gz: !binary |-
9
- OGU0Mzg4NGRkMzhkM2Y1ODJkOGVmNmY0MzQ2MTQyYmViY2M3OGQ0NWJlNmZl
10
- NjdjMGI1ZmIzODJmMGY1NzcxYTU4MTIxYzk0Nzg3ZWYwNWI5ZjQ1YmQyNGM4
11
- ZWI4NGE1ZDUxZDc3ZjQwNGRhMjBjYzcyMWUwZWExZGE3MzEwYWQ=
9
+ MGU1NmZmMzc0NDJhNGNlNjBjMzVmYzViOTljYmI2MWFiYjgyMzBlOWYxYThi
10
+ NWQwODk1YTViMmFjMThlMDU1YWEzYzQ4ZmUxOThkOGI5NGEwMmJkMzNiMmNh
11
+ ZWU1Y2VjNWZkYmE2MWU4OWQ3NWRlOWI3MDkwMWQyOTQ2OGRmMDI=
12
12
  data.tar.gz: !binary |-
13
- ZGRlYmVkNmMxYTQ3NWIzZjE2MDI1MThmYTkwMzg4YjNmOGFmZjUxMzNlYThi
14
- MGE5NzJlY2UyZWQ4NzQ5OWVmZTk2MDFlNzBjNzkxOTIxMTVjYjM4NjIyNDhm
15
- NWJkOTNlNDcxMGYwNDQyMWYzNmEwNTcxYzVhZWVmMjcyNTdjYzA=
13
+ ZDU1YWI4YzZkNDkwYWNjMjE3MWVmNGFiNjk0MzFkMWE5MmY2MjM5ZWE4ZTBh
14
+ Zjc4ODcwMWVhN2NhMzA1NzBhNWQ2ODc0NzNiM2IyN2M3Y2I3YjMyMjM5MGM0
15
+ MTMzZDg1ZGY1MWM5OTk0NDc4N2M5OTgzNDg2OWJjYTdhZTJjMWM=
@@ -51,7 +51,7 @@ end
51
51
 
52
52
 
53
53
 
54
- if ARGV[3].nil?
54
+ if f[3].nil?
55
55
  puts "uploading..."
56
56
  upload_audio(filename,ARGV[0],public,nil,ARGV[4])
57
57
  else
@@ -60,7 +60,7 @@ else
60
60
  lr = `ffmpeg -i "#{filename}" 2>&1 | grep 'Duration'| cut -d ' ' -f 4 | cut -d ',' -f 1`
61
61
  lr = lr.split(':')
62
62
  length = lr[0].to_i*3600 + lr[1].to_i*60 + lr[2].to_f
63
- puts "largo original archivo: #{lr}"
63
+ puts "largo original archivo: #{length}"
64
64
  start_time = 0
65
65
  counter = 0
66
66
  max_parts = (length/limit).ceil
@@ -21,7 +21,6 @@ Gem::Specification.new do |spec|
21
21
  spec.add_development_dependency "bundler", "~> 1.3"
22
22
  spec.add_development_dependency "rake"
23
23
  spec.add_development_dependency "rspec"
24
- spec.add_runtime_dependency "text-hyphen", '~> 1.4.1'
25
24
  spec.add_runtime_dependency "unicode_utils", '~> 1.4.0'
26
25
  spec.add_runtime_dependency "rest-client", "~> 1.6.7"
27
26
  spec.add_runtime_dependency 'mime-types'
@@ -2,7 +2,6 @@
2
2
  require "fonemas/version"
3
3
 
4
4
  module Fonemas
5
- require 'text/hyphen'
6
5
  require 'unicode_utils'
7
6
 
8
7
  def self.version
@@ -33,18 +32,134 @@ module Fonemas
33
32
  return false
34
33
  end
35
34
 
35
+ def self.silabar(palabra)
36
+ #puts "silabar: #{palabra}"
37
+ #algoritmo adaptado desde python
38
+ #codigo original extraido desde:
39
+ #https://github.com/xergio/silabas/blob/master/home/silabea.py
40
+ silabas = []
41
+ letra = 0
42
+ salto = 0
43
+ while silabas.join('').length < palabra.length
44
+ #puts "silabas antes: #{silabas}"
45
+ #puts "letra: #{letra}"
46
+ #puts "palabra length: #{palabra.length}"
47
+ silaba = ''
48
+ salto = 0
49
+ if isConsonante(palabra[letra])
50
+ if isInseparables(palabra[letra..letra+1])
51
+ salto += 2
52
+ else
53
+ salto += 1
54
+ end
55
+ else
56
+ salto += 0
57
+ end
58
+
59
+ #puts "salto: #{salto}"
60
+ if isDiptongoConH(palabra,letra+salto,letra+salto+2)
61
+ #puts "diptongo con h"
62
+ salto += 3
63
+ elsif isDiptongo(palabra,letra+salto,letra+salto+1)
64
+ salto += 2
65
+ elsif isTriptongo(palabra,letra+salto,letra+salto+2)
66
+ salto += 3
67
+ elsif isDieresis(palabra,letra+salto,letra+salto+1)
68
+ salto += 2
69
+ else
70
+ salto += 1
71
+ end
72
+ #puts "acoda silaba: #{palabra[letra,letra+salto]} letra: #{letra} salto: #{salto}"
73
+
74
+ salto += coda(palabra[letra+salto,palabra.length])
75
+
76
+ #puts "dcoda silaba: #{palabra[letra,letra+salto]} letra: #{letra} salto: #{salto}"
77
+
78
+
79
+ silaba = palabra[letra,salto]
80
+ letra += salto
81
+ silabas << silaba
82
+
83
+ #puts "Dletra: #{letra}"
84
+ #puts "Dsalto: #{salto}"
85
+
86
+ end
87
+ return silabas.join("-")
88
+ end
89
+
90
+ def self.isInseparables(trozo)
91
+ #puts "isInspearable? #{trozo}"
92
+ inseparables = %w(br bl cr cl dr fr fl gr gl kr ll pr pl tr rr ch)
93
+ return inseparables.include? trozo
94
+ end
95
+
96
+ def self.coda(trozo)
97
+ #puts "coda: #{trozo}"
98
+ l = trozo.length
99
+ if l == 0
100
+ return 0
101
+ elsif l == 1 and isConsonante(trozo)
102
+ return 1
103
+ elsif l > 1 and isInseparables(trozo[0,2])
104
+ return 0
105
+ elsif l > 1 and isConsonante(trozo,0) and isVocal(trozo,1)
106
+ return 0
107
+ elsif l > 2 and isConsonante(trozo,0) and isConsonante(trozo,1) and isVocal(trozo,2)
108
+ return 1
109
+ elsif l > 3 and isConsonante(trozo,0) and isInseparables(trozo[1,2]) and isVocal(trozo[3])
110
+ return 1
111
+ elsif l > 3 and isConsonante(trozo,0) and isConsonante(trozo,1) and isConsonante(trozo,2) and isVocal(trozo,3)
112
+ return 2
113
+ elsif l > 3 and isConsonante(trozo,0) and isConsonante(trozo,1) and isConsonante(trozo,2) and isConsonante(trozo,3)
114
+ return 2
115
+ else
116
+ return 0
117
+ end
118
+ end
119
+
120
+
121
+ def self.calcularPosicionSilabas(silabada)
122
+ #puts "calcular posicion #{silabada}."
123
+ output = []
124
+ text = silabada
125
+ while(!text.index("-").nil?)
126
+ i = text.index("-")
127
+ text = text.slice(0,i) + text.slice(i+1,text.length)
128
+ output << i
129
+ end
130
+ return output
131
+ end
132
+
133
+
36
134
  def self.isTonica(word,i)
135
+ test = _isTonica(word,i)
136
+ if test
137
+ if _isTonica(word,i+1)
138
+ return false
139
+ else
140
+ return test
141
+ end
142
+ else
143
+ return false
144
+ end
145
+ end
146
+
147
+
148
+ def self._isTonica(word,i)
149
+ return false if isConsonante(word,i)
37
150
  #falta considerar las palabras que poseen acento pero no tilde
38
- return true if word.size == 1
39
151
  tildes = %w(á é í ó ú ã ä ë)
40
152
  w = word.join
41
- if tildes.include? word[i]
153
+ #puts "isTonica? #{w}: #{i}"
154
+ return true if w.size == 1
155
+
156
+
157
+ if tildes.include? w[i]
42
158
  return true
43
159
  else
44
- es = Text::Hyphen.new(:language => "es", :left => 0, :right => 1)
45
- p = es.hyphenate(w)
46
- #puts es.visualize(w)
47
- hh = es.visualize(w).split("-")
160
+ g = silabar(w)
161
+ hh = g.split("-")
162
+ p = calcularPosicionSilabas(g)
48
163
 
49
164
  if hh.size == 1 and w.size > 4 and w.include? 'h' and w[0] != 'h'
50
165
  #caso johan
@@ -90,7 +205,8 @@ module Fonemas
90
205
  end
91
206
  elsif hh.size >= 3
92
207
  #puts hh.join("-")
93
- if i > p[p.size-1]
208
+ #puts "hhsize3 i: #{i}, p:#{p}"
209
+ if i >= p[p.size-1]
94
210
  if w =~ /[nsaeiou]$/
95
211
  return false
96
212
  else
@@ -132,20 +248,58 @@ module Fonemas
132
248
 
133
249
  end
134
250
 
135
- def self.isVocal(word,i)
251
+ def self.isVocal(word,i=0)
136
252
  vocales = %w(a e i o u á é í ó ú)
137
253
  return vocales.include? word[i]
138
254
  end
139
255
 
256
+ def self.isConsonante(word,i=0)
257
+ return !isVocal(word,i)
258
+ end
259
+
260
+ def self.isTriptongo(palabra,first,third)
261
+ t = palabra[first,third]
262
+ return false if t.length < 3
263
+ triptongos = %w(iai iei uai uei uau iau uay uey)
264
+ return triptongos.include? t
265
+ end
266
+
267
+ def self.isDieresis(palabra,first,second)
268
+ t = palabra[first,second]
269
+ return false if t.length < 2
270
+ dieresis = %w(ue ui)
271
+ return dieresis.include? t
272
+
273
+ end
274
+
140
275
  def self.isDiptongo(word,first,second)
276
+ trozo = word[first..second]
277
+ return false if trozo.length != 2
278
+ #puts "diptongo word #{word}, first: #{first}, second: #{second}"
279
+ #puts "test diptongo #{word[first] + word[second]}"
141
280
  f = word[first]
142
281
  s = word[second]
143
- abiertas = %w(a e o)
144
- cerradas = %w(i u)
282
+ abiertas = %w(a e o á é ó)
283
+ cerradas = %w(i u í ú)
145
284
  return ((abiertas.include? f and cerradas.include? s) or (abiertas.include? s and cerradas.include? f) or (cerradas.include? f and cerradas.include? s))
146
285
 
147
286
  end
148
287
 
288
+ def self.isDiptongoConH(word,first,third)
289
+ test = word[first..third]
290
+ #puts "test diptongo con h: #{test}"
291
+ if test[1] == 'h'
292
+ if test[2,2] == 'ue'
293
+ return false
294
+ else
295
+ test = test.gsub(/h/,'')
296
+ end
297
+ else
298
+ return false
299
+ end
300
+ return isDiptongo(test,0,1)
301
+ end
302
+
149
303
  def self.separar(word)
150
304
  word = downcase(word)
151
305
  output = []
@@ -1,3 +1,3 @@
1
1
  module Fonemas
2
- VERSION = '0.4.16'
2
+ VERSION = '0.5.0'
3
3
  end
@@ -1,7 +1,7 @@
1
1
  # encoding: utf-8
2
2
  require 'spec_helper'
3
3
  describe Fonemas do
4
- it 'test acentos' do
4
+ it 'test acentos' do
5
5
  Fonemas.fonemas('hasta').should include("aa s t a")
6
6
  Fonemas.fonemas('torta').should include("t oo r t a")
7
7
  Fonemas.fonemas('ungüento').should include("u n g u ee n t o")
@@ -29,7 +29,6 @@ describe Fonemas do
29
29
  Fonemas.fonemas('guatón').should include('g u a t oo n')
30
30
  Fonemas.fonemas('gu').should include('gg u')
31
31
  Fonemas.fonemas('guagua').should include('gu aa g u a')
32
- Fonemas.fonemas('joão').should include('ll o aa o')
33
32
  Fonemas.fonemas('johan').should include('ll oo j a n')
34
33
  Fonemas.fonemas('adquirir').should include('a d k i r ii r')
35
34
  for i in Fonemas.fonemas('adskribir')
@@ -108,8 +107,36 @@ describe Fonemas do
108
107
  output.should include('a b e')
109
108
  output.should include('a c d')
110
109
  output.should include('a c e')
110
+ end
111
+
112
+ it 'debe saber separar silabas' do
113
+ Fonemas.silabar('áfrica').should eql('á-fri-ca')
114
+ Fonemas.silabar('abstraer').should eql('abs-tra-er')
115
+ Fonemas.silabar('ahuyentar').should eql('ahu-yen-tar')
116
+ Fonemas.silabar('acaban').should eql('a-ca-ban')
117
+ Fonemas.silabar('pino').should eql('pi-no')
118
+ Fonemas.silabar('camión').should eql('ca-mión')
119
+ Fonemas.silabar('holanda').should eql('ho-lan-da')
120
+ Fonemas.silabar('abuela').should eql('a-bue-la')
121
+
122
+ end
111
123
 
124
+ it 'marcar inicios de cada silaba' do
125
+ Fonemas.calcularPosicionSilabas('ho-lan-da').should eql([2,5])
126
+ end
127
+
128
+ it 'identificar sílaba tónica' do
129
+ word = Fonemas.separar('acaban')
130
+ Fonemas.isTonica(word,0).should be_false
131
+ Fonemas.isTonica(word,2).should be_true
132
+ Fonemas.isTonica(word,4).should be_false
133
+
134
+ end
112
135
 
136
+ it 'sólo debe existir una sílaba acentuada' do
137
+ fonemas = Fonemas.fonemas('acaban')
138
+ fonemas.should_not include('aa k aa b a n')
139
+ fonemas.should include('a k aa b a n')
113
140
  end
114
141
 
115
142
  end
@@ -1 +1,6 @@
1
- require 'fonemas'
1
+ require 'fonemas'
2
+
3
+ RSpec.configure do |config|
4
+ config.filter_run focus: true
5
+ config.run_all_when_everything_filtered = true
6
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fonemas
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.16
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Manuel Bahamondez Honores
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-09-02 00:00:00.000000000 Z
11
+ date: 2013-09-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -52,20 +52,6 @@ dependencies:
52
52
  - - ! '>='
53
53
  - !ruby/object:Gem::Version
54
54
  version: '0'
55
- - !ruby/object:Gem::Dependency
56
- name: text-hyphen
57
- requirement: !ruby/object:Gem::Requirement
58
- requirements:
59
- - - ~>
60
- - !ruby/object:Gem::Version
61
- version: 1.4.1
62
- type: :runtime
63
- prerelease: false
64
- version_requirements: !ruby/object:Gem::Requirement
65
- requirements:
66
- - - ~>
67
- - !ruby/object:Gem::Version
68
- version: 1.4.1
69
55
  - !ruby/object:Gem::Dependency
70
56
  name: unicode_utils
71
57
  requirement: !ruby/object:Gem::Requirement