RubyGems - fonemas - Versions diffs - 0.4.16 → 0.5.0 - Mend

fonemas 0.4.16 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8)

checksums.yaml CHANGED

@@ -1,15 +1,15 @@
 ---
 !binary "U0hBMQ==":
   metadata.gz: !binary |-
-    M2JiMjdhZGRkYjI5ZmRkNzM2ZWI1MmRjMTMwMjAwZTlmNmIzMzc3Yg==
+    ZjlhNGZlOWUyZjhlMDg4ZmM5OGZjZTRiYzY0NGRiYjVlMDk1NDVjOQ==
   data.tar.gz: !binary |-
-    YWI3ZTMwOTFkYmMxYzIwNzdjNDEzNzI1YzliMjEwYzk1M2M4MDQxNA==
+    YTRhMWRmZGQyNTFlYzllNWI0MzNiNDFiYmIyMzNmZmI0NjQ5NzA1ZA==
 !binary "U0hBNTEy":
   metadata.gz: !binary |-
-    OGU0Mzg4NGRkMzhkM2Y1ODJkOGVmNmY0MzQ2MTQyYmViY2M3OGQ0NWJlNmZl
-    NjdjMGI1ZmIzODJmMGY1NzcxYTU4MTIxYzk0Nzg3ZWYwNWI5ZjQ1YmQyNGM4
-    ZWI4NGE1ZDUxZDc3ZjQwNGRhMjBjYzcyMWUwZWExZGE3MzEwYWQ=
+    MGU1NmZmMzc0NDJhNGNlNjBjMzVmYzViOTljYmI2MWFiYjgyMzBlOWYxYThi
+    NWQwODk1YTViMmFjMThlMDU1YWEzYzQ4ZmUxOThkOGI5NGEwMmJkMzNiMmNh
+    ZWU1Y2VjNWZkYmE2MWU4OWQ3NWRlOWI3MDkwMWQyOTQ2OGRmMDI=
   data.tar.gz: !binary |-
-    ZGRlYmVkNmMxYTQ3NWIzZjE2MDI1MThmYTkwMzg4YjNmOGFmZjUxMzNlYThi
-    MGE5NzJlY2UyZWQ4NzQ5OWVmZTk2MDFlNzBjNzkxOTIxMTVjYjM4NjIyNDhm
-    NWJkOTNlNDcxMGYwNDQyMWYzNmEwNTcxYzVhZWVmMjcyNTdjYzA=
+    ZDU1YWI4YzZkNDkwYWNjMjE3MWVmNGFiNjk0MzFkMWE5MmY2MjM5ZWE4ZTBh
+    Zjc4ODcwMWVhN2NhMzA1NzBhNWQ2ODc0NzNiM2IyN2M3Y2I3YjMyMjM5MGM0
+    MTMzZDg1ZGY1MWM5OTk0NDc4N2M5OTgzNDg2OWJjYTdhZTJjMWM=

data/bin/audioupload CHANGED

@@ -51,7 +51,7 @@ end
-if ARGV[3].nil?
+if f[3].nil?
     puts "uploading..."
     upload_audio(filename,ARGV[0],public,nil,ARGV[4])
 else
@@ -60,7 +60,7 @@ else
     lr = `ffmpeg -i "#{filename}" 2>&1 | grep 'Duration'| cut -d ' ' -f 4 | cut -d ',' -f 1`
     lr = lr.split(':')
     length = lr[0].to_i*3600 + lr[1].to_i*60 + lr[2].to_f
-    puts "largo original archivo: #{lr}"
+    puts "largo original archivo: #{length}"
     start_time = 0
     counter = 0
     max_parts = (length/limit).ceil

data/fonemas.gemspec CHANGED

@@ -21,7 +21,6 @@ Gem::Specification.new do |spec|
   spec.add_development_dependency "bundler", "~> 1.3"
   spec.add_development_dependency "rake"
   spec.add_development_dependency "rspec"
-  spec.add_runtime_dependency "text-hyphen", '~> 1.4.1'
   spec.add_runtime_dependency "unicode_utils", '~> 1.4.0'
   spec.add_runtime_dependency "rest-client", "~> 1.6.7"
   spec.add_runtime_dependency 'mime-types'

data/lib/fonemas.rb CHANGED

@@ -2,7 +2,6 @@
 require "fonemas/version"
 module Fonemas
-  require 'text/hyphen'
   require 'unicode_utils'
   def self.version
@@ -33,18 +32,134 @@ module Fonemas
     return false
   end
+  def self.silabar(palabra)
+    #puts "silabar: #{palabra}"
+    #algoritmo adaptado desde python
+    #codigo original extraido desde:
+    #https://github.com/xergio/silabas/blob/master/home/silabea.py
+    silabas = []
+    letra = 0
+    salto = 0
+    while silabas.join('').length  < palabra.length
+      #puts "silabas antes: #{silabas}"
+      #puts "letra: #{letra}"
+      #puts "palabra length: #{palabra.length}"
+      silaba = ''
+      salto = 0
+      if isConsonante(palabra[letra])
+        if isInseparables(palabra[letra..letra+1])
+          salto += 2
+        else
+          salto += 1
+        end
+      else
+        salto += 0
+      end
+      #puts "salto: #{salto}"
+      if isDiptongoConH(palabra,letra+salto,letra+salto+2)
+        #puts "diptongo con h"
+        salto += 3
+      elsif isDiptongo(palabra,letra+salto,letra+salto+1)
+        salto += 2
+      elsif isTriptongo(palabra,letra+salto,letra+salto+2)
+        salto += 3
+      elsif isDieresis(palabra,letra+salto,letra+salto+1)
+        salto += 2
+      else
+        salto += 1
+      end
+      #puts "acoda silaba: #{palabra[letra,letra+salto]} letra: #{letra} salto: #{salto}"
+      salto += coda(palabra[letra+salto,palabra.length])
+      #puts "dcoda silaba: #{palabra[letra,letra+salto]} letra: #{letra} salto: #{salto}"
+      silaba = palabra[letra,salto]
+      letra += salto
+      silabas << silaba
+      #puts "Dletra: #{letra}"
+      #puts "Dsalto: #{salto}"
+    end
+    return silabas.join("-")
+  end
+  def self.isInseparables(trozo)
+    #puts "isInspearable? #{trozo}"
+    inseparables = %w(br bl cr cl dr fr fl gr gl kr ll pr pl tr rr ch)
+    return inseparables.include? trozo
+  end
+  def self.coda(trozo)
+    #puts "coda: #{trozo}"
+    l = trozo.length
+    if l == 0
+      return 0
+    elsif l == 1 and isConsonante(trozo)
+      return 1
+    elsif l > 1 and isInseparables(trozo[0,2])
+      return 0
+    elsif l > 1 and isConsonante(trozo,0) and isVocal(trozo,1)
+      return 0
+    elsif l > 2 and isConsonante(trozo,0) and isConsonante(trozo,1) and isVocal(trozo,2)
+      return 1
+    elsif l > 3 and isConsonante(trozo,0) and isInseparables(trozo[1,2]) and isVocal(trozo[3])
+      return 1
+    elsif l > 3 and isConsonante(trozo,0) and isConsonante(trozo,1) and isConsonante(trozo,2) and isVocal(trozo,3)
+      return 2
+    elsif l > 3 and isConsonante(trozo,0) and isConsonante(trozo,1) and isConsonante(trozo,2) and isConsonante(trozo,3)
+      return 2
+    else
+      return 0
+    end
+  end
+  def self.calcularPosicionSilabas(silabada)
+    #puts "calcular posicion #{silabada}."
+    output = []
+    text = silabada
+    while(!text.index("-").nil?)
+      i = text.index("-")
+      text = text.slice(0,i) + text.slice(i+1,text.length)
+      output << i
+    end
+    return output
+  end
   def self.isTonica(word,i)
+    test = _isTonica(word,i)
+    if test
+      if _isTonica(word,i+1)
+        return false
+      else
+        return test
+      end
+    else
+      return false
+    end
+  end
+  def self._isTonica(word,i)
+    return false if isConsonante(word,i)
     #falta considerar las palabras que poseen acento pero no tilde
-    return true if word.size == 1
     tildes = %w(á é í ó ú ã ä ë)
     w = word.join
-    if tildes.include? word[i]
+    #puts "isTonica? #{w}: #{i}"
+    return true if w.size == 1
+    if tildes.include? w[i]
       return true
     else
-      es = Text::Hyphen.new(:language => "es", :left => 0, :right => 1)
-      p = es.hyphenate(w)
-      #puts es.visualize(w)
-      hh = es.visualize(w).split("-")
+      g = silabar(w)
+      hh = g.split("-")
+      p = calcularPosicionSilabas(g)
       if hh.size == 1 and w.size > 4 and w.include? 'h' and w[0] != 'h'
         #caso johan
@@ -90,7 +205,8 @@ module Fonemas
             end
         elsif hh.size >= 3
           #puts hh.join("-")
-          if i > p[p.size-1]
+          #puts "hhsize3 i: #{i}, p:#{p}"
+          if i >= p[p.size-1]
             if w =~ /[nsaeiou]$/
               return false
             else
@@ -132,20 +248,58 @@ module Fonemas
   end
-  def self.isVocal(word,i)
+  def self.isVocal(word,i=0)
     vocales = %w(a e i o u á é í ó ú)
     return vocales.include? word[i]
   end
+  def self.isConsonante(word,i=0)
+    return !isVocal(word,i)
+  end
+  def self.isTriptongo(palabra,first,third)
+    t = palabra[first,third]
+    return false if t.length < 3
+    triptongos = %w(iai iei uai uei uau iau uay uey)
+    return triptongos.include? t
+  end
+  def self.isDieresis(palabra,first,second)
+    t = palabra[first,second]
+    return false if t.length < 2
+    dieresis = %w(ue ui)
+    return dieresis.include? t
+  end
   def self.isDiptongo(word,first,second)
+    trozo = word[first..second]
+    return false if trozo.length != 2
+    #puts "diptongo word #{word}, first: #{first}, second: #{second}"
+    #puts "test diptongo #{word[first] + word[second]}"
     f = word[first]
     s = word[second]
-    abiertas = %w(a e o)
-    cerradas = %w(i u)
+    abiertas = %w(a e o á é ó)
+    cerradas = %w(i u í ú)
     return ((abiertas.include? f and cerradas.include? s) or (abiertas.include? s and cerradas.include? f) or (cerradas.include? f and cerradas.include? s))
   end
+  def self.isDiptongoConH(word,first,third)
+    test = word[first..third]
+    #puts "test diptongo con h: #{test}"
+    if test[1] == 'h'
+      if test[2,2] == 'ue'
+        return false
+      else
+        test = test.gsub(/h/,'')
+      end
+    else
+      return false
+    end
+    return isDiptongo(test,0,1)
+  end
   def self.separar(word)
     word = downcase(word)
     output = []

data/lib/fonemas/version.rb CHANGED

@@ -1,3 +1,3 @@
 module Fonemas
-  VERSION = '0.4.16'
+  VERSION = '0.5.0'
 end

data/spec/fonemas/fonema_spec.rb CHANGED

@@ -1,7 +1,7 @@
 # encoding: utf-8
 require 'spec_helper'
 describe Fonemas do
-  it 'test acentos' do
+  it 'test acentos'  do
     Fonemas.fonemas('hasta').should include("aa s t a")
     Fonemas.fonemas('torta').should include("t oo r t a")
     Fonemas.fonemas('ungüento').should include("u n g u ee n t o")
@@ -29,7 +29,6 @@ describe Fonemas do
     Fonemas.fonemas('guatón').should include('g u a t oo n')
     Fonemas.fonemas('gu').should include('gg u')
     Fonemas.fonemas('guagua').should include('gu aa g u a')
-    Fonemas.fonemas('joão').should include('ll o aa o')
     Fonemas.fonemas('johan').should include('ll oo j a n')
     Fonemas.fonemas('adquirir').should include('a d k i r ii r')
     for i in Fonemas.fonemas('adskribir')
@@ -108,8 +107,36 @@ describe Fonemas do
     output.should include('a b e')
     output.should include('a c d')
     output.should include('a c e')
+  end
+  it 'debe saber separar silabas' do
+    Fonemas.silabar('áfrica').should eql('á-fri-ca')
+    Fonemas.silabar('abstraer').should eql('abs-tra-er')
+    Fonemas.silabar('ahuyentar').should eql('ahu-yen-tar')
+    Fonemas.silabar('acaban').should eql('a-ca-ban')
+    Fonemas.silabar('pino').should eql('pi-no')
+    Fonemas.silabar('camión').should eql('ca-mión')
+    Fonemas.silabar('holanda').should eql('ho-lan-da')
+    Fonemas.silabar('abuela').should eql('a-bue-la')
+  end
+  it 'marcar inicios de cada silaba' do
+    Fonemas.calcularPosicionSilabas('ho-lan-da').should eql([2,5])
+  end
+  it 'identificar sílaba tónica' do
+    word = Fonemas.separar('acaban')
+    Fonemas.isTonica(word,0).should be_false
+    Fonemas.isTonica(word,2).should be_true
+    Fonemas.isTonica(word,4).should be_false
+  end
+  it 'sólo debe existir una sílaba acentuada' do
+    fonemas = Fonemas.fonemas('acaban')
+    fonemas.should_not include('aa k aa b a n')
+    fonemas.should include('a k aa b a n')
   end
 end

data/spec/spec_helper.rb CHANGED

@@ -1 +1,6 @@
-require 'fonemas'
+require 'fonemas'
+RSpec.configure do |config|
+  config.filter_run focus: true
+  config.run_all_when_everything_filtered = true
+end

metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: fonemas
 version: !ruby/object:Gem::Version
-  version: 0.4.16
+  version: 0.5.0
 platform: ruby
 authors:
 - Manuel Bahamondez Honores
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2013-09-02 00:00:00.000000000 Z
+date: 2013-09-11 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler
@@ -52,20 +52,6 @@ dependencies:
     - - ! '>='
       - !ruby/object:Gem::Version
         version: '0'
-- !ruby/object:Gem::Dependency
-  name: text-hyphen
-  requirement: !ruby/object:Gem::Requirement
-    requirements:
-    - - ~>
-      - !ruby/object:Gem::Version
-        version: 1.4.1
-  type: :runtime
-  prerelease: false
-  version_requirements: !ruby/object:Gem::Requirement
-    requirements:
-    - - ~>
-      - !ruby/object:Gem::Version
-        version: 1.4.1
 - !ruby/object:Gem::Dependency
   name: unicode_utils
   requirement: !ruby/object:Gem::Requirement