RubyGems - fonetica - Versions diffs - 0.4.0 → 1.0.0 - Mend

fonetica 0.4.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10)

data/CHANGELOG CHANGED

@@ -1,3 +1,21 @@
+1.0.0 [Sun Jun 19 2011]
+* Use replacements table proposed by Marcos Rodrigues Caso (see:
+http://caso.somee.com/siteTCC/docs/TCCMarcosRC.pdf)
+  This change will catch current cases and more like 'willian' and 'uillian'
+  Note that this change is backwards-incompatible. But you can update your
+  database using a rake task like that:
+    task :update_fonetica => :environment do
+      Person.transaction do
+        Person.find_each do |person|
+          person.update_attribute(:fonetica, person.name.foneticalize)
+        end
+      end
+    end
 0.4.0 [Sat Feb 12 2010]
 * Compatibility with Ruby 1.9

data/README.textile CHANGED

@@ -11,10 +11,10 @@ Then Google suggested me to read the "BuscaBR algorithm":http://www.unibratec.co
 h2. Usage
 <pre>
-  require 'fonetica'
+require 'fonetica'
-  'Wagner Batista'.foneticalize #=> "VM BT"
-  'Vagner Baptista'.foneticalize #=> "VM BT"
+'wagner batista'.foneticalize #=> "VM BT"
+'vagner baptista'.foneticalize #=> "VM BT"
 </pre>
 h3. Using with ActiveRecord
@@ -22,23 +22,23 @@ h3. Using with ActiveRecord
 You can use the fonetica to search on ActiveRecord like this:
 <pre>
-  class Person < ActiveRecord::Base
-    scope :search, lambda { |name| where("#{quoted_table_name}.fonetica LIKE ?", "#{name.foneticalize}%") }
+class Person < ActiveRecord::Base
+  scope :search, lambda { |name| where("#{quoted_table_name}.fonetica LIKE ?", "#{name.foneticalize}%") }
-    before_save :foneticalize
+  before_save :foneticalize
-    protected
+  protected
-    def foneticalize
-      self.fonetica = name.foneticalize
-    end
+  def foneticalize
+    self.fonetica = name.foneticalize
   end
+end
 </pre>
 If you want to match any part, you should change scope to:
 <pre>
-  scope :search, lambda { |name| where("#{quoted_table_name}.fonetica LIKE ?", "%#{name.foneticalize}%") }
+scope :search, lambda { |name| where("#{quoted_table_name}.fonetica LIKE ?", "%#{name.foneticalize}%") }
 </pre>
 Remember to add a index on fonetica column.
@@ -52,10 +52,10 @@ Please also keep your commits *atomic* so that they are more likely to apply cle
 h2. Development environment
 <pre>
-  $ git clone http://github.com/sobrinho/fonetica
-  $ cd fonetica
-  $ bundle install
-  $ rake test
+$ git clone http://github.com/sobrinho/fonetica
+$ cd fonetica
+$ bundle install
+$ rake test
 </pre>
 h2. Project info

data/Rakefile CHANGED

@@ -1,8 +1,6 @@
-require 'bundler'
+require 'bundler/gem_tasks'
 require 'rake/testtask'
-Bundler::GemHelper.install_tasks
 Rake::TestTask.new(:test) do |t|
   t.libs << 'test'
   t.pattern = 'test/**/*_test.rb'

data/fonetica.gemspec CHANGED

@@ -4,7 +4,7 @@ require "fonetica/version"
 Gem::Specification.new do |s|
   s.name        = "fonetica"
-  s.version     = Fonetica::Version::STRING
+  s.version     = Fonetica::VERSION
   s.platform    = Gem::Platform::RUBY
   s.authors     = ["Gabriel Sobrinho"]
   s.email       = ["gabriel.sobrinho@gmail.com"]

data/lib/fonetica.rb CHANGED

@@ -11,28 +11,22 @@ class Fonetica
   class_attribute :replacements
   self.replacements = [
-    ['Y', 'I'],
-    [/BR|BL/, 'B'],
+    [/BL|BR/, 'B'],
     ['PH', 'F'],
-    [/MG|NG|RG/, 'G'],
-    [/GE|GI|RJ|MJ|NJ/, 'J'],
-    [/GR|GL/, 'G'],
-    [/CE|CI|CH/, 'S'],
-    [/PT|CT/, 'T'],
-    ['CS', 'S'],
-    [/Q|C|CA|CO|CU|CK/, 'K'],
-    ['LH', 'L'],
-    ['RM', 'SM'],
+    [/GL|GR|MG|NG|RG/, 'G'],
+    ['Y', 'I'],
+    [/GE|GI|RJ|MJ/, 'J'],
+    [/CA|CO|CU|CK|Q/, 'K'],
     ['N', 'M'],
-    [/MD|GM|AO\b/, 'M'],
-    ['NH', 'N'],
+    [/AO|AUM|GM|MD|OM|ON/, 'M'],
     ['PR', 'P'],
-    [/X|TS|C|Z|RS/, 'S'],
-    [/TR|TL/, 'T'],
-    [/LT|RT|ST/, 'T'],
-    ['W', 'V'],
-    [/[SZRMNL]\b/, ''],
     ['L', 'R'],
+    [/CE|CI|CH|CS|RS|TS|X|Z/, 'S'],
+    [/TR|TL/, 'T'],
+    [/CT|RT|ST|PT/, 'T'],
+    [/\b[UW]/, 'V'],
+    ['RM', 'SM'],
+    [/[MRS]\b/, ''],
     [/[AEIOUH]/, '']
   ]

data/lib/fonetica/version.rb CHANGED

@@ -1,9 +1,3 @@
 class Fonetica
-  module Version #:nodoc:
-    MAJOR = 0
-    MINOR = 4
-    TINY  = 0
-    STRING = [MAJOR, MINOR, TINY].join('.')
-  end
+  VERSION = '1.0.0'
 end

data/test/fonetica_test.rb ADDED

@@ -0,0 +1,158 @@
+# encoding: utf-8
+require 'test_helper'
+class FoneticaTest < Test::Unit::TestCase
+  def test_broco_and_bloco
+    assert_fonetica 'broco', 'bloco'
+  end
+  def test_casa_and_kasa
+    assert_fonetica 'casa', 'kasa'
+  end
+  def test_cela_and_sela
+    assert_fonetica 'sela', 'cela'
+  end
+  def test_circo_and_sirco
+    assert_fonetica 'circo', 'sirco'
+  end
+  def test_coroar_and_koroar
+    assert_fonetica 'coroar', 'koroar'
+  end
+  def test_cuba_and_kuba
+    assert_fonetica 'cuba', 'kuba'
+  end
+  def test_roca_and_rosa
+    assert_fonetica 'roça', 'rosa'
+  end
+  def test_ameixa_and_ameicha
+    assert_fonetica 'ameixa', 'ameicha'
+  end
+  def test_toracs_and_torax
+    assert_fonetica 'toracs', 'torax'
+  end
+  def test_compactar_and_compatar
+    assert_fonetica 'compactar', 'compatar'
+  end
+  def test_fleuma_and_fleugma
+    assert_fonetica 'fleuma', 'fleugma'
+  end
+  def test_hieroglifo_and_hierogrifo
+    assert_fonetica 'hieroglifo', 'hierogrifo'
+  end
+  def test_negro_and_nego
+    assert_fonetica 'negro', 'nego'
+  end
+  def test_luminar_and_ruminar
+    assert_fonetica 'luminar', 'ruminar'
+  end
+  def test_mudez_and_nudez
+    assert_fonetica 'mudez', 'nudez'
+  end
+  def test_comendo_and_comeno
+    assert_fonetica 'comendo', 'comeno'
+  end
+  def test_bunginganga_and_bugiganga
+    assert_fonetica 'bunginganga', 'bugiganga'
+  end
+  def test_philipe_and_felipe
+    assert_fonetica 'philipe', 'felipe'
+  end
+  def test_estupro_and_estrupo
+    assert_fonetica 'estupro', 'estrupo'
+  end
+  def test_queijo_and_keijo
+    assert_fonetica 'queijo', 'keijo'
+  end
+  def test_lagarto_and_largarto
+    assert_fonetica 'lagarto', 'largarto'
+  end
+  def test_perspectiva_and_pespectiva
+    assert_fonetica 'perspectiva', 'pespectiva'
+  end
+  def test_lagartixa_and_largatixa
+    assert_fonetica 'lagartixa', 'largatixa'
+  end
+  def test_mesmo_and_mermo
+    assert_fonetica 'mesmo', 'mermo'
+  end
+  def test_virgem_and_virge
+    assert_fonetica 'virgem', 'virge'
+  end
+  def test_supersticao_and_superticao
+    assert_fonetica 'supersticao', 'superticao'
+  end
+  def test_estupro_and_estrupo
+    assert_fonetica 'estupro', 'estrupo'
+  end
+  def test_contrato_and_contlato
+    assert_fonetica 'contrato', 'contlato'
+  end
+  def test_kubitscheck_and_kubixeque
+    assert_fonetica 'kubitscheck', 'kubixeque'
+  end
+  def test_walter_and_valter
+    assert_fonetica 'walter', 'valter'
+  end
+  def test_exceder_and_esceder
+    assert_fonetica 'exceder', 'esceder'
+  end
+  def test_yara_and_iara
+    assert_fonetica 'yara', 'iara'
+  end
+  def test_casa_and_caza
+    assert_fonetica 'casa', 'caza'
+  end
+  def test_wilson_and_uilson
+    assert_fonetica 'wilson', 'uilson'
+  end
+  def test_optico_and_otico
+    assert_fonetica 'óptico', 'ótico'
+  end
+  def test_orgaozinho
+    assert_fonetica 'órgãozinho', 'órgaozinho'
+  end
+  def test_batista_and_baptista
+    assert_fonetica 'batista', 'baptista'
+  end
+  protected
+  def assert_fonetica(first, second)
+    assert_equal first.foneticalize, second.foneticalize, "#{first.inspect} and #{second.inspect} do not match"
+  end
+end

data/test/test_helper.rb CHANGED

@@ -2,4 +2,3 @@ require 'rubygems'
 require 'bundler/setup'
 require 'fonetica'
 require 'test/unit'
-require 'active_support/test_case'

metadata CHANGED

@@ -1,13 +1,8 @@
 --- !ruby/object:Gem::Specification
 name: fonetica
 version: !ruby/object:Gem::Version
-  hash: 15
   prerelease:
-  segments:
-  - 0
-  - 4
-  - 0
-  version: 0.4.0
+  version: 1.0.0
 platform: ruby
 authors:
 - Gabriel Sobrinho
@@ -15,7 +10,7 @@ autorequire:
 bindir: bin
 cert_chain: []
-date: 2011-02-12 00:00:00 -02:00
+date: 2011-06-19 00:00:00 -03:00
 default_executable:
 dependencies:
 - !ruby/object:Gem::Dependency
@@ -26,11 +21,6 @@ dependencies:
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        hash: 7
-        segments:
-        - 3
-        - 0
-        - 0
         version: 3.0.0
   type: :runtime
   version_requirements: *id001
@@ -42,11 +32,6 @@ dependencies:
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        hash: 13
-        segments:
-        - 0
-        - 4
-        - 1
         version: 0.4.1
   type: :runtime
   version_requirements: *id002
@@ -70,7 +55,7 @@ files:
 - lib/fonetica.rb
 - lib/fonetica/core_ext/string.rb
 - lib/fonetica/version.rb
-- test/string_test.rb
+- test/fonetica_test.rb
 - test/test_helper.rb
 has_rdoc: true
 homepage: http://github.com/sobrinho/fonetica
@@ -86,18 +71,12 @@ required_ruby_version: !ruby/object:Gem::Requirement
   requirements:
   - - ">="
     - !ruby/object:Gem::Version
-      hash: 3
-      segments:
-      - 0
       version: "0"
 required_rubygems_version: !ruby/object:Gem::Requirement
   none: false
   requirements:
   - - ">="
     - !ruby/object:Gem::Version
-      hash: 3
-      segments:
-      - 0
       version: "0"
 requirements: []
@@ -107,5 +86,5 @@ signing_key:
 specification_version: 3
 summary: BuscaBR algorithm which allow the comparison of words based on their phonetic likeness
 test_files:
-- test/string_test.rb
+- test/fonetica_test.rb
 - test/test_helper.rb

data/test/string_test.rb DELETED

@@ -1,188 +0,0 @@
-# encoding: utf-8
-require 'test_helper'
-class StringTest < ActiveSupport::TestCase
-  test 'broco and bloco should fonetica to BK' do
-    assert_equal 'BK', 'broco'.foneticalize
-    assert_equal 'BK', 'bloco'.foneticalize
-  end
-  test 'casa and kasa should fonetica to KS' do
-    assert_equal 'KS', 'casa'.foneticalize
-    assert_equal 'KS', 'kasa'.foneticalize
-  end
-  test 'coroar and koroar should fonetica to KR' do
-    assert_equal 'KR', 'coroar'.foneticalize
-    assert_equal 'KR', 'koroar'.foneticalize
-  end
-  test 'cuba and kuba should fonetica to KB' do
-    assert_equal 'KB', 'cuba'.foneticalize
-    assert_equal 'KB', 'kuba'.foneticalize
-  end
-  test 'cela and sela should fonetica to SR' do
-    assert_equal 'SR', 'cela'.foneticalize
-    assert_equal 'SR', 'sela'.foneticalize
-  end
-  test 'circo and sirco should fonetica to SRK' do
-    assert_equal 'SRK', 'circo'.foneticalize
-    assert_equal 'SRK', 'sirco'.foneticalize
-  end
-  test 'roça and rosa should fonetica to RS' do
-    assert_equal 'RS', 'roça'.foneticalize
-    assert_equal 'RS', 'rosa'.foneticalize
-  end
-  test 'ameixa and ameicha should fonetica to MS' do
-    assert_equal 'MS', 'ameixa'.foneticalize
-    assert_equal 'MS', 'ameicha'.foneticalize
-  end
-  test 'toracs and torax should fonetica to TR' do
-    assert_equal 'TR', 'toracs'.foneticalize
-    assert_equal 'TR', 'torax'.foneticalize
-  end
-  test 'compactar and compatar should fonetica to KMPT' do
-    assert_equal 'KMPT', 'compactar'.foneticalize
-    assert_equal 'KMPT', 'compatar'.foneticalize
-  end
-  test 'batista and baptista should fonetica to BT' do
-    assert_equal 'BT', 'batista'.foneticalize
-    assert_equal 'BT', 'baptista'.foneticalize
-  end
-  test 'gana should fonetica to KMPT' do
-    assert_equal 'GM', 'gana'.foneticalize
-  end
-  test 'gostar should fonetica to GT' do
-    assert_equal 'GT', 'gostar'.foneticalize
-  end
-  test 'guabiru should fonetica to GBR' do
-    assert_equal 'GBR', 'guabiru'.foneticalize
-  end
-  test 'negro and nego should fonetica to MG' do
-    assert_equal 'MG', 'negro'.foneticalize
-    assert_equal 'MG', 'nego'.foneticalize
-  end
-  test 'hieróglifo and hierógrifo should fonetica to RGF' do
-    assert_equal 'RGF', 'hieróglifo'.foneticalize
-    assert_equal 'RGF', 'hierógrifo'.foneticalize
-  end
-  test 'gene should fonetica to JM' do
-    assert_equal 'JM', 'gene'.foneticalize
-  end
-  test 'gibi should fonetica to JB' do
-    assert_equal 'JB', 'gibi'.foneticalize
-  end
-  test 'fleugma should fonetica to FRM' do
-    assert_equal 'FRM', 'fleugma'.foneticalize
-  end
-  test 'luminar and ruminar should fonetica to RM' do
-    assert_equal 'RM', 'luminar'.foneticalize
-    assert_equal 'RM', 'ruminar'.foneticalize
-  end
-  test 'mudez and nudez should fonetica to MD' do
-    assert_equal 'MD', 'mudez'.foneticalize
-    assert_equal 'MD', 'nudez'.foneticalize
-  end
-  test 'comendo and comeno should fonetica to KM' do
-    assert_equal 'KM', 'comendo'.foneticalize
-    assert_equal 'KM', 'comeno'.foneticalize
-  end
-  test 'bunginganga and bugiganga should fonetica to BJG' do
-    assert_equal 'BJG', 'bunginganga'.foneticalize
-    assert_equal 'BJG', 'bugiganga'.foneticalize
-  end
-  test 'philipe and felipe should fonetica to FRP' do
-    assert_equal 'FRP', 'philipe'.foneticalize
-    assert_equal 'FRP', 'felipe'.foneticalize
-  end
-  test 'queijo and keijo should fonetica to KJ' do
-    assert_equal 'KJ', 'queijo'.foneticalize
-    assert_equal 'KJ', 'keijo'.foneticalize
-  end
-  test 'lagarto and largato should fonetica to RGT' do
-    assert_equal 'RGT', 'lagarto'.foneticalize
-    assert_equal 'RGT', 'largato'.foneticalize
-  end
-  test 'perspectiva and pespectiva should fonetica to PSPTV' do
-    assert_equal 'PSPTV', 'perspectiva'.foneticalize
-    assert_equal 'PSPTV', 'pespectiva'.foneticalize
-  end
-  test 'lagartixa and largatixa should fonetica to RGTS' do
-    assert_equal 'RGTS', 'lagartixa'.foneticalize
-    assert_equal 'RGTS', 'largatixa'.foneticalize
-  end
-  test 'mesmo and mermo should fonetica to MSM' do
-    assert_equal 'MSM', 'mesmo'.foneticalize
-    assert_equal 'MSM', 'mermo'.foneticalize
-  end
-  test 'virgem and vige should fonetica to VJ' do
-    assert_equal 'VJ', 'virgem'.foneticalize
-    assert_equal 'VJ', 'vige'.foneticalize
-  end
-  test 'superstição and supertição should fonetica to SPTS' do
-    assert_equal 'SPTS', 'superstição'.foneticalize
-    assert_equal 'SPTS', 'supertição'.foneticalize
-  end
-  test 'estupro and estrupo should fonetica to TP' do
-    assert_equal 'TP', 'estupro'.foneticalize
-    assert_equal 'TP', 'estrupo'.foneticalize
-  end
-  test 'contrato and contlato should fonetica to KMT' do
-    assert_equal 'KMT', 'contrato'.foneticalize
-    assert_equal 'KMT', 'contlato'.foneticalize
-  end
-  test 'kubitscheck and kubixeque should fonetica to KBSK' do
-    assert_equal 'KBSK', 'kubitscheck'.foneticalize
-    assert_equal 'KBSK', 'kubixeque'.foneticalize
-  end
-  test 'walter and valter should fonetica to VT' do
-    assert_equal 'VT', 'walter'.foneticalize
-    assert_equal 'VT', 'valter'.foneticalize
-  end
-  test 'exceder and esceder should fonetica to SD' do
-    assert_equal 'SD', 'exceder'.foneticalize
-    assert_equal 'SD', 'esceder'.foneticalize
-  end
-  test 'yara and iara should fonetica to R' do
-    assert_equal 'R', 'yara'.foneticalize
-    assert_equal 'R', 'iara'.foneticalize
-  end
-  test 'casa and caza should fonetica to KS' do
-    assert_equal 'KS', 'casa'.foneticalize
-    assert_equal 'KS', 'caza'.foneticalize
-  end
-end