RubyGems - dimus-taxamatch_rb - Versions diffs - 0.5.1 → 0.5.2 - Mend

dimus-taxamatch_rb 0.5.1 → 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6)

data/README.rdoc +1 -0
data/lib/taxamatch_rb/authmatch.rb +2 -0
data/lib/taxamatch_rb/phonetizer.rb +6 -2
data/lib/taxamatch_rb.rb +12 -12
data/spec/taxamatch_rb_spec.rb +39 -39
metadata +2 -2

data/README.rdoc CHANGED Viewed

@@ -4,6 +4,7 @@ Taxamatch_Rb is a ruby implementation of Taxamatch algorithms developed by Tony
 The purpose of Taxamatch gem is to facilitate fuzzy comparison of two scientific name renderings to find out if they actually point to the same scientific name.
+    require 'taxamatch_rb'
     tm = Taxamatch::Base.new
     tm.taxamatch('Homo sapien', 'Homo sapiens') #returns true
     tm.taxamatch('Homo sapiens Linnaeus', 'Hommo sapens (Linn. 1758)') #returns true

data/lib/taxamatch_rb/authmatch.rb CHANGED Viewed

@@ -1,3 +1,5 @@
+# Algorithms for Taxamatch::Authmatch are developed by Patrick Leary of uBio and EOL fame
 module Taxamatch
   class Authmatch

data/lib/taxamatch_rb/phonetizer.rb CHANGED Viewed

@@ -1,8 +1,12 @@
 # encoding: UTF-8
 module Taxamatch
-  class Phonetizer
+  module Phonetizer
+    def self.phonetize(a_word, normalize_ending = false)
+      self.near_match(a_word, normalize_ending)
+    end
     def self.near_match(a_word, normalize_ending = false)
       a_word = a_word.strip rescue ''
       return '' if a_word == ''

data/lib/taxamatch_rb.rb CHANGED Viewed

@@ -24,7 +24,7 @@ module Taxamatch
     def taxamatch(str1, str2)
       preparsed_1 = @parser.parse(str1)
       preparsed_2 = @parser.parse(str2)
-      taxamatch_preparsed(preparsed_1, preparsed_2)[:match]
+      taxamatch_preparsed(preparsed_1, preparsed_2)['match']
     end
     #takes two hashes of parsed scientific names, analyses them and returns back
@@ -33,8 +33,8 @@ module Taxamatch
       result = nil
       result =  match_uninomial(preparsed_1, preparsed_2) if preparsed_1[:uninomial] && preparsed_2[:uninomial]
       result =  match_multinomial(preparsed_1, preparsed_2) if preparsed_1[:genus] && preparsed_2[:genus]
-      if result && result[:match]
-        result[:match] = false if match_authors(preparsed_1, preparsed_2) == 0
+      if result && result['match']
+        result['match'] = false if match_authors(preparsed_1, preparsed_2) == 0
       end
       return result
     end
@@ -49,7 +49,7 @@ module Taxamatch
       au_match = match_authors(preparsed_1, preparsed_2)
       total_length = preparsed_1[:genus][:epitheton].size + preparsed_2[:genus][:epitheton].size + preparsed_1[:species][:epitheton].size + preparsed_2[:species][:epitheton].size
       match = match_matches(gen_match, sp_match)
-      match.merge({:score => (1- match[:edit_distance]/(total_length/2))})
+      match.merge({'score' => (1- match['edit_distance']/(total_length/2))})
     end
     def match_genera(genus1, genus2)
@@ -57,10 +57,10 @@ module Taxamatch
       genus2_length = genus2[:normalized].size
       match = false
       ed = @dlm.distance(genus1[:normalized], genus2[:normalized],2,3)
-      return {:edit_distance => ed, :phonetic_match => true, :match => true} if genus1[:phonetized] == genus2[:phonetized]
+      return {'edit_distance' => ed, 'phonetic_match' => true, 'match' => true} if genus1[:phonetized] == genus2[:phonetized]
       match = true if ed <= 3 && ([genus1_length, genus2_length].min > ed * 2) && (ed < 2 || genus1[0] == genus2[0])
-      {:edit_distance => ed, :match => match, :phonetic_match => false}
+      {'edit_distance' => ed, 'match' => match, 'phonetic_match' => false}
     end
     def match_species(sp1, sp2)
@@ -70,10 +70,10 @@ module Taxamatch
       sp2[:phonetized] = Taxamatch::Phonetizer.normalize_ending sp2[:phonetized]
       match = false
       ed = @dlm.distance(sp1[:normalized], sp2[:normalized], 4, 4)
-      return {:edit_distance => ed, :phonetic_match => true, :match => true} if sp1[:phonetized] == sp2[:phonetized]
+      return {'edit_distance' => ed, 'phonetic_match' => true, 'match' => true} if sp1[:phonetized] == sp2[:phonetized]
       match = true if ed <= 4 && ([sp1_length, sp2_length].min >= ed * 2) && (ed < 2 || sp1[:normalized][0] == sp2[:normalized][0]) && (ed < 4 || sp1[:normalized][0...3] == sp2[:normalized][0...3])
-      {:edit_distance => ed, :match => match, :phonetic_match => false}
+      { 'edit_distance' => ed, 'match' => match, 'phonetic_match' => false}
     end
     def match_authors(preparsed_1, preparsed_2)
@@ -86,10 +86,10 @@ module Taxamatch
     def match_matches(genus_match, species_match, infraspecies_matches = [])
       match = species_match
-      match[:edit_distance] += genus_match[:edit_distance]
-      match[:match] = false if match[:edit_distance] > 4
-      match[:match] &&= genus_match[:match]
-      match[:phonetic_match] &&= genus_match[:phonetic_match]
+      match['edit_distance'] += genus_match['edit_distance']
+      match['match'] = false if match['edit_distance'] > 4
+      match['match'] &&= genus_match['match']
+      match['phonetic_match'] &&= genus_match['phonetic_match']
       match
     end

data/spec/taxamatch_rb_spec.rb CHANGED Viewed

@@ -70,107 +70,107 @@ describe 'Taxamatch::Base' do
     #edit distance 1 always match
     g1 = make_taxamatch_hash 'Plantago'
     g2 = make_taxamatch_hash 'Plantagon'
-    @tm.match_genera(g1, g2).should == {:phonetic_match=>false, :edit_distance=>1, :match=>true}
+    @tm.match_genera(g1, g2).should == {'phonetic_match' => false, 'edit_distance' => 1, 'match' => true}
     #edit_distance above threshold does not math
     g1 = make_taxamatch_hash 'Plantago'
     g2 = make_taxamatch_hash 'This shouldnt match'
-    @tm.match_genera(g1, g2).should == {:phonetic_match=>false, :match=>false, :edit_distance=>4}
+    @tm.match_genera(g1, g2).should == {'phonetic_match' => false, 'match' => false, 'edit_distance' => 4}
     #phonetic_match matches
     g1 = make_taxamatch_hash 'Plantagi'
     g2 = make_taxamatch_hash 'Plantagy'
-    @tm.match_genera(g1, g2).should == {:phonetic_match=>true, :edit_distance=>1, :match=>true}
+    @tm.match_genera(g1, g2).should == {'phonetic_match' => true, 'edit_distance' => 1, 'match' => true}
     #distance 1 in first letter also matches
     g1 = make_taxamatch_hash 'Xantheri'
     g2 = make_taxamatch_hash 'Pantheri'
-    @tm.match_genera(g1, g2).should == {:phonetic_match=>false, :edit_distance=>1, :match=>true}
+    @tm.match_genera(g1, g2).should == {'phonetic_match' => false, 'edit_distance' => 1, 'match' => true}
     #phonetic match tramps everything
     g1 = make_taxamatch_hash 'Xantheriiiiiiiiiiiiiii'
     g2 = make_taxamatch_hash 'Zanthery'
-    @tm.match_genera(g1, g2).should == {:phonetic_match=>true, :edit_distance=>4, :match=>true}
+    @tm.match_genera(g1, g2).should == {'phonetic_match' => true, 'edit_distance' => 4, 'match' => true}
     #same first letter and distance 2 should match
     g1 = make_taxamatch_hash 'Xantherii'
     g2 = make_taxamatch_hash 'Xantherrr'
-    @tm.match_genera(g1, g2).should == {:phonetic_match=>false, :match=>true, :edit_distance=>2}
+    @tm.match_genera(g1, g2).should == {'phonetic_match' => false, 'match' => true, 'edit_distance' => 2}
     #First letter is the same and distance is 3 should match, no phonetic match
     g1 = make_taxamatch_hash 'Xantheriii'
     g2 = make_taxamatch_hash 'Xantherrrr'
-    @tm.match_genera(g1, g2).should == {:phonetic_match=>false, :match=>true, :edit_distance=>3}
+    @tm.match_genera(g1, g2).should == {'phonetic_match' => false, 'match' => true, 'edit_distance' => 3}
     #Should not match if one of words is shorter than 2x edit distance and distance is 2 or 3
     g1 = make_taxamatch_hash 'Xant'
     g2 = make_taxamatch_hash 'Xanthe'
-    @tm.match_genera(g1, g2).should ==  {:phonetic_match=>false, :match=>false, :edit_distance=>2}
+    @tm.match_genera(g1, g2).should ==  {'phonetic_match' => false, 'match' => false, 'edit_distance' => 2}
     #Should not match if edit distance > 3 and no phonetic match
     g1 = make_taxamatch_hash 'Xantheriiii'
     g2 = make_taxamatch_hash 'Xantherrrrr'
-    @tm.match_genera(g1, g2).should ==  {:phonetic_match=>false, :match=>false, :edit_distance=>4}
+    @tm.match_genera(g1, g2).should ==  {'phonetic_match' => false, 'match' => false, 'edit_distance' => 4}
   end
   it 'should compare species' do
     #Exact match
     s1 = make_taxamatch_hash 'major'
     s2 = make_taxamatch_hash 'major'
-    @tm.match_species(s1, s2).should ==  {:phonetic_match=>true, :match=>true, :edit_distance=>0}
+    @tm.match_species(s1, s2).should ==  {'phonetic_match' => true, 'match' => true, 'edit_distance' => 0}
     #Phonetic match always works
     s1 = make_taxamatch_hash 'xanteriiiiiiii'
     s2 = make_taxamatch_hash 'zantereeeeeeee'
-    @tm.match_species(s1, s2).should ==  {:phonetic_match=>true, :match=>true, :edit_distance=>5}
+    @tm.match_species(s1, s2).should ==  {'phonetic_match' => true, 'match' => true, 'edit_distance' => 5}
     #Phonetic match works with different endings
     s1 = make_taxamatch_hash 'majorum'
     s2 = make_taxamatch_hash 'majoris'
-    @tm.match_species(s1, s2).should ==  {:phonetic_match=>true, :match=>true, :edit_distance=>2}
+    @tm.match_species(s1, s2).should ==  {'phonetic_match' => true, 'match' => true, 'edit_distance' => 2}
     #Distance 4 matches if first 3 chars are the same
     s1 = make_taxamatch_hash 'majorrrrr'
     s2 = make_taxamatch_hash 'majoraaaa'
-    @tm.match_species(s1, s2).should == {:phonetic_match=>false, :match=>true, :edit_distance=>4}
+    @tm.match_species(s1, s2).should == {'phonetic_match' => false, 'match' => true, 'edit_distance' => 4}
     #Should not match if Distance 4 matches and first 3 chars are not the same
     s1 = make_taxamatch_hash 'majorrrrr'
     s2 = make_taxamatch_hash 'marorraaa'
-    @tm.match_species(s1, s2).should == {:phonetic_match=>false, :match=>false, :edit_distance=>4}
+    @tm.match_species(s1, s2).should == {'phonetic_match' => false, 'match' => false, 'edit_distance' => 4}
     #Distance 2 or 3 matches if first 1 char is the same
     s1 = make_taxamatch_hash 'morrrr'
     s2 = make_taxamatch_hash 'moraaa'
-    @tm.match_species(s1, s2).should == {:phonetic_match=>false, :match=>true, :edit_distance=>3}
+    @tm.match_species(s1, s2).should == {'phonetic_match' => false, 'match' => true, 'edit_distance' => 3}
     #Should not match if Distance 2 or 3 and first 1 char is not the same
     s1 = make_taxamatch_hash 'morrrr'
     s2 = make_taxamatch_hash 'torraa'
-    @tm.match_species(s1, s2).should == {:phonetic_match=>false, :match=>false, :edit_distance=>3}
+    @tm.match_species(s1, s2).should == {'phonetic_match' => false, 'match' => false, 'edit_distance' => 3}
     #Distance 1 will match anywhere
     s1 = make_taxamatch_hash 'major'
     s2 = make_taxamatch_hash 'rajor'
-    @tm.match_species(s1, s2).should == {:phonetic_match=>false, :match=>true, :edit_distance=>1}
+    @tm.match_species(s1, s2).should == {'phonetic_match' => false, 'match' => true, 'edit_distance' => 1}
     #Will not match if distance 3 and length is less then twice of the edit distance
     s1 = make_taxamatch_hash 'marrr'
     s2 = make_taxamatch_hash 'maaaa'
-    @tm.match_species(s1, s2).should == {:phonetic_match=>false, :match=>false, :edit_distance=>3}
+    @tm.match_species(s1, s2).should == {'phonetic_match' => false, 'match' => false, 'edit_distance' => 3}
   end
   it 'should match mathes' do
     #No trobule case
-    gmatch = {:match => true, :phonetic_match => true, :edit_distance => 1}
-    smatch = {:match => true, :phonetic_match => true, :edit_distance => 1}
-    @tm.match_matches(gmatch, smatch).should == {:phonetic_match=>true, :edit_distance=>2, :match=>true}
+    gmatch = {'match' => true, 'phonetic_match' => true, 'edit_distance' => 1}
+    smatch = {'match' => true, 'phonetic_match' => true, 'edit_distance' => 1}
+    @tm.match_matches(gmatch, smatch).should == {'phonetic_match' => true, 'edit_distance' => 2, 'match' => true}
     #Will not match if either genus or sp. epithet dont match
-    gmatch = {:match => false, :phonetic_match => false, :edit_distance => 1}
-    smatch = {:match => true, :phonetic_match => true, :edit_distance => 1}
-    @tm.match_matches(gmatch, smatch).should == {:phonetic_match=>false, :edit_distance=>2, :match=>false}
-    gmatch = {:match => true, :phonetic_match => true, :edit_distance => 1}
-    smatch = {:match => false, :phonetic_match => false, :edit_distance => 1}
-    @tm.match_matches(gmatch, smatch).should == {:phonetic_match=>false, :edit_distance=>2, :match=>false}
+    gmatch = {'match' => false, 'phonetic_match' => false, 'edit_distance' => 1}
+    smatch = {'match' => true, 'phonetic_match' => true, 'edit_distance' => 1}
+    @tm.match_matches(gmatch, smatch).should == {'phonetic_match'=>false, 'edit_distance'=>2, 'match'=>false}
+    gmatch = {'match' => true, 'phonetic_match' => true, 'edit_distance' => 1}
+    smatch = {'match' => false, 'phonetic_match' => false, 'edit_distance' => 1}
+    @tm.match_matches(gmatch, smatch).should == {'phonetic_match'=>false, 'edit_distance'=>2, 'match'=>false}
     #Should not match if binomial edit distance > 4 NOTE: EVEN with full phonetic match
-    gmatch = {:match => true, :phonetic_match => true, :edit_distance => 3}
-    smatch = {:match => true, :phonetic_match => true, :edit_distance => 2}
-    @tm.match_matches(gmatch, smatch).should == {:phonetic_match=>true, :edit_distance=>5, :match=>false}
+    gmatch = {'match' => true, 'phonetic_match' => true, 'edit_distance' => 3}
+    smatch = {'match' => true, 'phonetic_match' => true, 'edit_distance' => 2}
+    @tm.match_matches(gmatch, smatch).should == {'phonetic_match'=>true, 'edit_distance'=>5, 'match'=>false}
     #Should not have phonetic match if one of the components does not match phonetically
-    gmatch = {:match => true, :phonetic_match => false, :edit_distance => 1}
-    smatch = {:match => true, :phonetic_match => true, :edit_distance => 1}
-    @tm.match_matches(gmatch, smatch).should == {:phonetic_match=>false, :edit_distance=>2, :match=>true}
-    gmatch = {:match => true, :phonetic_match => true, :edit_distance => 1}
-    smatch = {:match => true, :phonetic_match => false, :edit_distance => 1}
-    @tm.match_matches(gmatch, smatch).should == {:phonetic_match=>false, :edit_distance=>2, :match=>true}
+    gmatch = {'match' => true, 'phonetic_match' => false, 'edit_distance' => 1}
+    smatch = {'match' => true, 'phonetic_match' => true, 'edit_distance' => 1}
+    @tm.match_matches(gmatch, smatch).should == {'phonetic_match'=>false, 'edit_distance'=>2, 'match'=>true}
+    gmatch = {'match' => true, 'phonetic_match' => true, 'edit_distance' => 1}
+    smatch = {'match' => true, 'phonetic_match' => false, 'edit_distance' => 1}
+    @tm.match_matches(gmatch, smatch).should == {'phonetic_match'=>false, 'edit_distance'=>2, 'match'=>true}
     #edit distance should be equal the sum of of edit distances
-    gmatch = {:match => true, :phonetic_match => true, :edit_distance => 2}
-    smatch = {:match => true, :phonetic_match => true, :edit_distance => 2}
-    @tm.match_matches(gmatch, smatch).should == {:phonetic_match=>true, :edit_distance=>4, :match=>true}
+    gmatch = {'match' => true, 'phonetic_match' => true, 'edit_distance' => 2}
+    smatch = {'match' => true, 'phonetic_match' => true, 'edit_distance' => 2}
+    @tm.match_matches(gmatch, smatch).should == {'phonetic_match'=>true, 'edit_distance'=>4, 'match'=>true}
   end
   describe 'Taxamatch::Authmatch' do

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: dimus-taxamatch_rb
 version: !ruby/object:Gem::Version
-  version: 0.5.1
+  version: 0.5.2
 platform: ruby
 authors:
 - Dmitry Mozzherin
@@ -9,7 +9,7 @@ autorequire:
 bindir: bin
 cert_chain: []
-date: 2009-08-08 00:00:00 -07:00
+date: 2009-08-09 00:00:00 -07:00
 default_executable:
 dependencies:
 - !ruby/object:Gem::Dependency