RubyGems - taxamatch_rb - Versions diffs - 0.9.10 → 1.0.0 - Mend

taxamatch_rb 0.9.10 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15)

data/CHANGELOG +5 -2
data/Gemfile +14 -16
data/Gemfile.lock +18 -19
data/LICENSE +1 -1
data/{README.rdoc → README.md} +26 -7
data/Rakefile +11 -9
data/VERSION +1 -1
data/lib/taxamatch_rb.rb +76 -43
data/lib/taxamatch_rb/atomizer.rb +19 -10
data/lib/taxamatch_rb/authmatch.rb +29 -16
data/lib/taxamatch_rb/normalizer.rb +4 -4
data/lib/taxamatch_rb/phonetizer.rb +9 -8
data/spec/taxamatch_rb_spec.rb +223 -109
data/taxamatch_rb.gemspec +11 -41
metadata +11 -171

data/lib/taxamatch_rb/authmatch.rb CHANGED

@@ -1,15 +1,19 @@
-# Algorithms for Taxamatch::Authmatch are developed by Patrick Leary of uBio and EOL fame
+# Algorithms for Taxamatch::Authmatch
+# are developed by Patrick Leary of uBio and EOL fame
 module Taxamatch
   class Authmatch
     def self.authmatch(authors1, authors2, years1, years2)
-      unique_authors1, unique_authors2 = remove_duplicate_authors(authors1, authors2)
+      unique_authors1, unique_authors2 =
+        remove_duplicate_authors(authors1, authors2)
       year_difference = compare_years(years1, years2)
-      get_score(authors1, unique_authors1, authors2, unique_authors2, year_difference)
+      get_score(authors1, unique_authors1,
+                authors2, unique_authors2, year_difference)
     end
-    def self.get_score(authors1, unique_authors1, authors2, unique_authors2, year_diff)
+    def self.get_score(authors1, unique_authors1,
+                       authors2, unique_authors2, year_diff)
       count_before = authors1.size + authors2.size
       count_after = unique_authors1.size + unique_authors2.size
       score = 0
@@ -18,7 +22,7 @@ module Taxamatch
           if year_diff == 0
             score = 100
           elsif year_diff == 1
-            score = 54
+            score = 54
           end
         else
           score = 94
@@ -35,11 +39,11 @@ module Taxamatch
         end
       else
         score = ((1 - count_after.to_f/count_before.to_f) * 100).round
-        score = 0 unless year_diff == nil || (year_diff && year_diff == 0)
+        score = 0 unless year_diff == nil || (year_diff && year_diff == 0)
       end
       score > 50 ? score : 0
     end
     def self.remove_duplicate_authors(authors1, authors2)
       unique_authors1 = authors1.dup
       unique_authors2 = authors2.dup
@@ -48,12 +52,14 @@ module Taxamatch
           au1_match = au2_match = false
           if au1 == au2
             au1_match = au2_match = true
-          elsif au1 == au2[0...au1.size]
+          elsif au1 == au2[0...au1.size]
             au1_match = true
           elsif au1[0...au2.size] == au2
             au2_match = true
           end
-          if (au1.size >= 3 && au1_match) || (au2.size >= 3 && au2_match) || (au1_match && au2_match)
+          if (au1.size >= 3 && au1_match) ||
+             (au2.size >= 3 && au2_match) ||
+             (au1_match && au2_match)
             unique_authors1.delete au1
             unique_authors2.delete au2
           elsif au1_match
@@ -61,8 +67,11 @@ module Taxamatch
           elsif au2_match
             unique_authors2.delete au2
           else
-            #TODO: masking a bug in damerau levenshtsin mod which appears comparing 1letter to a longer string
-            if au1.size > 1 && au2.size > 1 && self.fuzzy_match_authors(au1, au2)
+            #TODO: masking a bug in damerau levenshtsin
+            # mod which appears comparing 1letter to a longer string
+            if au1.size > 1 &&
+               au2.size > 1 &&
+               self.fuzzy_match_authors(au1, au2)
               unique_authors1.delete au1
               unique_authors2.delete au2
             end
@@ -71,18 +80,22 @@ module Taxamatch
       end
       [unique_authors1, unique_authors2]
     end
     def self.fuzzy_match_authors(author1, author2)
       au1_length = author1.size
       au2_length = author2.size
       dlm = DamerauLevenshtein
-      ed = dlm.distance(author1, author2,1,3) #get around a bug in C code, but it really has to be fixed
-      (ed <= 3 && ([au1_length, au2_length].min > ed * 2) && (ed < 2 || author1[0] == author2[0]))
+      #get around a bug in C code, but it really has to be fixed
+      ed = dlm.distance(author1, author2,1,3)
+      (ed <= 3 && ([au1_length, au2_length].min > ed * 2) &&
+      (ed < 2 || author1[0] == author2[0]))
     end
     def self.compare_years(years1, years2)
       return 0 if years1 == [] && years2 == []
-      return (years1[0].to_i - years2[0].to_i).abs if years1.size == 1 && years2.size == 1
+      if years1.size == 1 && years2.size == 1
+        return (years1[0].to_i - years2[0].to_i).abs
+      end
       nil
     end
   end

data/lib/taxamatch_rb/normalizer.rb CHANGED

@@ -1,16 +1,16 @@
 # encoding: UTF-8
 module Taxamatch
   module Normalizer
     def self.normalize(string)
       utf8_to_ascii(string.strip.upcase).gsub(/[^\x00-\x7F]/,'?')
     end
     def self.normalize_word(word)
       self.normalize(word).gsub(/[^A-Z0-9\-]/, '').strip
     end
     def self.normalize_author(string)
       self.normalize(string).gsub(/[^A-Z]/, ' ').gsub(/[\s]{2,}/, ' ').strip
     end
@@ -20,7 +20,7 @@ module Taxamatch
       year_int = nil unless year_int.between?(1757, Time.now.year + 1)
       year_int
     end
   private
     def self.utf8_to_ascii(string)

data/lib/taxamatch_rb/phonetizer.rb CHANGED

@@ -2,11 +2,11 @@
 module Taxamatch
   module Phonetizer
     def self.phonetize(a_word, normalize_ending = false)
       self.near_match(a_word, normalize_ending)
     end
     def self.near_match(a_word, normalize_ending = false)
       a_word = a_word.strip rescue ''
       return '' if a_word == ''
@@ -50,7 +50,7 @@ module Taxamatch
           a_word = 'Z' + a_word[1..-1]
       end
       first_char = a_word.split('')[0]
-      rest_chars = a_word.split('')[1..-1].join('')
+      rest_chars = a_word.split('')[1..-1].join('')
       rest_chars.gsub!('AE', 'I')
       rest_chars.gsub!('IA', 'A')
       rest_chars.gsub!('OE', 'I')
@@ -59,21 +59,22 @@ module Taxamatch
       rest_chars.gsub!('H', '')
       rest_chars.tr!('EOUYKZ', 'IAIICS')
       a_word = (first_char + rest_chars).squeeze
       if normalize_ending && a_word.size > 4
         a_word = self.normalize_ending(a_word)
       end
       a_word
     end
     def self.normalize_ending(a_word)
-        # -- deal with variant endings -is (includes -us, -ys, -es), -im (was -um), -as (-os)
+        # -- deal with variant endings
+        # -is (includes -us, -ys, -es), -im (was -um), -as (-os)
         # -- at the end of a string translate all to -a
         a_word.gsub!(/IS$/, 'A')
         a_word.gsub!(/IM$/, 'A')
         a_word.gsub(/AS$/, 'A')
     end
   end
-end
+end

data/spec/taxamatch_rb_spec.rb CHANGED

@@ -7,25 +7,81 @@ describe 'Atomizer' do
   end
   it 'should parse uninomials' do
-    @parser.parse('Betula').should == {:all_authors=>[], :all_years=>[], :canonical_form=>"Betula", :uninomial=>{:string=>"Betula", :normalized=>"BETULA", :phonetized=>"BITILA", :authors=>[], :years=>[], :normalized_authors=>[]}}
-    @parser.parse('Ærenea Lacordaire, 1872').should == {:all_authors=>["LACORDAIRE"], :all_years=>[1872], :canonical_form=>"Aerenea", :uninomial=>{:string=>"Aerenea", :normalized=>"AERENEA", :phonetized=>"ERINIA", :authors=>["Lacordaire"], :years=>[1872], :normalized_authors=>["LACORDAIRE"]}}
+    @parser.parse('Betula').should == { :all_authors => [], :all_years => [],
+      :canonical_form => "Betula", :uninomial => { :string => "Betula",
+      :normalized => 'BETULA', :phonetized => "BITILA", :authors => [],
+      :years => [], :normalized_authors => [] } }
+    @parser.parse('Ærenea Lacordaire, 1872').should == {
+      :all_authors => ["LACORDAIRE"], :all_years => [1872],
+      :canonical_form => "Aerenea", :uninomial => { :string => "Aerenea",
+        :normalized => "AERENEA", :phonetized => "ERINIA",
+        :authors => ["Lacordaire"], :years => [1872],
+        :normalized_authors => ["LACORDAIRE"] } }
   end
   it 'should parse binomials' do
-    @parser.parse('Leœptura laetifica Dow, 1913').should == {:all_authors=>["DOW"], :all_years=>[1913], :canonical_form=>"Leoeptura laetifica", :genus=>{:string=>"Leoeptura", :normalized=>"LEOEPTURA", :phonetized=>"LIPTIRA", :authors=>[], :years=>[], :normalized_authors=>[]}, :species=>{:string=>"laetifica", :normalized=>"LAETIFICA", :phonetized=>"LITIFICA", :authors=>["Dow"], :years=>[1913], :normalized_authors=>["DOW"]}}
+    @parser.parse('Leœptura laetifica Dow, 1913').should == {
+      :all_authors => ["DOW"], :all_years => [1913],
+      :canonical_form => "Leoeptura laetifica", :genus => {
+      :string => "Leoeptura", :normalized => "LEOEPTURA",
+      :phonetized => "LIPTIRA", :authors => [], :years => [],
+      :normalized_authors => []}, :species => {
+      :string => "laetifica", :normalized => "LAETIFICA",
+      :phonetized => "LITIFICA", :authors => ["Dow"],
+      :years => [1913], :normalized_authors => ["DOW"] } }
   end
   it 'should parse trinomials' do
-    @parser.parse('Hydnellum scrobiculatum zonatum (Banker) D. Hall et D.E. Stuntz 1972').should ==  {:all_authors=>["BANKER", "D HALL", "D E STUNTZ"], :all_years=>[1972], :canonical_form=>"Hydnellum scrobiculatum zonatum", :genus=>{:string=>"Hydnellum", :normalized=>"HYDNELLUM", :phonetized=>"HIDNILIM", :authors=>[], :years=>[], :normalized_authors=>[]}, :species=>{:string=>"scrobiculatum", :normalized=>"SCROBICULATUM", :phonetized=>"SCRABICILATA", :authors=>[], :years=>[], :normalized_authors=>[]}, :infraspecies=>[{:string=>"zonatum", :normalized=>"ZONATUM", :phonetized=>"ZANATA", :authors=>["Banker", "D. Hall", "D.E. Stuntz"], :years=>[1972], :normalized_authors=>["BANKER", "D HALL", "D E STUNTZ"]}]}
+    @parser.parse('Hydnellum scrobiculatum zonatum ' +
+                  '(Banker) D. Hall et D.E. Stuntz 1972').should ==  {
+      :all_authors => ["BANKER", "D HALL", "D E STUNTZ"], :all_years => [1972],
+      :canonical_form => "Hydnellum scrobiculatum zonatum", :genus=>{
+      :string => "Hydnellum", :normalized => "HYDNELLUM",
+      :phonetized => "HIDNILIM", :authors => [], :years => [],
+      :normalized_authors => [] }, :species => { :string => "scrobiculatum",
+      :normalized => "SCROBICULATUM", :phonetized => "SCRABICILATA",
+      :authors => [], :years => [], :normalized_authors => [] },
+      :infraspecies => [{ :string => "zonatum", :normalized => "ZONATUM",
+      :phonetized => "ZANATA", :authors => ["Banker", "D. Hall", "D.E. Stuntz"],
+      :years => [1972], :normalized_authors => ["BANKER", "D HALL",
+      "D E STUNTZ"] }] }
   end
   it 'should normalize years to integers' do
     future_year = Time.now.year + 10
-    @parser.parse("Hydnellum scrobiculatum Kern #{future_year} zonatum (Banker) D. Hall et D.E. Stuntz 1972?").should == {:all_authors=>["KERN", "BANKER", "D HALL", "D E STUNTZ"], :all_years=>[1972], :canonical_form=>"Hydnellum scrobiculatum zonatum", :genus=>{:string=>"Hydnellum", :normalized=>"HYDNELLUM", :phonetized=>"HIDNILIM", :authors=>[], :years=>[], :normalized_authors=>[]}, :species=>{:string=>"scrobiculatum", :normalized=>"SCROBICULATUM", :phonetized=>"SCRABICILATA", :authors=>["Kern"], :years=>[], :normalized_authors=>["KERN"]}, :infraspecies=>[{:string=>"zonatum", :normalized=>"ZONATUM", :phonetized=>"ZANATA", :authors=>["Banker", "D. Hall", "D.E. Stuntz"], :years=>[1972], :normalized_authors=>["BANKER", "D HALL", "D E STUNTZ"]}]}
+    @parser.parse("Hydnellum scrobiculatum Kern #{future_year} " +
+                  "zonatum (Banker) D. Hall et D.E. Stuntz 1972?").should == {
+      :all_authors => ["KERN", "BANKER", "D HALL", "D E STUNTZ"],
+      :all_years => [1972],
+      :canonical_form => "Hydnellum scrobiculatum zonatum", :genus => {
+      :string => "Hydnellum", :normalized => "HYDNELLUM",
+      :phonetized => "HIDNILIM", :authors => [], :years => [],
+      :normalized_authors => [] }, :species => { :string => "scrobiculatum",
+      :normalized => "SCROBICULATUM", :phonetized => "SCRABICILATA",
+      :authors => ["Kern"], :years => [], :normalized_authors => ["KERN"] },
+      :infraspecies => [{ :string => "zonatum", :normalized => "ZONATUM",
+      :phonetized => "ZANATA", :authors =>
+      ["Banker", "D. Hall", "D.E. Stuntz"], :years => [1972],
+      :normalized_authors => ["BANKER", "D HALL", "D E STUNTZ"] }] }
   end
   it 'should normalize names with abbreviated genus after cf.' do
-    @parser.parse('Unio cf. U. alba').should == {:all_authors=>[], :all_years=>[], :canonical_form=>"Unio", :genus=>{:string=>"Unio", :normalized=>"UNIO", :phonetized=>"UNIA", :authors=>[], :years=>[], :normalized_authors=>[]}}
+    @parser.parse('Unio cf. U. alba').should == { :all_authors => [],
+      :all_years => [], :canonical_form => "Unio",
+      :genus => { :string => "Unio", :normalized => "UNIO",
+      :phonetized => "UNIA", :authors => [], :years => [],
+      :normalized_authors => [] } }
+  end
+  it 'should parse names which broke it before' do
+    ['Parus caeruleus species complex',
+     'Euxoa nr. idahoensis sp. 1clay',
+     'Cetraria islandica ? islandica',
+     'Buteo borealis ? ventralis'].each do |n|
+      res = @parser.parse(n)
+      res.class.should == Hash
+      res.empty?.should be_false
+    end
   end
 end
@@ -38,12 +94,14 @@ describe 'Taxamatch::Normalizer' do
     Taxamatch::Normalizer.normalize('Fallén').should == 'FALLEN'
     Taxamatch::Normalizer.normalize('Fallé€n').should == 'FALLE?N'
     Taxamatch::Normalizer.normalize('Fallén привет').should == 'FALLEN ??????'
-    Taxamatch::Normalizer.normalize('Choriozopella trägårdhi').should == 'CHORIOZOPELLA TRAGARDHI'
+    Taxamatch::Normalizer.normalize('Choriozopella trägårdhi').should ==
+      'CHORIOZOPELLA TRAGARDHI'
     Taxamatch::Normalizer.normalize('×Zygomena').should == 'xZYGOMENA'
   end
   it 'should normalize words' do
-    Taxamatch::Normalizer.normalize_word('L-3eœ|pt[ura$').should == 'L-3EOEPTURA'
+    Taxamatch::Normalizer.normalize_word('L-3eœ|pt[ura$').should ==
+      'L-3EOEPTURA'
   end
 end
@@ -53,7 +111,8 @@ describe 'Taxamatch::Base' do
   end
   it 'should get txt tests' do
-    read_test_file(File.expand_path(File.dirname(__FILE__)) + '/taxamatch_test.txt', 4) do |y|
+    test_file = File.expand_path(File.dirname(__FILE__)) + '/taxamatch_test.txt'
+    read_test_file(test_file, 4) do |y|
       if y
         y[2] = y[2] == 'true' ? true : false
         res = @tm.taxamatch(y[0], y[1], false)
@@ -65,127 +124,169 @@ describe 'Taxamatch::Base' do
   end
   it 'should work with names that cannot be parsed' do
-    res = @tm.taxamatch('Quadraspidiotus ostreaeformis MacGillivray, 1921','Quadraspidiotus ostreaeformis Curtis)')
+    res = @tm.taxamatch('Quadraspidiotus ostreaeformis MacGillivray, 1921',
+                        'Quadraspidiotus ostreaeformis Curtis)')
     res = false
   end
   it 'should compare genera' do
-    #edit distance 1 always match
+    # edit distance 1 always match
     g1 = make_taxamatch_hash 'Plantago'
     g2 = make_taxamatch_hash 'Plantagon'
-    @tm.match_genera(g1, g2).should == {'phonetic_match' => false, 'edit_distance' => 1, 'match' => true}
-    #edit_distance above threshold does not math
+    @tm.match_genera(g1, g2).should == { 'phonetic_match' => false,
+      'edit_distance' => 1, 'match' => true }
+    # edit_distance above threshold does not math
     g1 = make_taxamatch_hash 'Plantago'
     g2 = make_taxamatch_hash 'This shouldnt match'
-    @tm.match_genera(g1, g2).should == {'phonetic_match' => false, 'match' => false, 'edit_distance' => 4}
-    #phonetic_match matches
+    @tm.match_genera(g1, g2).should == { 'phonetic_match' => false,
+      'match' => false, 'edit_distance' => 4 }
+    # phonetic_match matches
     g1 = make_taxamatch_hash 'Plantagi'
     g2 = make_taxamatch_hash 'Plantagy'
-    @tm.match_genera(g1, g2).should == {'phonetic_match' => true, 'edit_distance' => 1, 'match' => true}
-    @tm.match_genera(g1, g2, :with_phonetic_match => false).should == {'phonetic_match' => false, 'edit_distance' => 1, 'match' => true}
-    #distance 1 in first letter also matches
+    @tm.match_genera(g1, g2).should == { 'phonetic_match' => true,
+      'edit_distance' => 1, 'match' => true }
+    @tm.match_genera(g1, g2, :with_phonetic_match => false).should == {
+      'phonetic_match' => false, 'edit_distance' => 1, 'match' => true }
+    # distance 1 in first letter also matches
     g1 = make_taxamatch_hash 'Xantheri'
     g2 = make_taxamatch_hash 'Pantheri'
-    @tm.match_genera(g1, g2).should == {'phonetic_match' => false, 'edit_distance' => 1, 'match' => true}
-    #phonetic match tramps everything
+    @tm.match_genera(g1, g2).should == { 'phonetic_match' => false,
+      'edit_distance' => 1, 'match' => true }
+    # phonetic match tramps everything
     g1 = make_taxamatch_hash 'Xaaaaantheriiiiiiiiiiiiiii'
     g2 = make_taxamatch_hash 'Zaaaaaaaaaaaantheryyyyyyyy'
-    @tm.match_genera(g1, g2).should == {'phonetic_match' => true, 'edit_distance' => 4, 'match' => true}
-    @tm.match_genera(g1, g2, :with_phonetic_match => false).should == {'phonetic_match' => false, 'edit_distance' => 4, 'match' => false}
-    #same first letter and distance 2 should match
+    @tm.match_genera(g1, g2).should == { 'phonetic_match' => true,
+      'edit_distance' => 4, 'match' => true }
+    @tm.match_genera(g1, g2, :with_phonetic_match => false).should == {
+      'phonetic_match' => false, 'edit_distance' => 4, 'match' => false }
+    # same first letter and distance 2 should match
     g1 = make_taxamatch_hash 'Xaaaantherii'
     g2 = make_taxamatch_hash 'Xaaaantherrr'
-    @tm.match_genera(g1, g2).should == {'phonetic_match' => false, 'match' => true, 'edit_distance' => 2}
-    #First letter is the same and distance is 3 should match, no phonetic match
+    @tm.match_genera(g1, g2).should == { 'phonetic_match' => false,
+      'match' => true, 'edit_distance' => 2 }
+    # First letter is the same and distance is 3 should match, no phonetic match
     g1 = make_taxamatch_hash 'Xaaaaaaaaaaantheriii'
     g2 = make_taxamatch_hash 'Xaaaaaaaaaaantherrrr'
-    @tm.match_genera(g1, g2).should == {'phonetic_match' => false, 'match' => true, 'edit_distance' => 3}
-    #Should not match if one of words is shorter than 2x edit distance and distance is 2 or 3
+    @tm.match_genera(g1, g2).should ==
+      { 'phonetic_match' => false, 'match' => true, 'edit_distance' => 3 }
+    # Should not match if one of words is shorter than 2x edit
+    # distance and distance is 2 or 3
     g1 = make_taxamatch_hash 'Xant'
     g2 = make_taxamatch_hash 'Xanthe'
-    @tm.match_genera(g1, g2).should ==  {'phonetic_match' => false, 'match' => false, 'edit_distance' => 2}
-    #Should not match if edit distance > 3 and no phonetic match
+    @tm.match_genera(g1, g2).should ==  { 'phonetic_match' => false,
+      'match' => false, 'edit_distance' => 2 }
+    # Should not match if edit distance > 3 and no phonetic match
     g1 = make_taxamatch_hash 'Xantheriiii'
     g2 = make_taxamatch_hash 'Xantherrrrr'
-    @tm.match_genera(g1, g2).should ==  {'phonetic_match' => false, 'match' => false, 'edit_distance' => 4}
+    @tm.match_genera(g1, g2).should ==  { 'phonetic_match' => false,
+      'match' => false, 'edit_distance' => 4 }
   end
   it 'should compare species' do
-    #Exact match
+    # Exact match
     s1 = make_taxamatch_hash 'major'
     s2 = make_taxamatch_hash 'major'
-    @tm.match_species(s1, s2).should ==  {'phonetic_match' => true, 'match' => true, 'edit_distance' => 0}
-    @tm.match_species(s1, s2, :with_phonetic_match => false).should ==  {'phonetic_match' => false, 'match' => true, 'edit_distance' => 0}
-    #Phonetic match always works
+    @tm.match_species(s1, s2).should ==  { 'phonetic_match' => true,
+      'match' => true, 'edit_distance' => 0 }
+    @tm.match_species(s1, s2, :with_phonetic_match => false).should == {
+      'phonetic_match' => false, 'match' => true, 'edit_distance' => 0 }
+    # Phonetic match always works
     s1 = make_taxamatch_hash 'xanteriiieeeeeeeeeeeee'
     s2 = make_taxamatch_hash 'zantereeeeeeeeeeeeeeee'
-    @tm.match_species(s1, s2).should ==  {'phonetic_match' => true, 'match' => true, 'edit_distance' => 4}
-    @tm.match_species(s1, s2, :with_phonetic_match => false).should ==  {'phonetic_match' => false, 'match' => false, 'edit_distance' => 4}
-    #Phonetic match works with different endings
+    @tm.match_species(s1, s2).should ==  { 'phonetic_match' => true,
+      'match' => true, 'edit_distance' => 4 }
+    @tm.match_species(s1, s2, :with_phonetic_match => false).should ==
+      { 'phonetic_match' => false, 'match' => false, 'edit_distance' => 4 }
+    # Phonetic match works with different endings
     s1 = make_taxamatch_hash 'majorum'
     s2 = make_taxamatch_hash 'majoris'
-    @tm.match_species(s1, s2).should ==  {'phonetic_match' => true, 'match' => true, 'edit_distance' => 2}
-    @tm.match_species(s1, s2, :with_phonetic_match => false).should ==  {'phonetic_match' => false, 'match' => true, 'edit_distance' => 2}
-    #Distance 4 matches if first 3 chars are the same
+    @tm.match_species(s1, s2).should ==  {
+      'phonetic_match' => true, 'match' => true, 'edit_distance' => 2 }
+    @tm.match_species(s1, s2, :with_phonetic_match => false).should ==
+      { 'phonetic_match' => false, 'match' => true, 'edit_distance' => 2 }
+    # Distance 4 matches if first 3 chars are the same
     s1 = make_taxamatch_hash 'majjjjorrrrr'
     s2 = make_taxamatch_hash 'majjjjoraaaa'
-    @tm.match_species(s1, s2).should == {'phonetic_match' => false, 'match' => true, 'edit_distance' => 4}
-    #Should not match if Distance 4 matches and first 3 chars are not the same
+    @tm.match_species(s1, s2).should ==
+      { 'phonetic_match' => false, 'match' => true, 'edit_distance' => 4 }
+    # Should not match if Distance 4 matches and first 3 chars are not the same
     s1 = make_taxamatch_hash 'majorrrrr'
     s2 = make_taxamatch_hash 'marorraaa'
-    @tm.match_species(s1, s2).should == {'phonetic_match' => false, 'match' => false, 'edit_distance' => 4}
-    #Distance 2 or 3 matches if first 1 char is the same
+    @tm.match_species(s1, s2).should == {
+      'phonetic_match' => false, 'match' => false, 'edit_distance' => 4 }
+    # Distance 2 or 3 matches if first 1 char is the same
     s1 = make_taxamatch_hash 'moooorrrr'
     s2 = make_taxamatch_hash 'mooooraaa'
-    @tm.match_species(s1, s2).should == {'phonetic_match' => false, 'match' => true, 'edit_distance' => 3}
-    #Should not match if Distance 2 or 3 and first 1 char is not the same
+    @tm.match_species(s1, s2).should == { 'phonetic_match' => false,
+      'match' => true, 'edit_distance' => 3 }
+    # Should not match if Distance 2 or 3 and first 1 char is not the same
     s1 = make_taxamatch_hash 'morrrr'
     s2 = make_taxamatch_hash 'torraa'
-    @tm.match_species(s1, s2).should == {'phonetic_match' => false, 'match' => false, 'edit_distance' => 3}
-    #Distance 1 will match anywhere
+    @tm.match_species(s1, s2).should == {
+      'phonetic_match' => false, 'match' => false, 'edit_distance' => 3 }
+    # Distance 1 will match anywhere
     s1 = make_taxamatch_hash 'major'
     s2 = make_taxamatch_hash 'rajor'
-    @tm.match_species(s1, s2).should == {'phonetic_match' => false, 'match' => true, 'edit_distance' => 1}
-    #Will not match if distance 3 and length is less then twice of the edit distance
+    @tm.match_species(s1, s2).should == {
+      'phonetic_match' => false, 'match' => true, 'edit_distance' => 1 }
+    # Will not match if distance 3 and length is less then twice
+    # of the edit distance
     s1 = make_taxamatch_hash 'marrr'
     s2 = make_taxamatch_hash 'maaaa'
-    @tm.match_species(s1, s2).should == {'phonetic_match' => false, 'match' => false, 'edit_distance' => 3}
+    @tm.match_species(s1, s2).should == {
+      'phonetic_match' => false, 'match' => false, 'edit_distance' => 3 }
   end
   it 'should match matches' do
-    #No trobule case
-    gmatch = {'match' => true, 'phonetic_match' => true, 'edit_distance' => 1}
-    smatch = {'match' => true, 'phonetic_match' => true, 'edit_distance' => 1}
-    @tm.match_matches(gmatch, smatch).should == {'phonetic_match' => true, 'edit_distance' => 2, 'match' => true}
-    #Will not match if either genus or sp. epithet dont match
-    gmatch = {'match' => false, 'phonetic_match' => false, 'edit_distance' => 1}
-    smatch = {'match' => true, 'phonetic_match' => true, 'edit_distance' => 1}
-    @tm.match_matches(gmatch, smatch).should == {'phonetic_match'=>false, 'edit_distance'=>2, 'match'=>false}
-    gmatch = {'match' => true, 'phonetic_match' => true, 'edit_distance' => 1}
-    smatch = {'match' => false, 'phonetic_match' => false, 'edit_distance' => 1}
-    @tm.match_matches(gmatch, smatch).should == {'phonetic_match'=>false, 'edit_distance'=>2, 'match'=>false}
-    #Should not match if binomial edit distance > 4 NOTE: EVEN with full phonetic match
-    gmatch = {'match' => true, 'phonetic_match' => true, 'edit_distance' => 3}
-    smatch = {'match' => true, 'phonetic_match' => true, 'edit_distance' => 2}
-    @tm.match_matches(gmatch, smatch).should == {'phonetic_match' => true, 'edit_distance' => 5, 'match' => false}
-    #Should not have phonetic match if one of the components does not match phonetically
-    gmatch = {'match' => true, 'phonetic_match' => false, 'edit_distance' => 1}
-    smatch = {'match' => true, 'phonetic_match' => true, 'edit_distance' => 1}
-    @tm.match_matches(gmatch, smatch).should == {'phonetic_match'=>false, 'edit_distance'=>2, 'match'=>true}
-    gmatch = {'match' => true, 'phonetic_match' => true, 'edit_distance' => 1}
-    smatch = {'match' => true, 'phonetic_match' => false, 'edit_distance' => 1}
-    @tm.match_matches(gmatch, smatch).should == {'phonetic_match'=>false, 'edit_distance'=>2, 'match'=>true}
-    #edit distance should be equal the sum of of edit distances
-    gmatch = {'match' => true, 'phonetic_match' => true, 'edit_distance' => 2}
-    smatch = {'match' => true, 'phonetic_match' => true, 'edit_distance' => 2}
-    @tm.match_matches(gmatch, smatch).should == {'phonetic_match'=>true, 'edit_distance'=>4, 'match'=>true}
+    # No trobule case
+    gmatch = { 'match' => true, 'phonetic_match' => true, 'edit_distance' => 1 }
+    smatch = { 'match' => true, 'phonetic_match' => true, 'edit_distance' => 1 }
+    @tm.match_matches(gmatch, smatch).should ==
+      { 'phonetic_match' => true, 'edit_distance' => 2, 'match' => true }
+    # Will not match if either genus or sp. epithet dont match
+    gmatch = { 'match' => false,
+      'phonetic_match' => false, 'edit_distance' => 1 }
+    smatch = { 'match' => true,
+      'phonetic_match' => true, 'edit_distance' => 1 }
+    @tm.match_matches(gmatch, smatch).should == { 'phonetic_match' => false,
+      'edit_distance' => 2, 'match' => false }
+    gmatch = { 'match' => true, 'phonetic_match' => true,
+      'edit_distance' => 1 }
+    smatch = { 'match' => false, 'phonetic_match' => false,
+      'edit_distance' => 1 }
+    @tm.match_matches(gmatch, smatch).should == { 'phonetic_match' => false,
+      'edit_distance' => 2, 'match' => false }
+    # Should not match if binomial edit distance > 4
+    # NOTE: EVEN with full phonetic match
+    gmatch = { 'match' => true, 'phonetic_match' => true, 'edit_distance' => 3 }
+    smatch = { 'match' => true, 'phonetic_match' => true, 'edit_distance' => 2 }
+    @tm.match_matches(gmatch, smatch).should == { 'phonetic_match' => true,
+      'edit_distance' => 5, 'match' => false }
+    # Should not have phonetic match if one of the components
+    # does not match phonetically
+    gmatch = { 'match' => true,
+      'phonetic_match' => false, 'edit_distance' => 1 }
+    smatch = { 'match' => true,
+      'phonetic_match' => true, 'edit_distance' => 1 }
+    @tm.match_matches(gmatch, smatch).should == { 'phonetic_match' => false,
+      'edit_distance' => 2, 'match' => true }
+    gmatch = { 'match' => true, 'phonetic_match' => true, 'edit_distance' => 1 }
+    smatch = { 'match' => true,
+      'phonetic_match' => false, 'edit_distance' => 1 }
+    @tm.match_matches(gmatch, smatch).should == { 'phonetic_match' => false,
+      'edit_distance' => 2, 'match' => true }
+    # edit distance should be equal the sum of of edit distances
+    gmatch = { 'match' => true, 'phonetic_match' => true, 'edit_distance' => 2 }
+    smatch = { 'match' => true, 'phonetic_match' => true, 'edit_distance' => 2 }
+    @tm.match_matches(gmatch, smatch).should == {
+      'phonetic_match'=>true, 'edit_distance'=>4, 'match'=>true }
   end
   it 'should return only boolean values' do
     @tm.taxamatch("AJLJljljlj", "sls").should_not be_nil
     @tm.taxamatch('Olsl','a')
   end
   it "should not match authors from different parts of name" do
     parser = Taxamatch::Atomizer.new
     t = Taxamatch::Base.new
@@ -199,11 +300,11 @@ describe 'Taxamatch::Base' do
     n8 = parser.parse "Betula alba Linnaeus alba Smith"
     n9 = parser.parse "Betula alba Smith alba L."
     n10 = parser.parse "Betula Linn."
-    #if one authorship is empty, return 0
+    # if one authorship is empty, return 0
     t.match_authors(n1, n5).should == 0
     t.match_authors(n5, n1).should == 0
     t.match_authors(n5, n6).should == 0
-    #if authorship matches on different levels ignore
+    # if authorship matches on different levels ignore
     t.match_authors(n7, n3).should == 0
     t.match_authors(n8, n3).should == -1
     t.match_authors(n2, n8).should == 0
@@ -227,29 +328,37 @@ describe 'Taxamatch::Base' do
       res.should == 90
       res = @am.authmatch(['Linnaeus'],['Kurtz'], [], [])
       res.should == 0
-      #found all authors, same year
-      res = @am.authmatch(['Linnaeus', 'Muller'], ['Muller', 'Linnaeus'], [1766], [1766])
+      # found all authors, same year
+      res = @am.authmatch(['Linnaeus', 'Muller'],
+                          ['Muller', 'Linnaeus'], [1766], [1766])
       res.should == 100
-      #all authors, 1 year diff
-      res = @am.authmatch(['Linnaeus', 'Muller'], ['Muller', 'Linnaeus'], [1767], [1766])
+      # all authors, 1 year diff
+      res = @am.authmatch(['Linnaeus', 'Muller'],
+                          ['Muller', 'Linnaeus'], [1767], [1766])
       res.should == 54
-      #year is not counted in
-      res = @am.authmatch(['Linnaeus', 'Muller'], ['Muller', 'Linnaeus'], [1767], [])
+      # year is not counted in
+      res = @am.authmatch(['Linnaeus', 'Muller'],
+                          ['Muller', 'Linnaeus'], [1767], [])
       res.should == 94
-      #found all authors on one side, same year
-      res = @am.authmatch(['Linnaeus', 'Muller', 'Kurtz'], ['Muller', 'Linnaeus'], [1767], [1767])
+      # found all authors on one side, same year
+      res = @am.authmatch(['Linnaeus', 'Muller', 'Kurtz'],
+                          ['Muller', 'Linnaeus'], [1767], [1767])
       res.should == 91
-      #found all authors on one side, 1 year diff
-      res = @am.authmatch(['Linnaeus', 'Muller', 'Kurtz'], ['Muller', 'Linnaeus'], [1766], [1767])
+      # found all authors on one side, 1 year diff
+      res = @am.authmatch(['Linnaeus', 'Muller', 'Kurtz'],
+                          ['Muller', 'Linnaeus'], [1766], [1767])
       res.should == 51
-      #found all authors on one side, year does not count
-      res = @am.authmatch(['Linnaeus', 'Muller'], ['Muller', 'Linnaeus', 'Kurtz'], [1766], [])
+      # found all authors on one side, year does not count
+      res = @am.authmatch(['Linnaeus', 'Muller'],
+                          ['Muller', 'Linnaeus', 'Kurtz'], [1766], [])
       res.should == 90
-      #found some authors
-      res = @am.authmatch(['Stepanov', 'Linnaeus', 'Muller'], ['Muller', 'Kurtz', 'Stepanov'], [1766], [])
+      # found some authors
+      res = @am.authmatch(['Stepanov', 'Linnaeus', 'Muller'],
+                          ['Muller', 'Kurtz', 'Stepanov'], [1766], [])
       res.should == 67
-      #if year does not match or not present no match for previous case
-      res = @am.authmatch(['Stepanov', 'Linnaeus', 'Muller'], ['Muller', 'Kurtz', 'Stepanov'], [1766], [1765])
+      # if year does not match or not present no match for previous case
+      res = @am.authmatch(['Stepanov', 'Linnaeus', 'Muller'],
+                          ['Muller', 'Kurtz', 'Stepanov'], [1766], [1765])
       res.should == 0
     end
@@ -261,22 +370,29 @@ describe 'Taxamatch::Base' do
     end
     it 'should remove duplicate authors' do
-      #Li submatches Linnaeus and it its size 3 is big enought to remove Linnaeus
-      #Muller is identical
-      res = @am.remove_duplicate_authors(['Lin', 'Muller'], ['Linnaeus', 'Muller'])
+      # Li submatches Linnaeus and it its size 3 is big enought to remove
+      # Linnaeus Muller is identical
+      res = @am.remove_duplicate_authors(['Lin', 'Muller'],
+                                         ['Linnaeus', 'Muller'])
       res.should == [[], []]
-      #same in different order
-      res = @am.remove_duplicate_authors(['Linnaeus', 'Muller'], ['Linn', 'Muller'])
+      # same in different order
+      res = @am.remove_duplicate_authors(['Linnaeus', 'Muller'],
+                                         ['Linn', 'Muller'])
       res.should == [[], []]
-      #auth Li submatches Linnaeus, but Li size less then 3 required to remove Linnaeus
-      res = @am.remove_duplicate_authors(['Dem', 'Li'], ['Linnaeus', 'Stepanov'])
+      # auth Li submatches Linnaeus, but Li size less then 3
+      # required to remove Linnaeus
+      res = @am.remove_duplicate_authors(['Dem', 'Li'],
+                                         ['Linnaeus', 'Stepanov'])
       res.should == [["Dem"], ["Linnaeus", "Stepanov"]]
-      #fuzzy match
-      res = @am.remove_duplicate_authors(['Dem', 'Lennaeus'], ['Linnaeus', 'Stepanov'])
+      # fuzzy match
+      res = @am.remove_duplicate_authors(['Dem', 'Lennaeus'],
+                                         ['Linnaeus', 'Stepanov'])
       res.should == [["Dem"], ["Stepanov"]]
-      res = @am.remove_duplicate_authors(['Linnaeus', 'Muller'], ['L', 'Kenn'])
+      res = @am.remove_duplicate_authors(['Linnaeus', 'Muller'],
+                                         ['L', 'Kenn'])
       res.should == [['Linnaeus', 'Muller'], ['Kenn']]
-      res = @am.remove_duplicate_authors(['Linnaeus', 'Muller'], ['Muller', 'Linnaeus', 'Kurtz'])
+      res = @am.remove_duplicate_authors(['Linnaeus', 'Muller'],
+                                         ['Muller', 'Linnaeus', 'Kurtz'])
       res.should == [[],['Kurtz']]
     end
@@ -288,5 +404,3 @@ describe 'Taxamatch::Base' do
   end
 end