RubyGems - dimus-taxamatch_rb - Versions diffs - 0.1.2 → 0.1.4 - Mend

dimus-taxamatch_rb 0.1.2 → 0.1.4

Files changed (18) hide show

data/lib/taxamatch_rb.rb +12 -2
data/lib/taxamatch_rb/authmatch.rb +86 -0
data/lib/taxamatch_rb/normalizer.rb +1 -1
data/lib/taxamatch_rb/parser.rb +2 -2
data/spec/taxamatch_rb_spec.rb +19 -5
metadata +4 -15
data/.document +0 -5
data/.gitignore +0 -6
data/Rakefile +0 -50
data/VERSION +0 -1
data/features/step_definitions/common_steps.rb +0 -163
data/features/step_definitions/taxamatch_rb.rb +0 -92
data/features/support/common.rb +0 -29
data/features/support/env.rb +0 -14
data/features/support/matchers.rb +0 -11
data/features/taxamatch_rb.feature +0 -33
data/lib/taxamatch_rb/authormatch.rb +0 -0
data/taxamatch_rb.gemspec +0 -67

data/lib/taxamatch_rb.rb CHANGED Viewed

@@ -5,11 +5,12 @@ require 'taxamatch_rb/damerau_levenshtein_mod'
 require 'taxamatch_rb/parser'
 require 'taxamatch_rb/normalizer'
 require 'taxamatch_rb/phonetizer'
+require 'taxamatch_rb/authmatch'
 class Taxamatch
   def initialize
-    @parser = Parser.new
+    @parser = TaxamatchParser.new
     @dlm = DamerauLevenshteinMod.new
   end
@@ -24,7 +25,7 @@ class Taxamatch
   #takes two hashes of parsed scientific names, analyses them and returns the match result (or false if they cannot be compared).
   #this function is useful when species strings are preparsed.
   def taxamatch_parsed_data(parsed_data_1, parsed_data_2)
-    return match_uninomial(parsed_data_1, parsed_data_2) if parsed_data_1[:unicode] && parsed_data_2[:unicode]
+    return match_uninomial(parsed_data_1, parsed_data_2) if parsed_data_1[:uninomial] && parsed_data_2[:uninomial]
     return match_multinomial(parsed_data_1, parsed_data_2) if parsed_data_1[:genus] && parsed_data_2[:genus]
     return false
   end
@@ -36,6 +37,7 @@ class Taxamatch
   def match_multinomial(parsed_data_1, parsed_data_2)
     gen_match = match_genera(parsed_data_1[:genus], parsed_data_2[:genus])
     sp_match = match_species(parsed_data_1[:species], parsed_data_2[:species])
+    au_match = match_authors(parsed_data_1, parsed_data_2)
     total_length = parsed_data_1[:genus][:epitheton].size + parsed_data_2[:genus][:epitheton].size + parsed_data_1[:species][:epitheton].size + parsed_data_2[:species][:epitheton].size
     match = match_matches(gen_match, sp_match)
     match.merge({:score => (1- match[:edit_distance]/(total_length/2))})
@@ -65,6 +67,14 @@ class Taxamatch
     {:edit_distance => ed, :match => match, :phonetic_match => false}
   end
+  def match_authors(parsed_data_1, parsed_data_2)
+    au1 = parsed_data_1[:all_authors]
+    au2 = parsed_data_2[:all_authors]
+    yr1 = parsed_data_1[:all_years]
+    yr2 = parsed_data_2[:all_years]
+    #Authormatch.compare_authorities(au1, au2, yr1, yr2)
+  end
   def match_matches(genus_match, species_match, infraspecies_matches = [])
     match = species_match
     match[:edit_distance] += genus_match[:edit_distance]

data/lib/taxamatch_rb/authmatch.rb ADDED Viewed

@@ -0,0 +1,86 @@
+class Authmatch
+  def self.authmatch(authors1, authors2, years1, years2)
+    return true
+    unique_authors1, unique_authors2 = remove_duplicate_authors(authors1, authors2)
+    year_difference = compare_years(years1, years2)
+    #return get_score_author_comparison(authors1, unique_authors1, authors2, unique_authors2, year_difference, 50, true);
+  end
+  def self.remove_duplicate_authors(author1, authors2)
+    au1_match = au2_match = false
+    au1_match.each do |au1|
+      match1 = false
+      au1_match.each do |au2|
+        match2 = false
+        if au1 == au2
+          match1 = match2 = true
+        elsif au1.size < au2.size
+          match1 = true if au1 == au2[0..au1.size]
+        elseif
+        end
+      end
+    end
+  end
+  def self.compare_years(years1, years2)
+    return 0 if years1 == [] && years2 == []
+    return (years1[0] - years2[0]).abs if years1.size == 1 && years2.size == 1
+    nil
+  end
+end
+=begin
+		foreach($author_words1 as $key1 => $author1)
+		{
+			$author1_matches = false;
+			$author1 = Normalize::normalize_author_string($author1);
+			foreach($author_words2 as $key2 => $author2)
+				{
+				$author2_matches = false;
+				$author2 = Normalize::normalize_author_string($author2);
+				if($author1 == $author2)
+				{
+					$author1_matches = true;
+					$author2_matches = true;
+				}elseif(preg_match("/^".preg_quote($author1, "/")."/i", $author2))
+				{
+					$author1_matches = true;
+				}elseif(preg_match("/^".preg_quote($author2, "/")."/i", $author1))
+				{
+					$author2_matches = true;
+				}
+				// equal or one is contained in the other, so consider it a match for both terms
+				if((strlen($author1)>=3 && $author1_matches) || (strlen($author2)>=3 && $author2_matches) || $author1 == $author2)
+				{
+					unset($unique_authors1[$key1]);
+					unset($unique_authors2[$key2]);
+				}elseif($author1_matches)
+				{
+					// author1 was abbreviation of author2
+					unset($unique_authors1[$key1]);
+				}elseif($author2_matches)
+				{
+				// author2 was abbreviation of author1
+					unset($unique_authors2[$key2]);
+				}else
+				{
+					// no match or abbreviation so try a fuzzy match
+					$max_length = max(strlen($author1), strlen($author2));
+					$lev = levenshtein($author1, $author2);
+					if(($lev/$max_length) <= .167)
+					{
+						unset($unique_authors1[$key1]);
+						unset($unique_authors2[$key2]);
+					}
+			}
+		}
+		reset($author_words2);
+	}
+=end

data/lib/taxamatch_rb/normalizer.rb CHANGED Viewed

@@ -6,7 +6,7 @@ module Normalizer
   end
   def self.normalize_word(word)
-    self.normalize(word).gsub(/[^A-Z0-9\.\-]/, '')
+    self.normalize(word).gsub(/[^A-Z0-9\-]/, '')
   end
 protected

data/lib/taxamatch_rb/parser.rb CHANGED Viewed

@@ -1,7 +1,7 @@
 # encoding: UTF-8
 require 'biodiversity'
-class Parser
+class TaxamatchParser
   def initialize
     @parser = ScientificNameParser.new
     @parsed_raw = nil
@@ -28,7 +28,7 @@ protected
     process_node(:genus, d['genus'])
     process_node(:species, d['species'], true)
     process_infraspecies(d['infraspecies'])
-    @res[:all_authors].uniq!
+    @res[:all_authors] = @res[:all_authors].uniq.map {|a| Normalizer.normalize(a)}
     @res[:all_years].uniq!
     @res.keys.size > 2 ? @res : nil
   end

data/spec/taxamatch_rb_spec.rb CHANGED Viewed

@@ -16,21 +16,21 @@ end
 describe 'Parser' do
   before(:all) do
-    @parser = Parser.new
+    @parser =TaxamatchParser.new
   end
   it 'should parse uninomials' do
     @parser.parse('Betula').should == {:all_authors=>[], :all_years=>[], :uninomial=>{:epitheton=>"Betula", :normalized=>"BETULA", :phonetized=>"BITILA", :authors=>[], :years=>[]}}
-    @parser.parse('Ærenea Lacordaire, 1872').should == {:all_authors=>["Lacordaire"], :all_years=>["1872"], :uninomial=>{:epitheton=>"Aerenea", :normalized=>"AERENEA", :phonetized=>"ERINIA", :authors=>["Lacordaire"], :years=>["1872"]}}
-    @parser.parse('Ærenea (Lacordaire, 1872) Muller 2007').should == {:all_authors=>["Lacordaire", "Muller"], :all_years=>["1872", "2007"], :uninomial=>{:epitheton=>"Aerenea", :normalized=>"AERENEA", :phonetized=>"ERINIA", :authors=>["Lacordaire", "Muller"], :years=>["1872", "2007"]}}
+    @parser.parse('Ærenea Lacordaire, 1872').should == {:all_authors=>["LACORDAIRE"], :all_years=>["1872"], :uninomial=>{:epitheton=>"Aerenea", :authors=>["Lacordaire"], :normalized=>"AERENEA", :phonetized=>"ERINIA", :years=>["1872"]}}
+    @parser.parse('Ærenea (Lacordaire, 1872) Muller 2007').should == {:all_authors=>["LACORDAIRE", "MULLER"], :all_years=>["1872", "2007"], :uninomial=>{:epitheton=>"Aerenea", :authors=>["Lacordaire", "Muller"], :normalized=>"AERENEA", :phonetized=>"ERINIA", :years=>["1872", "2007"]}}
   end
   it 'should parse binomials' do
-    @parser.parse('Leœptura laetifica Dow, 1913').should == {:all_authors=>["Dow"], :all_years=>["1913"], :genus=>{:epitheton=>"Leoeptura", :normalized=>"LEOEPTURA", :phonetized=>"LIPTIRA", :authors=>[], :years=>[]}, :species=>{:epitheton=>"laetifica", :normalized=>"LAETIFICA", :phonetized=>"LITIFICA", :authors=>["Dow"], :years=>["1913"]}}
+    @parser.parse('Leœptura laetifica Dow, 1913').should == {:species=>{:epitheton=>"laetifica", :authors=>["Dow"], :normalized=>"LAETIFICA", :phonetized=>"LITIFICA", :years=>["1913"]}, :all_authors=>["DOW"], :all_years=>["1913"], :genus=>{:epitheton=>"Leoeptura", :authors=>[], :normalized=>"LEOEPTURA", :phonetized=>"LIPTIRA", :years=>[]}}
   end
   it 'should parse trinomials' do
-    @parser.parse('Hydnellum scrobiculatum zonatum (Banker) D. Hall et D.E. Stuntz 1972').should == {:all_authors=>["Banker", "D. Hall", "D.E. Stuntz"], :all_years=>["1972"], :genus=>{:epitheton=>"Hydnellum", :normalized=>"HYDNELLUM", :phonetized=>"HIDNILIM", :authors=>[], :years=>[]}, :species=>{:epitheton=>"scrobiculatum", :normalized=>"SCROBICULATUM", :phonetized=>"SCRABICILATA", :authors=>[], :years=>[]}, :infraspecies=>[{:epitheton=>"zonatum", :normalized=>"ZONATUM", :phonetized=>"ZANATA", :authors=>["Banker", "D. Hall", "D.E. Stuntz"], :years=>["1972"]}]}
+    @parser.parse('Hydnellum scrobiculatum zonatum (Banker) D. Hall et D.E. Stuntz 1972').should == {:genus=>{:epitheton=>"Hydnellum", :authors=>[], :normalized=>"HYDNELLUM", :phonetized=>"HIDNILIM", :years=>[]}, :infraspecies=>[{:epitheton=>"zonatum", :authors=>["Banker", "D. Hall", "D.E. Stuntz"], :normalized=>"ZONATUM", :phonetized=>"ZANATA", :years=>["1972"]}], :all_authors=>["BANKER", "D. HALL", "D.E. STUNTZ"], :all_years=>["1972"], :species=>{:epitheton=>"scrobiculatum", :authors=>[], :normalized=>"SCROBICULATUM", :phonetized=>"SCRABICILATA", :years=>[]}}
   end
 end
@@ -174,6 +174,20 @@ describe 'Taxamatch' do
     smatch = {:match => true, :phonetic_match => true, :edit_distance => 2}
     @tm.match_matches(gmatch, smatch).should == {:phonetic_match=>true, :edit_distance=>4, :match=>true}
   end
+  describe 'Authmatch' do
+    before(:all) do
+      @am = Authmatch
+    end
+    it 'should compare years' do
+      @am.compare_years([1882],[1880]).should == 2
+      @am.compare_years([1882],[]).should == nil
+      @am.compare_years([],[]).should == 0
+      @am.compare_years([1788,1798], [1788,1798]).should be_nil
+    end
+  end
 end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: dimus-taxamatch_rb
 version: !ruby/object:Gem::Version
-  version: 0.1.2
+  version: 0.1.4
 platform: ruby
 authors:
 - Dmitry Mozzherin
@@ -9,7 +9,7 @@ autorequire:
 bindir: bin
 cert_chain: []
-date: 2009-07-29 00:00:00 -07:00
+date: 2009-08-02 00:00:00 -07:00
 default_executable:
 dependencies:
 - !ruby/object:Gem::Dependency
@@ -42,20 +42,9 @@ extra_rdoc_files:
 - LICENSE
 - README.rdoc
 files:
-- .document
-- .gitignore
-- LICENSE
 - README.rdoc
-- Rakefile
-- VERSION
-- features/step_definitions/common_steps.rb
-- features/step_definitions/taxamatch_rb.rb
-- features/support/common.rb
-- features/support/env.rb
-- features/support/matchers.rb
-- features/taxamatch_rb.feature
 - lib/taxamatch_rb.rb
-- lib/taxamatch_rb/authormatch.rb
+- lib/taxamatch_rb/authmatch.rb
 - lib/taxamatch_rb/damerau_levenshtein_mod.rb
 - lib/taxamatch_rb/normalizer.rb
 - lib/taxamatch_rb/parser.rb
@@ -65,7 +54,7 @@ files:
 - spec/spec_helper.rb
 - spec/taxamatch_rb_spec.rb
 - spec/taxamatch_test.txt
-- taxamatch_rb.gemspec
+- LICENSE
 has_rdoc: true
 homepage: http://github.com/dimus/taxamatch_rb
 licenses:

data/.document DELETED Viewed

@@ -1,5 +0,0 @@
-README.rdoc
-lib/**/*.rb
-bin/*
-features/**/*.feature
-LICENSE

data/.gitignore DELETED Viewed

@@ -1,6 +0,0 @@
-*.sw?
-.DS_Store
-coverage
-rdoc
-pkg
-tmp

data/Rakefile DELETED Viewed

@@ -1,50 +0,0 @@
-require 'rubygems'
-require 'rake'
-begin
-  require 'jeweler'
-  Jeweler::Tasks.new do |gem|
-    gem.name = "taxamatch_rb"
-    gem.summary = %Q{TODO}
-    gem.email = "dmozzherin@eol.org"
-    gem.homepage = "http://github.com/dimus/taxamatch_rb"
-    gem.authors = ["Dmitry Mozzherin"]
-    gem.add_dependency('RubyInline')
-    gem.add_dependency('dimus-biodiversity')
-    # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
-  end
-rescue LoadError
-  puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
-end
-require 'spec/rake/spectask'
-Spec::Rake::SpecTask.new(:spec) do |spec|
-  spec.libs << 'lib' << 'spec'
-  spec.spec_files = FileList['spec/**/*_spec.rb']
-end
-Spec::Rake::SpecTask.new(:rcov) do |spec|
-  spec.libs << 'lib' << 'spec'
-  spec.pattern = 'spec/**/*_spec.rb'
-  spec.rcov = true
-end
-task :default => :spec
-require 'rake/rdoctask'
-Rake::RDocTask.new do |rdoc|
-  if File.exist?('VERSION.yml')
-    config = YAML.load(File.read('VERSION.yml'))
-    version = "#{config[:major]}.#{config[:minor]}.#{config[:patch]}"
-  else
-    version = ""
-  end
-  rdoc.rdoc_dir = 'rdoc'
-  rdoc.title = "taxamatch_rb #{version}"
-  rdoc.rdoc_files.include('README*')
-  rdoc.rdoc_files.include('lib/**/*.rb')
-end

data/VERSION DELETED Viewed

@@ -1 +0,0 @@
-0.1.2

data/features/step_definitions/common_steps.rb DELETED Viewed

@@ -1,163 +0,0 @@
-Given /^this project is active project folder/ do
-  @active_project_folder = File.expand_path(File.dirname(__FILE__) + "/../..")
-end
-Given /^env variable \$([\w_]+) set to "(.*)"/ do |env_var, value|
-  ENV[env_var] = value
-end
-Given /"(.*)" folder is deleted/ do |folder|
-  in_project_folder { FileUtils.rm_rf folder }
-end
-When /^I invoke "(.*)" generator with arguments "(.*)"$/ do |generator, arguments|
-  @stdout = StringIO.new
-  in_project_folder do
-    if Object.const_defined?("APP_ROOT")
-      APP_ROOT.replace(FileUtils.pwd)
-    else
-      APP_ROOT = FileUtils.pwd
-    end
-    run_generator(generator, arguments.split(' '), SOURCES, :stdout => @stdout)
-  end
-  File.open(File.join(@tmp_root, "generator.out"), "w") do |f|
-    @stdout.rewind
-    f << @stdout.read
-  end
-end
-When /^I run executable "(.*)" with arguments "(.*)"/ do |executable, arguments|
-  @stdout = File.expand_path(File.join(@tmp_root, "executable.out"))
-  in_project_folder do
-    system "#{executable} #{arguments} > #{@stdout} 2> #{@stdout}"
-  end
-end
-When /^I run project executable "(.*)" with arguments "(.*)"/ do |executable, arguments|
-  @stdout = File.expand_path(File.join(@tmp_root, "executable.out"))
-  in_project_folder do
-    system "ruby #{executable} #{arguments} > #{@stdout} 2> #{@stdout}"
-  end
-end
-When /^I run local executable "(.*)" with arguments "(.*)"/ do |executable, arguments|
-  @stdout = File.expand_path(File.join(@tmp_root, "executable.out"))
-  executable = File.expand_path(File.join(File.dirname(__FILE__), "/../../bin", executable))
-  in_project_folder do
-    system "ruby #{executable} #{arguments} > #{@stdout} 2> #{@stdout}"
-  end
-end
-When /^I invoke task "rake (.*)"/ do |task|
-  @stdout = File.expand_path(File.join(@tmp_root, "tests.out"))
-  in_project_folder do
-    system "rake #{task} --trace > #{@stdout} 2> #{@stdout}"
-  end
-end
-Then /^folder "(.*)" (is|is not) created/ do |folder, is|
-  in_project_folder do
-    File.exists?(folder).should(is == 'is' ? be_true : be_false)
-  end
-end
-Then /^file "(.*)" (is|is not) created/ do |file, is|
-  in_project_folder do
-    File.exists?(file).should(is == 'is' ? be_true : be_false)
-  end
-end
-Then /^file with name matching "(.*)" is created/ do |pattern|
-  in_project_folder do
-    Dir[pattern].should_not be_empty
-  end
-end
-Then /^file "(.*)" contents (does|does not) match \/(.*)\// do |file, does, regex|
-  in_project_folder do
-    actual_output = File.read(file)
-    (does == 'does') ?
-      actual_output.should(match(/#{regex}/)) :
-      actual_output.should_not(match(/#{regex}/))
-  end
-end
-Then /gem file "(.*)" and generated file "(.*)" should be the same/ do |gem_file, project_file|
-  File.exists?(gem_file).should be_true
-  File.exists?(project_file).should be_true
-  gem_file_contents = File.read(File.dirname(__FILE__) + "/../../#{gem_file}")
-  project_file_contents = File.read(File.join(@active_project_folder, project_file))
-  project_file_contents.should == gem_file_contents
-end
-Then /^(does|does not) invoke generator "(.*)"$/ do |does_invoke, generator|
-  actual_output = File.read(@stdout)
-  does_invoke == "does" ?
-    actual_output.should(match(/dependency\s+#{generator}/)) :
-    actual_output.should_not(match(/dependency\s+#{generator}/))
-end
-Then /help options "(.*)" and "(.*)" are displayed/ do |opt1, opt2|
-  actual_output = File.read(@stdout)
-  actual_output.should match(/#{opt1}/)
-  actual_output.should match(/#{opt2}/)
-end
-Then /^I should see$/ do |text|
-  actual_output = File.read(@stdout)
-  actual_output.should contain(text)
-end
-Then /^I should not see$/ do |text|
-  actual_output = File.read(@stdout)
-  actual_output.should_not contain(text)
-end
-Then /^I should see exactly$/ do |text|
-  actual_output = File.read(@stdout)
-  actual_output.should == text
-end
-Then /^I should see all (\d+) tests pass/ do |expected_test_count|
-  expected = %r{^#{expected_test_count} tests, \d+ assertions, 0 failures, 0 errors}
-  actual_output = File.read(@stdout)
-  actual_output.should match(expected)
-end
-Then /^I should see all (\d+) examples pass/ do |expected_test_count|
-  expected = %r{^#{expected_test_count} examples?, 0 failures}
-  actual_output = File.read(@stdout)
-  actual_output.should match(expected)
-end
-Then /^yaml file "(.*)" contains (\{.*\})/ do |file, yaml|
-  in_project_folder do
-    yaml = eval yaml
-    YAML.load(File.read(file)).should == yaml
-  end
-end
-Then /^Rakefile can display tasks successfully/ do
-  @stdout = File.expand_path(File.join(@tmp_root, "rakefile.out"))
-  in_project_folder do
-    system "rake -T > #{@stdout} 2> #{@stdout}"
-  end
-  actual_output = File.read(@stdout)
-  actual_output.should match(/^rake\s+\w+\s+#\s.*/)
-end
-Then /^task "rake (.*)" is executed successfully/ do |task|
-  @stdout.should_not be_nil
-  actual_output = File.read(@stdout)
-  actual_output.should_not match(/^Don't know how to build task '#{task}'/)
-  actual_output.should_not match(/Error/i)
-end
-Then /^gem spec key "(.*)" contains \/(.*)\// do |key, regex|
-  in_project_folder do
-    gem_file = Dir["pkg/*.gem"].first
-    gem_spec = Gem::Specification.from_yaml(`gem spec #{gem_file}`)
-    spec_value = gem_spec.send(key.to_sym)
-    spec_value.to_s.should match(/#{regex}/)
-  end
-end

data/features/step_definitions/taxamatch_rb.rb DELETED Viewed

@@ -1,92 +0,0 @@
-str1 = str2 = block_size = max_distance = distance = dlm = nil
-###############
-#DAMERAU LEVENSHTEIN MOD
-###############
-Given /^strings "([^\"]*)" and "([^\"]*)", transposition block size "([^\"]*)", and a maximum allowed distance "([^\"]*)"$/ do |a,b,c,d|
-  str1 = a
-  str2 = b
-  block_size = c.to_i
-  max_distance = d.to_i
-end
-When /^I run "([^\"]*)" instance function "([^\"]*)"$/ do |arg1, arg2|
-  dl = eval(arg1 + ".new")
-  distance = dl.distance(str1, str2, block_size, max_distance)
-end
-Then /^I should receive edit distance "([^\"]*)"$/ do |arg1|
-  distance.should == arg1.to_i
-end
-#############
-#PARSER
-#############
-sci_name =  result = nil
-parser = Parser.new
-Given /^a name "([^\"]*)"$/ do |arg1|
-  sci_name = arg1
-end
-When /^I run a Parser function parse$/ do
-  result = parser.parse(sci_name)
-end
-Then /^I should receive "([^\"]*)" as genus epithet, "([^\"]*)" as species epithet, "([^\"]*)" and "([^\"]*)" as species authors, "([^\"]*)" as a species year$/ do |gen_val, sp_val, au_val1, au_val2, yr_val|
-  result[:genus][:epitheton].should == gen_val
-  result[:species][:epitheton].should == sp_val
-  result[:species][:authors].include?(au_val1).should be_true
-  result[:species][:authors].include?(au_val2).should be_true
-  result[:species][:years].include?(yr_val).should be_true
-  require 'pp'
-  print result
-end
-#############
-# NORMALIZER
-#############
-string = normalized_string = nil
-Given /^a string "([^\"]*)"$/ do |arg1|
-  string = arg1
-end
-When /^I run a Normalizer function normalize$/ do
-  normalized_string = Normalizer.normalize(string)
-end
-Then /^I should receive "([^\"]*)" as a normalized form of the string$/ do |arg1|
-  normalized_string.should == arg1
-end
-######
-# PHONETIZER
-#####
-word = phonetized_word = nil
-Given /^a word "([^\"]*)"$/ do |arg1|
-  word = arg1
-end
-When /^I run a Phonetizer function near_match$/ do
-  phonetized_word = Phonetizer.near_match(word)
-end
-Then /^I should receive "([^\"]*)" as a phonetic form of the word$/ do |arg1|
-  phonetized_word.should == arg1
-end
-When /^I run a Phonetizer function near_match with an option normalize_ending$/ do
-  phonetized_word = Phonetizer.near_match(word,true)
-end
-Then /^I should receive "([^\"]*)" as a normalized phonetic form of the word$/ do |arg1|
-  phonetized_word.should == arg1
-end

data/features/support/common.rb DELETED Viewed

@@ -1,29 +0,0 @@
-module CommonHelpers
-  def in_tmp_folder(&block)
-    FileUtils.chdir(@tmp_root, &block)
-  end
-  def in_project_folder(&block)
-    project_folder = @active_project_folder || @tmp_root
-    FileUtils.chdir(project_folder, &block)
-  end
-  def in_home_folder(&block)
-    FileUtils.chdir(@home_path, &block)
-  end
-  def force_local_lib_override(project_name = @project_name)
-    rakefile = File.read(File.join(project_name, 'Rakefile'))
-    File.open(File.join(project_name, 'Rakefile'), "w+") do |f|
-      f << "$:.unshift('#{@lib_path}')\n"
-      f << rakefile
-    end
-  end
-  def setup_active_project_folder project_name
-    @active_project_folder = File.join(@tmp_root, project_name)
-    @project_name = project_name
-  end
-end
-World(CommonHelpers)

data/features/support/env.rb DELETED Viewed

@@ -1,14 +0,0 @@
-require File.dirname(__FILE__) + "/../../lib/taxamatch_rb"
-gem 'cucumber'
-require 'cucumber'
-gem 'rspec'
-require 'spec'
-Before do
-  @tmp_root = File.dirname(__FILE__) + "/../../tmp"
-  @home_path = File.expand_path(File.join(@tmp_root, "home"))
-  FileUtils.rm_rf   @tmp_root
-  FileUtils.mkdir_p @home_path
-  ENV['HOME'] = @home_path
-end

data/features/support/matchers.rb DELETED Viewed

@@ -1,11 +0,0 @@
-module Matchers
-  def contain(expected)
-    simple_matcher("contain #{expected.inspect}") do |given, matcher|
-      matcher.failure_message = "expected #{given.inspect} to contain #{expected.inspect}"
-      matcher.negative_failure_message = "expected #{given.inspect} not to contain #{expected.inspect}"
-      given.index expected
-    end
-  end
-end
-World(Matchers)

data/features/taxamatch_rb.feature DELETED Viewed

@@ -1,33 +0,0 @@
-Feature: Find if two scientific names are lexical variants of each other
-  As a Biodiversity Informatician
-  I want to be able to compare scientific names to determine if they are variants of the same name.
-  And I want to be able to combine names that are the same into lexical groups, so they appear together in names list
-  So I want to implement Tony Rees and Barbara Boehmer taxamatch algorithms http://bit.ly/boWyG
-  Scenario: find edit distance between two unicode (utf8) strings
-    Given strings "Sjostedt" and "Sojstedt", transposition block size "1", and a maximum allowed distance "4"
-    When I run "DamerauLevenshteinMod" instance function "distance"
-    Then I should receive edit distance "1"
-  Scenario: find parts of a name in unicode
-    Given a name "Arthopyrenia hyalospora (Banker) D. Hall 1988 hyalosporis Kutz 1999"
-    When I run a Parser function parse
-    Then I should receive "Arthopyrenia" as genus epithet, "hyalospora" as species epithet, "Banker" and "D. Hall" as species authors, "1988" as a species year
-  Scenario: normalize a string into ASCII upcase
-    Given a string "Choriozopella trägårdhi"
-    When I run a Normalizer function normalize
-    Then I should receive "CHORIOZOPELLA TRAGARDHI" as a normalized form of the string
-  Scenario: create phonetic version of a word
-    Given a word "bifasciata"
-    When I run a Phonetizer function near_match
-    Then I should receive "BIFASATA" as a phonetic form of the word
-  Scenario: create phonetic version of a species epithet normalizing ending
-    Given a word "bifasciatum"
-    When I run a Phonetizer function near_match with an option normalize_ending
-    Then I should receive "BIFASATA" as a normalized phonetic form of the word

data/lib/taxamatch_rb/authormatch.rb DELETED Viewed

File without changes

data/taxamatch_rb.gemspec DELETED Viewed

@@ -1,67 +0,0 @@
-# -*- encoding: utf-8 -*-
-Gem::Specification.new do |s|
-  s.name = %q{taxamatch_rb}
-  s.version = "0.1.2"
-  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
-  s.authors = ["Dmitry Mozzherin"]
-  s.date = %q{2009-07-29}
-  s.email = %q{dmozzherin@eol.org}
-  s.extra_rdoc_files = [
-    "LICENSE",
-     "README.rdoc"
-  ]
-  s.files = [
-    ".document",
-     ".gitignore",
-     "LICENSE",
-     "README.rdoc",
-     "Rakefile",
-     "VERSION",
-     "features/step_definitions/common_steps.rb",
-     "features/step_definitions/taxamatch_rb.rb",
-     "features/support/common.rb",
-     "features/support/env.rb",
-     "features/support/matchers.rb",
-     "features/taxamatch_rb.feature",
-     "lib/taxamatch_rb.rb",
-     "lib/taxamatch_rb/authormatch.rb",
-     "lib/taxamatch_rb/damerau_levenshtein_mod.rb",
-     "lib/taxamatch_rb/normalizer.rb",
-     "lib/taxamatch_rb/parser.rb",
-     "lib/taxamatch_rb/phonetizer.rb",
-     "spec/damerau_levenshtein_mod_test.txt",
-     "spec/spec.opts",
-     "spec/spec_helper.rb",
-     "spec/taxamatch_rb_spec.rb",
-     "spec/taxamatch_test.txt",
-     "taxamatch_rb.gemspec"
-  ]
-  s.has_rdoc = true
-  s.homepage = %q{http://github.com/dimus/taxamatch_rb}
-  s.rdoc_options = ["--charset=UTF-8"]
-  s.require_paths = ["lib"]
-  s.rubygems_version = %q{1.3.1}
-  s.summary = %q{TODO}
-  s.test_files = [
-    "spec/spec_helper.rb",
-     "spec/taxamatch_rb_spec.rb"
-  ]
-  if s.respond_to? :specification_version then
-    current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
-    s.specification_version = 2
-    if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
-      s.add_runtime_dependency(%q<RubyInline>, [">= 0"])
-      s.add_runtime_dependency(%q<dimus-biodiversity>, [">= 0"])
-    else
-      s.add_dependency(%q<RubyInline>, [">= 0"])
-      s.add_dependency(%q<dimus-biodiversity>, [">= 0"])
-    end
-  else
-    s.add_dependency(%q<RubyInline>, [">= 0"])
-    s.add_dependency(%q<dimus-biodiversity>, [">= 0"])
-  end
-end