dimus-taxamatch_rb 0.1.2 → 0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/taxamatch_rb.rb +12 -2
- data/lib/taxamatch_rb/authmatch.rb +86 -0
- data/lib/taxamatch_rb/normalizer.rb +1 -1
- data/lib/taxamatch_rb/parser.rb +2 -2
- data/spec/taxamatch_rb_spec.rb +19 -5
- metadata +4 -15
- data/.document +0 -5
- data/.gitignore +0 -6
- data/Rakefile +0 -50
- data/VERSION +0 -1
- data/features/step_definitions/common_steps.rb +0 -163
- data/features/step_definitions/taxamatch_rb.rb +0 -92
- data/features/support/common.rb +0 -29
- data/features/support/env.rb +0 -14
- data/features/support/matchers.rb +0 -11
- data/features/taxamatch_rb.feature +0 -33
- data/lib/taxamatch_rb/authormatch.rb +0 -0
- data/taxamatch_rb.gemspec +0 -67
data/lib/taxamatch_rb.rb
CHANGED
@@ -5,11 +5,12 @@ require 'taxamatch_rb/damerau_levenshtein_mod'
|
|
5
5
|
require 'taxamatch_rb/parser'
|
6
6
|
require 'taxamatch_rb/normalizer'
|
7
7
|
require 'taxamatch_rb/phonetizer'
|
8
|
+
require 'taxamatch_rb/authmatch'
|
8
9
|
|
9
10
|
class Taxamatch
|
10
11
|
|
11
12
|
def initialize
|
12
|
-
@parser = Parser.new
|
13
|
+
@parser = TaxamatchParser.new
|
13
14
|
@dlm = DamerauLevenshteinMod.new
|
14
15
|
end
|
15
16
|
|
@@ -24,7 +25,7 @@ class Taxamatch
|
|
24
25
|
#takes two hashes of parsed scientific names, analyses them and returns back
|
25
26
|
#this function is useful when species strings are preparsed.
|
26
27
|
def taxamatch_parsed_data(parsed_data_1, parsed_data_2)
|
27
|
-
return match_uninomial(parsed_data_1, parsed_data_2) if parsed_data_1[:
|
28
|
+
return match_uninomial(parsed_data_1, parsed_data_2) if parsed_data_1[:uninomial] && parsed_data_2[:uninomial]
|
28
29
|
return match_multinomial(parsed_data_1, parsed_data_2) if parsed_data_1[:genus] && parsed_data_2[:genus]
|
29
30
|
return false
|
30
31
|
end
|
@@ -36,6 +37,7 @@ class Taxamatch
|
|
36
37
|
def match_multinomial(parsed_data_1, parsed_data_2)
|
37
38
|
gen_match = match_genera(parsed_data_1[:genus], parsed_data_2[:genus])
|
38
39
|
sp_match = match_species(parsed_data_1[:species], parsed_data_2[:species])
|
40
|
+
au_match = match_authors(parsed_data_1, parsed_data_2)
|
39
41
|
total_length = parsed_data_1[:genus][:epitheton].size + parsed_data_2[:genus][:epitheton].size + parsed_data_1[:species][:epitheton].size + parsed_data_2[:species][:epitheton].size
|
40
42
|
match = match_matches(gen_match, sp_match)
|
41
43
|
match.merge({:score => (1- match[:edit_distance]/(total_length/2))})
|
@@ -65,6 +67,14 @@ class Taxamatch
|
|
65
67
|
{:edit_distance => ed, :match => match, :phonetic_match => false}
|
66
68
|
end
|
67
69
|
|
70
|
+
def match_authors(parsed_data_1, parsed_data_2)
|
71
|
+
au1 = parsed_data_1[:all_authors]
|
72
|
+
au2 = parsed_data_2[:all_authors]
|
73
|
+
yr1 = parsed_data_1[:all_years]
|
74
|
+
yr2 = parsed_data_2[:all_years]
|
75
|
+
#Authormatch.compare_authorities(au1, au2, yr1, yr2)
|
76
|
+
end
|
77
|
+
|
68
78
|
def match_matches(genus_match, species_match, infraspecies_matches = [])
|
69
79
|
match = species_match
|
70
80
|
match[:edit_distance] += genus_match[:edit_distance]
|
@@ -0,0 +1,86 @@
|
|
1
|
+
# Compares the authorship parts (author names and years) of two parsed
# scientific names.
#
# Work in progress: the final scoring step has not been ported yet, so
# +authmatch+ still answers +true+ for every input. The two helpers it will
# rely on (+remove_duplicate_authors+ and +compare_years+) are functional.
class Authmatch
  # An abbreviation of at least this many characters is treated as a match
  # for both the abbreviated and the full author name (same threshold as the
  # PHP reference kept in the comment block after this class).
  MIN_ABBREVIATION_SIZE = 3

  # Decides whether two authorships refer to the same authority.
  #
  # authors1, authors2 - Arrays of author name Strings.
  # years1, years2     - Arrays of years.
  #
  # Returns true unconditionally for now (placeholder result).
  def self.authmatch(authors1, authors2, years1, years2)
    # Planned pipeline once get_score_author_comparison is ported:
    #   unique_authors1, unique_authors2 = remove_duplicate_authors(authors1, authors2)
    #   year_difference = compare_years(years1, years2)
    #   get_score_author_comparison(authors1, unique_authors1, authors2, unique_authors2, year_difference, 50, true)
    true
  end

  # Removes from both lists the authors that have a counterpart in the other
  # list. Two authors are counterparts when they are equal or when one is a
  # leading abbreviation of the other.
  #
  # author1  - Array of author name Strings of the first name (parameter name
  #            kept as in the original signature).
  # authors2 - Array of author name Strings of the second name.
  #
  # Comparison is case-sensitive; NOTE(review): this presumes both lists were
  # normalized the same way beforehand - confirm against the caller.
  # TODO: fuzzy (edit distance) matching from the PHP reference is not
  # implemented here.
  #
  # Returns a two-element Array: [unmatched authors of author1,
  # unmatched authors of authors2], each in its original order.
  def self.remove_duplicate_authors(author1, authors2)
    matched1 = Array.new(author1.size, false)
    matched2 = Array.new(authors2.size, false)

    author1.each_with_index do |au1, i|
      authors2.each_with_index do |au2, j|
        drop1, drop2 = author_pair_match(au1, au2)
        matched1[i] ||= drop1
        matched2[j] ||= drop2
      end
    end

    unique1 = author1.zip(matched1).reject { |_, hit| hit }.map { |au, _| au }
    unique2 = authors2.zip(matched2).reject { |_, hit| hit }.map { |au, _| au }
    [unique1, unique2]
  end

  # Compares a single pair of authors.
  #
  # Returns [remove_first, remove_second]: which of the two names should be
  # dropped from its list of unique authors.
  def self.author_pair_match(au1, au2)
    return [true, true] if au1 == au2

    # String#[start, length] takes exactly the prefix of the shorter name.
    au1_abbreviated = au1.size < au2.size && au2[0, au1.size] == au1
    au2_abbreviated = au2.size < au1.size && au1[0, au2.size] == au2
    return [false, false] unless au1_abbreviated || au2_abbreviated

    # A long enough abbreviation is convincing, so both names are matched;
    # a very short one only removes the abbreviation itself.
    abbreviation = au1_abbreviated ? au1 : au2
    return [true, true] if abbreviation.size >= MIN_ABBREVIATION_SIZE
    [au1_abbreviated, au2_abbreviated]
  end
  private_class_method :author_pair_match

  # Computes the distance in years between two authorships.
  #
  # years1, years2 - Arrays of years; elements may be Integers or numeric
  #                  Strings (both are converted with #to_i).
  #
  # Returns 0 when both lists are empty, the absolute difference when each
  # list holds exactly one year, and nil when the years cannot be compared.
  def self.compare_years(years1, years2)
    return 0 if years1.empty? && years2.empty?
    return (years1[0].to_i - years2[0].to_i).abs if years1.size == 1 && years2.size == 1
    nil
  end
end
|
34
|
+
|
35
|
+
=begin
|
36
|
+
foreach($author_words1 as $key1 => $author1)
|
37
|
+
{
|
38
|
+
$author1_matches = false;
|
39
|
+
$author1 = Normalize::normalize_author_string($author1);
|
40
|
+
foreach($author_words2 as $key2 => $author2)
|
41
|
+
{
|
42
|
+
$author2_matches = false;
|
43
|
+
$author2 = Normalize::normalize_author_string($author2);
|
44
|
+
|
45
|
+
if($author1 == $author2)
|
46
|
+
{
|
47
|
+
$author1_matches = true;
|
48
|
+
$author2_matches = true;
|
49
|
+
}elseif(preg_match("/^".preg_quote($author1, "/")."/i", $author2))
|
50
|
+
{
|
51
|
+
$author1_matches = true;
|
52
|
+
}elseif(preg_match("/^".preg_quote($author2, "/")."/i", $author1))
|
53
|
+
{
|
54
|
+
$author2_matches = true;
|
55
|
+
}
|
56
|
+
|
57
|
+
// equal or one is contained in the other, so consider it a match for both terms
|
58
|
+
if((strlen($author1)>=3 && $author1_matches) || (strlen($author2)>=3 && $author2_matches) || $author1 == $author2)
|
59
|
+
{
|
60
|
+
unset($unique_authors1[$key1]);
|
61
|
+
unset($unique_authors2[$key2]);
|
62
|
+
}elseif($author1_matches)
|
63
|
+
{
|
64
|
+
// author1 was abbreviation of author2
|
65
|
+
unset($unique_authors1[$key1]);
|
66
|
+
}elseif($author2_matches)
|
67
|
+
{
|
68
|
+
// author2 was abbreviation of author1
|
69
|
+
unset($unique_authors2[$key2]);
|
70
|
+
}else
|
71
|
+
{
|
72
|
+
// no match or abbreviation so try a fuzzy match
|
73
|
+
$max_length = max(strlen($author1), strlen($author2));
|
74
|
+
$lev = levenshtein($author1, $author2);
|
75
|
+
if(($lev/$max_length) <= .167)
|
76
|
+
{
|
77
|
+
unset($unique_authors1[$key1]);
|
78
|
+
unset($unique_authors2[$key2]);
|
79
|
+
}
|
80
|
+
}
|
81
|
+
}
|
82
|
+
reset($author_words2);
|
83
|
+
}
|
84
|
+
|
85
|
+
|
86
|
+
=end
|
data/lib/taxamatch_rb/parser.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
# encoding: UTF-8
|
2
2
|
require 'biodiversity'
|
3
3
|
|
4
|
-
class Parser
|
4
|
+
class TaxamatchParser
|
5
5
|
def initialize
|
6
6
|
@parser = ScientificNameParser.new
|
7
7
|
@parsed_raw = nil
|
@@ -28,7 +28,7 @@ protected
|
|
28
28
|
process_node(:genus, d['genus'])
|
29
29
|
process_node(:species, d['species'], true)
|
30
30
|
process_infraspecies(d['infraspecies'])
|
31
|
-
@res[:all_authors].uniq!
|
31
|
+
@res[:all_authors] = @res[:all_authors].uniq.map {|a| Normalizer.normalize(a)}
|
32
32
|
@res[:all_years].uniq!
|
33
33
|
@res.keys.size > 2 ? @res : nil
|
34
34
|
end
|
data/spec/taxamatch_rb_spec.rb
CHANGED
@@ -16,21 +16,21 @@ end
|
|
16
16
|
|
17
17
|
describe 'Parser' do
|
18
18
|
before(:all) do
|
19
|
-
@parser = Parser.new
|
19
|
+
@parser =TaxamatchParser.new
|
20
20
|
end
|
21
21
|
|
22
22
|
it 'should parse uninomials' do
|
23
23
|
@parser.parse('Betula').should == {:all_authors=>[], :all_years=>[], :uninomial=>{:epitheton=>"Betula", :normalized=>"BETULA", :phonetized=>"BITILA", :authors=>[], :years=>[]}}
|
24
|
-
@parser.parse('Ærenea Lacordaire, 1872').should == {:all_authors=>["
|
25
|
-
@parser.parse('Ærenea (Lacordaire, 1872) Muller 2007').should == {:all_authors=>["
|
24
|
+
@parser.parse('Ærenea Lacordaire, 1872').should == {:all_authors=>["LACORDAIRE"], :all_years=>["1872"], :uninomial=>{:epitheton=>"Aerenea", :authors=>["Lacordaire"], :normalized=>"AERENEA", :phonetized=>"ERINIA", :years=>["1872"]}}
|
25
|
+
@parser.parse('Ærenea (Lacordaire, 1872) Muller 2007').should == {:all_authors=>["LACORDAIRE", "MULLER"], :all_years=>["1872", "2007"], :uninomial=>{:epitheton=>"Aerenea", :authors=>["Lacordaire", "Muller"], :normalized=>"AERENEA", :phonetized=>"ERINIA", :years=>["1872", "2007"]}}
|
26
26
|
end
|
27
27
|
|
28
28
|
it 'should parse binomials' do
|
29
|
-
@parser.parse('Leœptura laetifica Dow, 1913').should == {:
|
29
|
+
@parser.parse('Leœptura laetifica Dow, 1913').should == {:species=>{:epitheton=>"laetifica", :authors=>["Dow"], :normalized=>"LAETIFICA", :phonetized=>"LITIFICA", :years=>["1913"]}, :all_authors=>["DOW"], :all_years=>["1913"], :genus=>{:epitheton=>"Leoeptura", :authors=>[], :normalized=>"LEOEPTURA", :phonetized=>"LIPTIRA", :years=>[]}}
|
30
30
|
end
|
31
31
|
|
32
32
|
it 'should parse trinomials' do
|
33
|
-
@parser.parse('Hydnellum scrobiculatum zonatum (Banker) D. Hall et D.E. Stuntz 1972').should == {:
|
33
|
+
@parser.parse('Hydnellum scrobiculatum zonatum (Banker) D. Hall et D.E. Stuntz 1972').should == {:genus=>{:epitheton=>"Hydnellum", :authors=>[], :normalized=>"HYDNELLUM", :phonetized=>"HIDNILIM", :years=>[]}, :infraspecies=>[{:epitheton=>"zonatum", :authors=>["Banker", "D. Hall", "D.E. Stuntz"], :normalized=>"ZONATUM", :phonetized=>"ZANATA", :years=>["1972"]}], :all_authors=>["BANKER", "D. HALL", "D.E. STUNTZ"], :all_years=>["1972"], :species=>{:epitheton=>"scrobiculatum", :authors=>[], :normalized=>"SCROBICULATUM", :phonetized=>"SCRABICILATA", :years=>[]}}
|
34
34
|
end
|
35
35
|
end
|
36
36
|
|
@@ -174,6 +174,20 @@ describe 'Taxamatch' do
|
|
174
174
|
smatch = {:match => true, :phonetic_match => true, :edit_distance => 2}
|
175
175
|
@tm.match_matches(gmatch, smatch).should == {:phonetic_match=>true, :edit_distance=>4, :match=>true}
|
176
176
|
end
|
177
|
+
|
178
|
+
describe 'Authmatch' do
|
179
|
+
before(:all) do
|
180
|
+
@am = Authmatch
|
181
|
+
end
|
182
|
+
|
183
|
+
it 'should compare years' do
|
184
|
+
@am.compare_years([1882],[1880]).should == 2
|
185
|
+
@am.compare_years([1882],[]).should == nil
|
186
|
+
@am.compare_years([],[]).should == 0
|
187
|
+
@am.compare_years([1788,1798], [1788,1798]).should be_nil
|
188
|
+
end
|
189
|
+
end
|
190
|
+
|
177
191
|
end
|
178
192
|
|
179
193
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dimus-taxamatch_rb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.2
|
4
|
+
version: 0.1.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dmitry Mozzherin
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-07-29 00:00:00 -07:00
|
12
|
+
date: 2009-08-02 00:00:00 -07:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
@@ -42,20 +42,9 @@ extra_rdoc_files:
|
|
42
42
|
- LICENSE
|
43
43
|
- README.rdoc
|
44
44
|
files:
|
45
|
-
- .document
|
46
|
-
- .gitignore
|
47
|
-
- LICENSE
|
48
45
|
- README.rdoc
|
49
|
-
- Rakefile
|
50
|
-
- VERSION
|
51
|
-
- features/step_definitions/common_steps.rb
|
52
|
-
- features/step_definitions/taxamatch_rb.rb
|
53
|
-
- features/support/common.rb
|
54
|
-
- features/support/env.rb
|
55
|
-
- features/support/matchers.rb
|
56
|
-
- features/taxamatch_rb.feature
|
57
46
|
- lib/taxamatch_rb.rb
|
58
|
-
- lib/taxamatch_rb/authormatch.rb
|
47
|
+
- lib/taxamatch_rb/authmatch.rb
|
59
48
|
- lib/taxamatch_rb/damerau_levenshtein_mod.rb
|
60
49
|
- lib/taxamatch_rb/normalizer.rb
|
61
50
|
- lib/taxamatch_rb/parser.rb
|
@@ -65,7 +54,7 @@ files:
|
|
65
54
|
- spec/spec_helper.rb
|
66
55
|
- spec/taxamatch_rb_spec.rb
|
67
56
|
- spec/taxamatch_test.txt
|
68
|
-
- taxamatch_rb.gemspec
|
57
|
+
- LICENSE
|
69
58
|
has_rdoc: true
|
70
59
|
homepage: http://github.com/dimus/taxamatch_rb
|
71
60
|
licenses:
|
data/.document
DELETED
data/.gitignore
DELETED
data/Rakefile
DELETED
@@ -1,50 +0,0 @@
|
|
1
|
-
require 'rubygems'
|
2
|
-
require 'rake'
|
3
|
-
|
4
|
-
begin
|
5
|
-
require 'jeweler'
|
6
|
-
Jeweler::Tasks.new do |gem|
|
7
|
-
gem.name = "taxamatch_rb"
|
8
|
-
gem.summary = %Q{TODO}
|
9
|
-
gem.email = "dmozzherin@eol.org"
|
10
|
-
gem.homepage = "http://github.com/dimus/taxamatch_rb"
|
11
|
-
gem.authors = ["Dmitry Mozzherin"]
|
12
|
-
gem.add_dependency('RubyInline')
|
13
|
-
gem.add_dependency('dimus-biodiversity')
|
14
|
-
# gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
|
15
|
-
end
|
16
|
-
|
17
|
-
rescue LoadError
|
18
|
-
puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
|
19
|
-
end
|
20
|
-
|
21
|
-
require 'spec/rake/spectask'
|
22
|
-
Spec::Rake::SpecTask.new(:spec) do |spec|
|
23
|
-
spec.libs << 'lib' << 'spec'
|
24
|
-
spec.spec_files = FileList['spec/**/*_spec.rb']
|
25
|
-
end
|
26
|
-
|
27
|
-
Spec::Rake::SpecTask.new(:rcov) do |spec|
|
28
|
-
spec.libs << 'lib' << 'spec'
|
29
|
-
spec.pattern = 'spec/**/*_spec.rb'
|
30
|
-
spec.rcov = true
|
31
|
-
end
|
32
|
-
|
33
|
-
|
34
|
-
task :default => :spec
|
35
|
-
|
36
|
-
require 'rake/rdoctask'
|
37
|
-
Rake::RDocTask.new do |rdoc|
|
38
|
-
if File.exist?('VERSION.yml')
|
39
|
-
config = YAML.load(File.read('VERSION.yml'))
|
40
|
-
version = "#{config[:major]}.#{config[:minor]}.#{config[:patch]}"
|
41
|
-
else
|
42
|
-
version = ""
|
43
|
-
end
|
44
|
-
|
45
|
-
rdoc.rdoc_dir = 'rdoc'
|
46
|
-
rdoc.title = "taxamatch_rb #{version}"
|
47
|
-
rdoc.rdoc_files.include('README*')
|
48
|
-
rdoc.rdoc_files.include('lib/**/*.rb')
|
49
|
-
end
|
50
|
-
|
data/VERSION
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
0.1.2
|
@@ -1,163 +0,0 @@
|
|
1
|
-
Given /^this project is active project folder/ do
|
2
|
-
@active_project_folder = File.expand_path(File.dirname(__FILE__) + "/../..")
|
3
|
-
end
|
4
|
-
|
5
|
-
Given /^env variable \$([\w_]+) set to "(.*)"/ do |env_var, value|
|
6
|
-
ENV[env_var] = value
|
7
|
-
end
|
8
|
-
|
9
|
-
Given /"(.*)" folder is deleted/ do |folder|
|
10
|
-
in_project_folder { FileUtils.rm_rf folder }
|
11
|
-
end
|
12
|
-
|
13
|
-
When /^I invoke "(.*)" generator with arguments "(.*)"$/ do |generator, arguments|
|
14
|
-
@stdout = StringIO.new
|
15
|
-
in_project_folder do
|
16
|
-
if Object.const_defined?("APP_ROOT")
|
17
|
-
APP_ROOT.replace(FileUtils.pwd)
|
18
|
-
else
|
19
|
-
APP_ROOT = FileUtils.pwd
|
20
|
-
end
|
21
|
-
run_generator(generator, arguments.split(' '), SOURCES, :stdout => @stdout)
|
22
|
-
end
|
23
|
-
File.open(File.join(@tmp_root, "generator.out"), "w") do |f|
|
24
|
-
@stdout.rewind
|
25
|
-
f << @stdout.read
|
26
|
-
end
|
27
|
-
end
|
28
|
-
|
29
|
-
When /^I run executable "(.*)" with arguments "(.*)"/ do |executable, arguments|
|
30
|
-
@stdout = File.expand_path(File.join(@tmp_root, "executable.out"))
|
31
|
-
in_project_folder do
|
32
|
-
system "#{executable} #{arguments} > #{@stdout} 2> #{@stdout}"
|
33
|
-
end
|
34
|
-
end
|
35
|
-
|
36
|
-
When /^I run project executable "(.*)" with arguments "(.*)"/ do |executable, arguments|
|
37
|
-
@stdout = File.expand_path(File.join(@tmp_root, "executable.out"))
|
38
|
-
in_project_folder do
|
39
|
-
system "ruby #{executable} #{arguments} > #{@stdout} 2> #{@stdout}"
|
40
|
-
end
|
41
|
-
end
|
42
|
-
|
43
|
-
When /^I run local executable "(.*)" with arguments "(.*)"/ do |executable, arguments|
|
44
|
-
@stdout = File.expand_path(File.join(@tmp_root, "executable.out"))
|
45
|
-
executable = File.expand_path(File.join(File.dirname(__FILE__), "/../../bin", executable))
|
46
|
-
in_project_folder do
|
47
|
-
system "ruby #{executable} #{arguments} > #{@stdout} 2> #{@stdout}"
|
48
|
-
end
|
49
|
-
end
|
50
|
-
|
51
|
-
When /^I invoke task "rake (.*)"/ do |task|
|
52
|
-
@stdout = File.expand_path(File.join(@tmp_root, "tests.out"))
|
53
|
-
in_project_folder do
|
54
|
-
system "rake #{task} --trace > #{@stdout} 2> #{@stdout}"
|
55
|
-
end
|
56
|
-
end
|
57
|
-
|
58
|
-
Then /^folder "(.*)" (is|is not) created/ do |folder, is|
|
59
|
-
in_project_folder do
|
60
|
-
File.exists?(folder).should(is == 'is' ? be_true : be_false)
|
61
|
-
end
|
62
|
-
end
|
63
|
-
|
64
|
-
Then /^file "(.*)" (is|is not) created/ do |file, is|
|
65
|
-
in_project_folder do
|
66
|
-
File.exists?(file).should(is == 'is' ? be_true : be_false)
|
67
|
-
end
|
68
|
-
end
|
69
|
-
|
70
|
-
Then /^file with name matching "(.*)" is created/ do |pattern|
|
71
|
-
in_project_folder do
|
72
|
-
Dir[pattern].should_not be_empty
|
73
|
-
end
|
74
|
-
end
|
75
|
-
|
76
|
-
Then /^file "(.*)" contents (does|does not) match \/(.*)\// do |file, does, regex|
|
77
|
-
in_project_folder do
|
78
|
-
actual_output = File.read(file)
|
79
|
-
(does == 'does') ?
|
80
|
-
actual_output.should(match(/#{regex}/)) :
|
81
|
-
actual_output.should_not(match(/#{regex}/))
|
82
|
-
end
|
83
|
-
end
|
84
|
-
|
85
|
-
Then /gem file "(.*)" and generated file "(.*)" should be the same/ do |gem_file, project_file|
|
86
|
-
File.exists?(gem_file).should be_true
|
87
|
-
File.exists?(project_file).should be_true
|
88
|
-
gem_file_contents = File.read(File.dirname(__FILE__) + "/../../#{gem_file}")
|
89
|
-
project_file_contents = File.read(File.join(@active_project_folder, project_file))
|
90
|
-
project_file_contents.should == gem_file_contents
|
91
|
-
end
|
92
|
-
|
93
|
-
Then /^(does|does not) invoke generator "(.*)"$/ do |does_invoke, generator|
|
94
|
-
actual_output = File.read(@stdout)
|
95
|
-
does_invoke == "does" ?
|
96
|
-
actual_output.should(match(/dependency\s+#{generator}/)) :
|
97
|
-
actual_output.should_not(match(/dependency\s+#{generator}/))
|
98
|
-
end
|
99
|
-
|
100
|
-
Then /help options "(.*)" and "(.*)" are displayed/ do |opt1, opt2|
|
101
|
-
actual_output = File.read(@stdout)
|
102
|
-
actual_output.should match(/#{opt1}/)
|
103
|
-
actual_output.should match(/#{opt2}/)
|
104
|
-
end
|
105
|
-
|
106
|
-
Then /^I should see$/ do |text|
|
107
|
-
actual_output = File.read(@stdout)
|
108
|
-
actual_output.should contain(text)
|
109
|
-
end
|
110
|
-
|
111
|
-
Then /^I should not see$/ do |text|
|
112
|
-
actual_output = File.read(@stdout)
|
113
|
-
actual_output.should_not contain(text)
|
114
|
-
end
|
115
|
-
|
116
|
-
Then /^I should see exactly$/ do |text|
|
117
|
-
actual_output = File.read(@stdout)
|
118
|
-
actual_output.should == text
|
119
|
-
end
|
120
|
-
|
121
|
-
Then /^I should see all (\d+) tests pass/ do |expected_test_count|
|
122
|
-
expected = %r{^#{expected_test_count} tests, \d+ assertions, 0 failures, 0 errors}
|
123
|
-
actual_output = File.read(@stdout)
|
124
|
-
actual_output.should match(expected)
|
125
|
-
end
|
126
|
-
|
127
|
-
Then /^I should see all (\d+) examples pass/ do |expected_test_count|
|
128
|
-
expected = %r{^#{expected_test_count} examples?, 0 failures}
|
129
|
-
actual_output = File.read(@stdout)
|
130
|
-
actual_output.should match(expected)
|
131
|
-
end
|
132
|
-
|
133
|
-
Then /^yaml file "(.*)" contains (\{.*\})/ do |file, yaml|
|
134
|
-
in_project_folder do
|
135
|
-
yaml = eval yaml
|
136
|
-
YAML.load(File.read(file)).should == yaml
|
137
|
-
end
|
138
|
-
end
|
139
|
-
|
140
|
-
Then /^Rakefile can display tasks successfully/ do
|
141
|
-
@stdout = File.expand_path(File.join(@tmp_root, "rakefile.out"))
|
142
|
-
in_project_folder do
|
143
|
-
system "rake -T > #{@stdout} 2> #{@stdout}"
|
144
|
-
end
|
145
|
-
actual_output = File.read(@stdout)
|
146
|
-
actual_output.should match(/^rake\s+\w+\s+#\s.*/)
|
147
|
-
end
|
148
|
-
|
149
|
-
Then /^task "rake (.*)" is executed successfully/ do |task|
|
150
|
-
@stdout.should_not be_nil
|
151
|
-
actual_output = File.read(@stdout)
|
152
|
-
actual_output.should_not match(/^Don't know how to build task '#{task}'/)
|
153
|
-
actual_output.should_not match(/Error/i)
|
154
|
-
end
|
155
|
-
|
156
|
-
Then /^gem spec key "(.*)" contains \/(.*)\// do |key, regex|
|
157
|
-
in_project_folder do
|
158
|
-
gem_file = Dir["pkg/*.gem"].first
|
159
|
-
gem_spec = Gem::Specification.from_yaml(`gem spec #{gem_file}`)
|
160
|
-
spec_value = gem_spec.send(key.to_sym)
|
161
|
-
spec_value.to_s.should match(/#{regex}/)
|
162
|
-
end
|
163
|
-
end
|
@@ -1,92 +0,0 @@
|
|
1
|
-
str1 = str2 = block_size = max_distance = distance = dlm = nil
|
2
|
-
|
3
|
-
###############
|
4
|
-
#DAMERAU LEVENSHTEIN MOD
|
5
|
-
###############
|
6
|
-
|
7
|
-
Given /^strings "([^\"]*)" and "([^\"]*)", transposition block size "([^\"]*)", and a maximum allowed distance "([^\"]*)"$/ do |a,b,c,d|
|
8
|
-
str1 = a
|
9
|
-
str2 = b
|
10
|
-
block_size = c.to_i
|
11
|
-
max_distance = d.to_i
|
12
|
-
end
|
13
|
-
|
14
|
-
When /^I run "([^\"]*)" instance function "([^\"]*)"$/ do |arg1, arg2|
|
15
|
-
dl = eval(arg1 + ".new")
|
16
|
-
distance = dl.distance(str1, str2, block_size, max_distance)
|
17
|
-
end
|
18
|
-
|
19
|
-
Then /^I should receive edit distance "([^\"]*)"$/ do |arg1|
|
20
|
-
distance.should == arg1.to_i
|
21
|
-
end
|
22
|
-
|
23
|
-
#############
|
24
|
-
#PARSER
|
25
|
-
#############
|
26
|
-
|
27
|
-
sci_name = result = nil
|
28
|
-
parser = Parser.new
|
29
|
-
|
30
|
-
Given /^a name "([^\"]*)"$/ do |arg1|
|
31
|
-
sci_name = arg1
|
32
|
-
end
|
33
|
-
|
34
|
-
When /^I run a Parser function parse$/ do
|
35
|
-
result = parser.parse(sci_name)
|
36
|
-
end
|
37
|
-
|
38
|
-
Then /^I should receive "([^\"]*)" as genus epithet, "([^\"]*)" as species epithet, "([^\"]*)" and "([^\"]*)" as species authors, "([^\"]*)" as a species year$/ do |gen_val, sp_val, au_val1, au_val2, yr_val|
|
39
|
-
result[:genus][:epitheton].should == gen_val
|
40
|
-
result[:species][:epitheton].should == sp_val
|
41
|
-
result[:species][:authors].include?(au_val1).should be_true
|
42
|
-
result[:species][:authors].include?(au_val2).should be_true
|
43
|
-
result[:species][:years].include?(yr_val).should be_true
|
44
|
-
require 'pp'
|
45
|
-
print result
|
46
|
-
end
|
47
|
-
|
48
|
-
#############
|
49
|
-
# NORMALIZER
|
50
|
-
#############
|
51
|
-
|
52
|
-
string = normalized_string = nil
|
53
|
-
|
54
|
-
Given /^a string "([^\"]*)"$/ do |arg1|
|
55
|
-
string = arg1
|
56
|
-
end
|
57
|
-
|
58
|
-
When /^I run a Normalizer function normalize$/ do
|
59
|
-
normalized_string = Normalizer.normalize(string)
|
60
|
-
end
|
61
|
-
|
62
|
-
Then /^I should receive "([^\"]*)" as a normalized form of the string$/ do |arg1|
|
63
|
-
normalized_string.should == arg1
|
64
|
-
end
|
65
|
-
|
66
|
-
######
|
67
|
-
# PHONETIZER
|
68
|
-
#####
|
69
|
-
|
70
|
-
word = phonetized_word = nil
|
71
|
-
|
72
|
-
Given /^a word "([^\"]*)"$/ do |arg1|
|
73
|
-
word = arg1
|
74
|
-
end
|
75
|
-
|
76
|
-
When /^I run a Phonetizer function near_match$/ do
|
77
|
-
phonetized_word = Phonetizer.near_match(word)
|
78
|
-
end
|
79
|
-
|
80
|
-
Then /^I should receive "([^\"]*)" as a phonetic form of the word$/ do |arg1|
|
81
|
-
phonetized_word.should == arg1
|
82
|
-
end
|
83
|
-
|
84
|
-
|
85
|
-
When /^I run a Phonetizer function near_match with an option normalize_ending$/ do
|
86
|
-
phonetized_word = Phonetizer.near_match(word,true)
|
87
|
-
end
|
88
|
-
|
89
|
-
Then /^I should receive "([^\"]*)" as a normalized phonetic form of the word$/ do |arg1|
|
90
|
-
phonetized_word.should == arg1
|
91
|
-
end
|
92
|
-
|
data/features/support/common.rb
DELETED
@@ -1,29 +0,0 @@
|
|
1
|
-
module CommonHelpers
|
2
|
-
def in_tmp_folder(&block)
|
3
|
-
FileUtils.chdir(@tmp_root, &block)
|
4
|
-
end
|
5
|
-
|
6
|
-
def in_project_folder(&block)
|
7
|
-
project_folder = @active_project_folder || @tmp_root
|
8
|
-
FileUtils.chdir(project_folder, &block)
|
9
|
-
end
|
10
|
-
|
11
|
-
def in_home_folder(&block)
|
12
|
-
FileUtils.chdir(@home_path, &block)
|
13
|
-
end
|
14
|
-
|
15
|
-
def force_local_lib_override(project_name = @project_name)
|
16
|
-
rakefile = File.read(File.join(project_name, 'Rakefile'))
|
17
|
-
File.open(File.join(project_name, 'Rakefile'), "w+") do |f|
|
18
|
-
f << "$:.unshift('#{@lib_path}')\n"
|
19
|
-
f << rakefile
|
20
|
-
end
|
21
|
-
end
|
22
|
-
|
23
|
-
def setup_active_project_folder project_name
|
24
|
-
@active_project_folder = File.join(@tmp_root, project_name)
|
25
|
-
@project_name = project_name
|
26
|
-
end
|
27
|
-
end
|
28
|
-
|
29
|
-
World(CommonHelpers)
|
data/features/support/env.rb
DELETED
@@ -1,14 +0,0 @@
|
|
1
|
-
require File.dirname(__FILE__) + "/../../lib/taxamatch_rb"
|
2
|
-
|
3
|
-
gem 'cucumber'
|
4
|
-
require 'cucumber'
|
5
|
-
gem 'rspec'
|
6
|
-
require 'spec'
|
7
|
-
|
8
|
-
Before do
|
9
|
-
@tmp_root = File.dirname(__FILE__) + "/../../tmp"
|
10
|
-
@home_path = File.expand_path(File.join(@tmp_root, "home"))
|
11
|
-
FileUtils.rm_rf @tmp_root
|
12
|
-
FileUtils.mkdir_p @home_path
|
13
|
-
ENV['HOME'] = @home_path
|
14
|
-
end
|
@@ -1,11 +0,0 @@
|
|
1
|
-
module Matchers
|
2
|
-
def contain(expected)
|
3
|
-
simple_matcher("contain #{expected.inspect}") do |given, matcher|
|
4
|
-
matcher.failure_message = "expected #{given.inspect} to contain #{expected.inspect}"
|
5
|
-
matcher.negative_failure_message = "expected #{given.inspect} not to contain #{expected.inspect}"
|
6
|
-
given.index expected
|
7
|
-
end
|
8
|
-
end
|
9
|
-
end
|
10
|
-
|
11
|
-
World(Matchers)
|
@@ -1,33 +0,0 @@
|
|
1
|
-
Feature: Find if two scientific names are lexical variants of each other
|
2
|
-
|
3
|
-
As a Biodiversity Informatician
|
4
|
-
I want to be able to compare scientific names to determine if they are variants of the same name.
|
5
|
-
And I want to be able to combine names that are the same into lexical groups, so they appear together in names list
|
6
|
-
So I want to implement Tony Rees and Barbara Boehmer taxamatch algorithms http://bit.ly/boWyG
|
7
|
-
|
8
|
-
|
9
|
-
Scenario: find edit distance between two unicode (utf8) strings
|
10
|
-
Given strings "Sjostedt" and "Sojstedt", transposition block size "1", and a maximum allowed distance "4"
|
11
|
-
When I run "DamerauLevenshteinMod" instance function "distance"
|
12
|
-
Then I should receive edit distance "1"
|
13
|
-
|
14
|
-
Scenario: find parts of a name in unicode
|
15
|
-
Given a name "Arthopyrenia hyalospora (Banker) D. Hall 1988 hyalosporis Kutz 1999"
|
16
|
-
When I run a Parser function parse
|
17
|
-
Then I should receive "Arthopyrenia" as genus epithet, "hyalospora" as species epithet, "Banker" and "D. Hall" as species authors, "1988" as a species year
|
18
|
-
|
19
|
-
Scenario: normalize a string into ASCII upcase
|
20
|
-
Given a string "Choriozopella trägårdhi"
|
21
|
-
When I run a Normalizer function normalize
|
22
|
-
Then I should receive "CHORIOZOPELLA TRAGARDHI" as a normalized form of the string
|
23
|
-
|
24
|
-
Scenario: create phonetic version of a word
|
25
|
-
Given a word "bifasciata"
|
26
|
-
When I run a Phonetizer function near_match
|
27
|
-
Then I should receive "BIFASATA" as a phonetic form of the word
|
28
|
-
|
29
|
-
Scenario: create phonetic version of a species epithet normalizing ending
|
30
|
-
Given a word "bifasciatum"
|
31
|
-
When I run a Phonetizer function near_match with an option normalize_ending
|
32
|
-
Then I should receive "BIFASATA" as a normalized phonetic form of the word
|
33
|
-
|
File without changes
|
data/taxamatch_rb.gemspec
DELETED
@@ -1,67 +0,0 @@
|
|
1
|
-
# -*- encoding: utf-8 -*-
|
2
|
-
|
3
|
-
Gem::Specification.new do |s|
|
4
|
-
s.name = %q{taxamatch_rb}
|
5
|
-
s.version = "0.1.2"
|
6
|
-
|
7
|
-
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
8
|
-
s.authors = ["Dmitry Mozzherin"]
|
9
|
-
s.date = %q{2009-07-29}
|
10
|
-
s.email = %q{dmozzherin@eol.org}
|
11
|
-
s.extra_rdoc_files = [
|
12
|
-
"LICENSE",
|
13
|
-
"README.rdoc"
|
14
|
-
]
|
15
|
-
s.files = [
|
16
|
-
".document",
|
17
|
-
".gitignore",
|
18
|
-
"LICENSE",
|
19
|
-
"README.rdoc",
|
20
|
-
"Rakefile",
|
21
|
-
"VERSION",
|
22
|
-
"features/step_definitions/common_steps.rb",
|
23
|
-
"features/step_definitions/taxamatch_rb.rb",
|
24
|
-
"features/support/common.rb",
|
25
|
-
"features/support/env.rb",
|
26
|
-
"features/support/matchers.rb",
|
27
|
-
"features/taxamatch_rb.feature",
|
28
|
-
"lib/taxamatch_rb.rb",
|
29
|
-
"lib/taxamatch_rb/authormatch.rb",
|
30
|
-
"lib/taxamatch_rb/damerau_levenshtein_mod.rb",
|
31
|
-
"lib/taxamatch_rb/normalizer.rb",
|
32
|
-
"lib/taxamatch_rb/parser.rb",
|
33
|
-
"lib/taxamatch_rb/phonetizer.rb",
|
34
|
-
"spec/damerau_levenshtein_mod_test.txt",
|
35
|
-
"spec/spec.opts",
|
36
|
-
"spec/spec_helper.rb",
|
37
|
-
"spec/taxamatch_rb_spec.rb",
|
38
|
-
"spec/taxamatch_test.txt",
|
39
|
-
"taxamatch_rb.gemspec"
|
40
|
-
]
|
41
|
-
s.has_rdoc = true
|
42
|
-
s.homepage = %q{http://github.com/dimus/taxamatch_rb}
|
43
|
-
s.rdoc_options = ["--charset=UTF-8"]
|
44
|
-
s.require_paths = ["lib"]
|
45
|
-
s.rubygems_version = %q{1.3.1}
|
46
|
-
s.summary = %q{TODO}
|
47
|
-
s.test_files = [
|
48
|
-
"spec/spec_helper.rb",
|
49
|
-
"spec/taxamatch_rb_spec.rb"
|
50
|
-
]
|
51
|
-
|
52
|
-
if s.respond_to? :specification_version then
|
53
|
-
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
54
|
-
s.specification_version = 2
|
55
|
-
|
56
|
-
if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
|
57
|
-
s.add_runtime_dependency(%q<RubyInline>, [">= 0"])
|
58
|
-
s.add_runtime_dependency(%q<dimus-biodiversity>, [">= 0"])
|
59
|
-
else
|
60
|
-
s.add_dependency(%q<RubyInline>, [">= 0"])
|
61
|
-
s.add_dependency(%q<dimus-biodiversity>, [">= 0"])
|
62
|
-
end
|
63
|
-
else
|
64
|
-
s.add_dependency(%q<RubyInline>, [">= 0"])
|
65
|
-
s.add_dependency(%q<dimus-biodiversity>, [">= 0"])
|
66
|
-
end
|
67
|
-
end
|