dimus-taxamatch_rb 0.1.2 → 0.1.4

Sign up to get free protection for your applications and to get access to all the features.
data/lib/taxamatch_rb.rb CHANGED
@@ -5,11 +5,12 @@ require 'taxamatch_rb/damerau_levenshtein_mod'
5
5
  require 'taxamatch_rb/parser'
6
6
  require 'taxamatch_rb/normalizer'
7
7
  require 'taxamatch_rb/phonetizer'
8
+ require 'taxamatch_rb/authmatch'
8
9
 
9
10
  class Taxamatch
10
11
 
11
12
  def initialize
12
- @parser = Parser.new
13
+ @parser = TaxamatchParser.new
13
14
  @dlm = DamerauLevenshteinMod.new
14
15
  end
15
16
 
@@ -24,7 +25,7 @@ class Taxamatch
24
25
  #takes two hashes of parsed scientific names, compares them and returns the match result (false if they are neither both uninomials nor both have a genus)
25
26
  #this function is useful when species strings are preparsed.
26
27
  def taxamatch_parsed_data(parsed_data_1, parsed_data_2)
27
- return match_uninomial(parsed_data_1, parsed_data_2) if parsed_data_1[:unicode] && parsed_data_2[:unicode]
28
+ return match_uninomial(parsed_data_1, parsed_data_2) if parsed_data_1[:uninomial] && parsed_data_2[:uninomial]
28
29
  return match_multinomial(parsed_data_1, parsed_data_2) if parsed_data_1[:genus] && parsed_data_2[:genus]
29
30
  return false
30
31
  end
@@ -36,6 +37,7 @@ class Taxamatch
36
37
  def match_multinomial(parsed_data_1, parsed_data_2)
37
38
  gen_match = match_genera(parsed_data_1[:genus], parsed_data_2[:genus])
38
39
  sp_match = match_species(parsed_data_1[:species], parsed_data_2[:species])
40
+ au_match = match_authors(parsed_data_1, parsed_data_2)
39
41
  total_length = parsed_data_1[:genus][:epitheton].size + parsed_data_2[:genus][:epitheton].size + parsed_data_1[:species][:epitheton].size + parsed_data_2[:species][:epitheton].size
40
42
  match = match_matches(gen_match, sp_match)
41
43
  match.merge({:score => (1- match[:edit_distance]/(total_length/2))})
@@ -65,6 +67,14 @@ class Taxamatch
65
67
  {:edit_distance => ed, :match => match, :phonetic_match => false}
66
68
  end
67
69
 
70
+ def match_authors(parsed_data_1, parsed_data_2)
71
+ au1 = parsed_data_1[:all_authors]
72
+ au2 = parsed_data_2[:all_authors]
73
+ yr1 = parsed_data_1[:all_years]
74
+ yr2 = parsed_data_2[:all_years]
75
+ #TODO: author comparison is not wired in yet; the intended call is Authmatch.authmatch(au1, au2, yr1, yr2)
76
+ end
77
+
68
78
  def match_matches(genus_match, species_match, infraspecies_matches = [])
69
79
  match = species_match
70
80
  match[:edit_distance] += genus_match[:edit_distance]
@@ -0,0 +1,86 @@
1
# Helpers for comparing the authorship (author names and years) of two
# scientific names. All methods are class-level; no instance state is kept.
class Authmatch

  # Compares two authorships.
  #
  # Scoring is not implemented yet, so this placeholder always reports a
  # match. The intended flow is: strip authors shared by both sides with
  # remove_duplicate_authors, measure the year gap with compare_years, then
  # turn both into a score.
  #
  # authors1, authors2 - arrays of author strings
  # years1, years2     - arrays of years
  #
  # Returns true.
  def self.authmatch(authors1, authors2, years1, years2)
    # TODO: compute a score from the unique authors and the year difference.
    true
  end

  # Removes authors that appear on both sides, either verbatim or as a
  # leading abbreviation of one another ("LINN" vs "LINNAEUS").
  #
  # An abbreviation of three or more characters is treated as a match for
  # both names; a shorter one only removes the abbreviation itself. Fuzzy
  # (edit-distance) matching is not performed here.
  #
  # authors1, authors2 - arrays of author strings (nil is treated as empty)
  #
  # Returns a two-element array [unique_authors1, unique_authors2]; the
  # input arrays are not modified.
  def self.remove_duplicate_authors(authors1, authors2)
    list1 = Array(authors1)
    list2 = Array(authors2)
    unique_authors1 = list1.dup
    unique_authors2 = list2.dup

    list1.each do |au1|
      list2.each do |au2|
        au1_match = au2_match = false
        if au1 == au2
          au1_match = au2_match = true
        elsif au2.start_with?(au1)
          au1_match = true # au1 abbreviates au2
        elsif au1.start_with?(au2)
          au2_match = true # au2 abbreviates au1
        end

        if au1 == au2 || (au1_match && au1.size >= 3) || (au2_match && au2.size >= 3)
          # equal, or a long enough abbreviation: both names are accounted for
          unique_authors1.delete(au1)
          unique_authors2.delete(au2)
        elsif au1_match
          unique_authors1.delete(au1)
        elsif au2_match
          unique_authors2.delete(au2)
        end
      end
    end

    [unique_authors1, unique_authors2]
  end

  # Measures the distance between the years of two authorships.
  #
  # years1, years2 - arrays of years, given as integers or digit strings
  #                  (nil is treated as empty)
  #
  # Returns 0 when neither side has a year, the absolute difference when
  # each side has exactly one usable year, and nil in every other case
  # (the years cannot be compared).
  def self.compare_years(years1, years2)
    list1 = Array(years1)
    list2 = Array(years2)
    return 0 if list1.empty? && list2.empty?
    return nil unless list1.size == 1 && list2.size == 1

    year1 = year_to_i(list1[0])
    year2 = year_to_i(list2[0])
    return nil if year1.nil? || year2.nil?
    (year1 - year2).abs
  end

  # Converts a year given as an integer or a digit string to an Integer.
  # Returns nil when the value is not made of digits only.
  def self.year_to_i(year)
    text = year.to_s
    text =~ /\A\d+\z/ ? text.to_i : nil
  end
  private_class_method :year_to_i
end
34
+
35
+ =begin
36
+ foreach($author_words1 as $key1 => $author1)
37
+ {
38
+ $author1_matches = false;
39
+ $author1 = Normalize::normalize_author_string($author1);
40
+ foreach($author_words2 as $key2 => $author2)
41
+ {
42
+ $author2_matches = false;
43
+ $author2 = Normalize::normalize_author_string($author2);
44
+
45
+ if($author1 == $author2)
46
+ {
47
+ $author1_matches = true;
48
+ $author2_matches = true;
49
+ }elseif(preg_match("/^".preg_quote($author1, "/")."/i", $author2))
50
+ {
51
+ $author1_matches = true;
52
+ }elseif(preg_match("/^".preg_quote($author2, "/")."/i", $author1))
53
+ {
54
+ $author2_matches = true;
55
+ }
56
+
57
+ // equal or one is contained in the other, so consider it a match for both terms
58
+ if((strlen($author1)>=3 && $author1_matches) || (strlen($author2)>=3 && $author2_matches) || $author1 == $author2)
59
+ {
60
+ unset($unique_authors1[$key1]);
61
+ unset($unique_authors2[$key2]);
62
+ }elseif($author1_matches)
63
+ {
64
+ // author1 was abbreviation of author2
65
+ unset($unique_authors1[$key1]);
66
+ }elseif($author2_matches)
67
+ {
68
+ // author2 was abbreviation of author1
69
+ unset($unique_authors2[$key2]);
70
+ }else
71
+ {
72
+ // no match or abbreviation so try a fuzzy match
73
+ $max_length = max(strlen($author1), strlen($author2));
74
+ $lev = levenshtein($author1, $author2);
75
+ if(($lev/$max_length) <= .167)
76
+ {
77
+ unset($unique_authors1[$key1]);
78
+ unset($unique_authors2[$key2]);
79
+ }
80
+ }
81
+ }
82
+ reset($author_words2);
83
+ }
84
+
85
+
86
+ =end
@@ -6,7 +6,7 @@ module Normalizer
6
6
  end
7
7
 
8
8
  def self.normalize_word(word)
9
- self.normalize(word).gsub(/[^A-Z0-9\.\-]/, '')
9
+ self.normalize(word).gsub(/[^A-Z0-9\-]/, '')
10
10
  end
11
11
 
12
12
  protected
@@ -1,7 +1,7 @@
1
1
  # encoding: UTF-8
2
2
  require 'biodiversity'
3
3
 
4
- class Parser
4
+ class TaxamatchParser
5
5
  def initialize
6
6
  @parser = ScientificNameParser.new
7
7
  @parsed_raw = nil
@@ -28,7 +28,7 @@ protected
28
28
  process_node(:genus, d['genus'])
29
29
  process_node(:species, d['species'], true)
30
30
  process_infraspecies(d['infraspecies'])
31
- @res[:all_authors].uniq!
31
+ @res[:all_authors] = @res[:all_authors].uniq.map {|a| Normalizer.normalize(a)}
32
32
  @res[:all_years].uniq!
33
33
  @res.keys.size > 2 ? @res : nil
34
34
  end
@@ -16,21 +16,21 @@ end
16
16
 
17
17
  describe 'Parser' do
18
18
  before(:all) do
19
- @parser = Parser.new
19
+ @parser =TaxamatchParser.new
20
20
  end
21
21
 
22
22
  it 'should parse uninomials' do
23
23
  @parser.parse('Betula').should == {:all_authors=>[], :all_years=>[], :uninomial=>{:epitheton=>"Betula", :normalized=>"BETULA", :phonetized=>"BITILA", :authors=>[], :years=>[]}}
24
- @parser.parse('Ærenea Lacordaire, 1872').should == {:all_authors=>["Lacordaire"], :all_years=>["1872"], :uninomial=>{:epitheton=>"Aerenea", :normalized=>"AERENEA", :phonetized=>"ERINIA", :authors=>["Lacordaire"], :years=>["1872"]}}
25
- @parser.parse('Ærenea (Lacordaire, 1872) Muller 2007').should == {:all_authors=>["Lacordaire", "Muller"], :all_years=>["1872", "2007"], :uninomial=>{:epitheton=>"Aerenea", :normalized=>"AERENEA", :phonetized=>"ERINIA", :authors=>["Lacordaire", "Muller"], :years=>["1872", "2007"]}}
24
+ @parser.parse('Ærenea Lacordaire, 1872').should == {:all_authors=>["LACORDAIRE"], :all_years=>["1872"], :uninomial=>{:epitheton=>"Aerenea", :authors=>["Lacordaire"], :normalized=>"AERENEA", :phonetized=>"ERINIA", :years=>["1872"]}}
25
+ @parser.parse('Ærenea (Lacordaire, 1872) Muller 2007').should == {:all_authors=>["LACORDAIRE", "MULLER"], :all_years=>["1872", "2007"], :uninomial=>{:epitheton=>"Aerenea", :authors=>["Lacordaire", "Muller"], :normalized=>"AERENEA", :phonetized=>"ERINIA", :years=>["1872", "2007"]}}
26
26
  end
27
27
 
28
28
  it 'should parse binomials' do
29
- @parser.parse('Leœptura laetifica Dow, 1913').should == {:all_authors=>["Dow"], :all_years=>["1913"], :genus=>{:epitheton=>"Leoeptura", :normalized=>"LEOEPTURA", :phonetized=>"LIPTIRA", :authors=>[], :years=>[]}, :species=>{:epitheton=>"laetifica", :normalized=>"LAETIFICA", :phonetized=>"LITIFICA", :authors=>["Dow"], :years=>["1913"]}}
29
+ @parser.parse('Leœptura laetifica Dow, 1913').should == {:species=>{:epitheton=>"laetifica", :authors=>["Dow"], :normalized=>"LAETIFICA", :phonetized=>"LITIFICA", :years=>["1913"]}, :all_authors=>["DOW"], :all_years=>["1913"], :genus=>{:epitheton=>"Leoeptura", :authors=>[], :normalized=>"LEOEPTURA", :phonetized=>"LIPTIRA", :years=>[]}}
30
30
  end
31
31
 
32
32
  it 'should parse trinomials' do
33
- @parser.parse('Hydnellum scrobiculatum zonatum (Banker) D. Hall et D.E. Stuntz 1972').should == {:all_authors=>["Banker", "D. Hall", "D.E. Stuntz"], :all_years=>["1972"], :genus=>{:epitheton=>"Hydnellum", :normalized=>"HYDNELLUM", :phonetized=>"HIDNILIM", :authors=>[], :years=>[]}, :species=>{:epitheton=>"scrobiculatum", :normalized=>"SCROBICULATUM", :phonetized=>"SCRABICILATA", :authors=>[], :years=>[]}, :infraspecies=>[{:epitheton=>"zonatum", :normalized=>"ZONATUM", :phonetized=>"ZANATA", :authors=>["Banker", "D. Hall", "D.E. Stuntz"], :years=>["1972"]}]}
33
+ @parser.parse('Hydnellum scrobiculatum zonatum (Banker) D. Hall et D.E. Stuntz 1972').should == {:genus=>{:epitheton=>"Hydnellum", :authors=>[], :normalized=>"HYDNELLUM", :phonetized=>"HIDNILIM", :years=>[]}, :infraspecies=>[{:epitheton=>"zonatum", :authors=>["Banker", "D. Hall", "D.E. Stuntz"], :normalized=>"ZONATUM", :phonetized=>"ZANATA", :years=>["1972"]}], :all_authors=>["BANKER", "D. HALL", "D.E. STUNTZ"], :all_years=>["1972"], :species=>{:epitheton=>"scrobiculatum", :authors=>[], :normalized=>"SCROBICULATUM", :phonetized=>"SCRABICILATA", :years=>[]}}
34
34
  end
35
35
  end
36
36
 
@@ -174,6 +174,20 @@ describe 'Taxamatch' do
174
174
  smatch = {:match => true, :phonetic_match => true, :edit_distance => 2}
175
175
  @tm.match_matches(gmatch, smatch).should == {:phonetic_match=>true, :edit_distance=>4, :match=>true}
176
176
  end
177
+
178
+ describe 'Authmatch' do
179
+ before(:all) do
180
+ @am = Authmatch
181
+ end
182
+
183
+ it 'should compare years' do
184
+ @am.compare_years([1882],[1880]).should == 2
185
+ @am.compare_years([1882],[]).should == nil
186
+ @am.compare_years([],[]).should == 0
187
+ @am.compare_years([1788,1798], [1788,1798]).should be_nil
188
+ end
189
+ end
190
+
177
191
  end
178
192
 
179
193
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dimus-taxamatch_rb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dmitry Mozzherin
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-07-29 00:00:00 -07:00
12
+ date: 2009-08-02 00:00:00 -07:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
@@ -42,20 +42,9 @@ extra_rdoc_files:
42
42
  - LICENSE
43
43
  - README.rdoc
44
44
  files:
45
- - .document
46
- - .gitignore
47
- - LICENSE
48
45
  - README.rdoc
49
- - Rakefile
50
- - VERSION
51
- - features/step_definitions/common_steps.rb
52
- - features/step_definitions/taxamatch_rb.rb
53
- - features/support/common.rb
54
- - features/support/env.rb
55
- - features/support/matchers.rb
56
- - features/taxamatch_rb.feature
57
46
  - lib/taxamatch_rb.rb
58
- - lib/taxamatch_rb/authormatch.rb
47
+ - lib/taxamatch_rb/authmatch.rb
59
48
  - lib/taxamatch_rb/damerau_levenshtein_mod.rb
60
49
  - lib/taxamatch_rb/normalizer.rb
61
50
  - lib/taxamatch_rb/parser.rb
@@ -65,7 +54,7 @@ files:
65
54
  - spec/spec_helper.rb
66
55
  - spec/taxamatch_rb_spec.rb
67
56
  - spec/taxamatch_test.txt
68
- - taxamatch_rb.gemspec
57
+ - LICENSE
69
58
  has_rdoc: true
70
59
  homepage: http://github.com/dimus/taxamatch_rb
71
60
  licenses:
data/.document DELETED
@@ -1,5 +0,0 @@
1
- README.rdoc
2
- lib/**/*.rb
3
- bin/*
4
- features/**/*.feature
5
- LICENSE
data/.gitignore DELETED
@@ -1,6 +0,0 @@
1
- *.sw?
2
- .DS_Store
3
- coverage
4
- rdoc
5
- pkg
6
- tmp
data/Rakefile DELETED
@@ -1,50 +0,0 @@
1
- require 'rubygems'
2
- require 'rake'
3
-
4
- begin
5
- require 'jeweler'
6
- Jeweler::Tasks.new do |gem|
7
- gem.name = "taxamatch_rb"
8
- gem.summary = %Q{TODO}
9
- gem.email = "dmozzherin@eol.org"
10
- gem.homepage = "http://github.com/dimus/taxamatch_rb"
11
- gem.authors = ["Dmitry Mozzherin"]
12
- gem.add_dependency('RubyInline')
13
- gem.add_dependency('dimus-biodiversity')
14
- # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
15
- end
16
-
17
- rescue LoadError
18
- puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
19
- end
20
-
21
- require 'spec/rake/spectask'
22
- Spec::Rake::SpecTask.new(:spec) do |spec|
23
- spec.libs << 'lib' << 'spec'
24
- spec.spec_files = FileList['spec/**/*_spec.rb']
25
- end
26
-
27
- Spec::Rake::SpecTask.new(:rcov) do |spec|
28
- spec.libs << 'lib' << 'spec'
29
- spec.pattern = 'spec/**/*_spec.rb'
30
- spec.rcov = true
31
- end
32
-
33
-
34
- task :default => :spec
35
-
36
- require 'rake/rdoctask'
37
- Rake::RDocTask.new do |rdoc|
38
- if File.exist?('VERSION.yml')
39
- config = YAML.load(File.read('VERSION.yml'))
40
- version = "#{config[:major]}.#{config[:minor]}.#{config[:patch]}"
41
- else
42
- version = ""
43
- end
44
-
45
- rdoc.rdoc_dir = 'rdoc'
46
- rdoc.title = "taxamatch_rb #{version}"
47
- rdoc.rdoc_files.include('README*')
48
- rdoc.rdoc_files.include('lib/**/*.rb')
49
- end
50
-
data/VERSION DELETED
@@ -1 +0,0 @@
1
- 0.1.2
@@ -1,163 +0,0 @@
1
- Given /^this project is active project folder/ do
2
- @active_project_folder = File.expand_path(File.dirname(__FILE__) + "/../..")
3
- end
4
-
5
- Given /^env variable \$([\w_]+) set to "(.*)"/ do |env_var, value|
6
- ENV[env_var] = value
7
- end
8
-
9
- Given /"(.*)" folder is deleted/ do |folder|
10
- in_project_folder { FileUtils.rm_rf folder }
11
- end
12
-
13
- When /^I invoke "(.*)" generator with arguments "(.*)"$/ do |generator, arguments|
14
- @stdout = StringIO.new
15
- in_project_folder do
16
- if Object.const_defined?("APP_ROOT")
17
- APP_ROOT.replace(FileUtils.pwd)
18
- else
19
- APP_ROOT = FileUtils.pwd
20
- end
21
- run_generator(generator, arguments.split(' '), SOURCES, :stdout => @stdout)
22
- end
23
- File.open(File.join(@tmp_root, "generator.out"), "w") do |f|
24
- @stdout.rewind
25
- f << @stdout.read
26
- end
27
- end
28
-
29
- When /^I run executable "(.*)" with arguments "(.*)"/ do |executable, arguments|
30
- @stdout = File.expand_path(File.join(@tmp_root, "executable.out"))
31
- in_project_folder do
32
- system "#{executable} #{arguments} > #{@stdout} 2> #{@stdout}"
33
- end
34
- end
35
-
36
- When /^I run project executable "(.*)" with arguments "(.*)"/ do |executable, arguments|
37
- @stdout = File.expand_path(File.join(@tmp_root, "executable.out"))
38
- in_project_folder do
39
- system "ruby #{executable} #{arguments} > #{@stdout} 2> #{@stdout}"
40
- end
41
- end
42
-
43
- When /^I run local executable "(.*)" with arguments "(.*)"/ do |executable, arguments|
44
- @stdout = File.expand_path(File.join(@tmp_root, "executable.out"))
45
- executable = File.expand_path(File.join(File.dirname(__FILE__), "/../../bin", executable))
46
- in_project_folder do
47
- system "ruby #{executable} #{arguments} > #{@stdout} 2> #{@stdout}"
48
- end
49
- end
50
-
51
- When /^I invoke task "rake (.*)"/ do |task|
52
- @stdout = File.expand_path(File.join(@tmp_root, "tests.out"))
53
- in_project_folder do
54
- system "rake #{task} --trace > #{@stdout} 2> #{@stdout}"
55
- end
56
- end
57
-
58
- Then /^folder "(.*)" (is|is not) created/ do |folder, is|
59
- in_project_folder do
60
- File.exists?(folder).should(is == 'is' ? be_true : be_false)
61
- end
62
- end
63
-
64
- Then /^file "(.*)" (is|is not) created/ do |file, is|
65
- in_project_folder do
66
- File.exists?(file).should(is == 'is' ? be_true : be_false)
67
- end
68
- end
69
-
70
- Then /^file with name matching "(.*)" is created/ do |pattern|
71
- in_project_folder do
72
- Dir[pattern].should_not be_empty
73
- end
74
- end
75
-
76
- Then /^file "(.*)" contents (does|does not) match \/(.*)\// do |file, does, regex|
77
- in_project_folder do
78
- actual_output = File.read(file)
79
- (does == 'does') ?
80
- actual_output.should(match(/#{regex}/)) :
81
- actual_output.should_not(match(/#{regex}/))
82
- end
83
- end
84
-
85
- Then /gem file "(.*)" and generated file "(.*)" should be the same/ do |gem_file, project_file|
86
- File.exists?(gem_file).should be_true
87
- File.exists?(project_file).should be_true
88
- gem_file_contents = File.read(File.dirname(__FILE__) + "/../../#{gem_file}")
89
- project_file_contents = File.read(File.join(@active_project_folder, project_file))
90
- project_file_contents.should == gem_file_contents
91
- end
92
-
93
- Then /^(does|does not) invoke generator "(.*)"$/ do |does_invoke, generator|
94
- actual_output = File.read(@stdout)
95
- does_invoke == "does" ?
96
- actual_output.should(match(/dependency\s+#{generator}/)) :
97
- actual_output.should_not(match(/dependency\s+#{generator}/))
98
- end
99
-
100
- Then /help options "(.*)" and "(.*)" are displayed/ do |opt1, opt2|
101
- actual_output = File.read(@stdout)
102
- actual_output.should match(/#{opt1}/)
103
- actual_output.should match(/#{opt2}/)
104
- end
105
-
106
- Then /^I should see$/ do |text|
107
- actual_output = File.read(@stdout)
108
- actual_output.should contain(text)
109
- end
110
-
111
- Then /^I should not see$/ do |text|
112
- actual_output = File.read(@stdout)
113
- actual_output.should_not contain(text)
114
- end
115
-
116
- Then /^I should see exactly$/ do |text|
117
- actual_output = File.read(@stdout)
118
- actual_output.should == text
119
- end
120
-
121
- Then /^I should see all (\d+) tests pass/ do |expected_test_count|
122
- expected = %r{^#{expected_test_count} tests, \d+ assertions, 0 failures, 0 errors}
123
- actual_output = File.read(@stdout)
124
- actual_output.should match(expected)
125
- end
126
-
127
- Then /^I should see all (\d+) examples pass/ do |expected_test_count|
128
- expected = %r{^#{expected_test_count} examples?, 0 failures}
129
- actual_output = File.read(@stdout)
130
- actual_output.should match(expected)
131
- end
132
-
133
- Then /^yaml file "(.*)" contains (\{.*\})/ do |file, yaml|
134
- in_project_folder do
135
- yaml = eval yaml
136
- YAML.load(File.read(file)).should == yaml
137
- end
138
- end
139
-
140
- Then /^Rakefile can display tasks successfully/ do
141
- @stdout = File.expand_path(File.join(@tmp_root, "rakefile.out"))
142
- in_project_folder do
143
- system "rake -T > #{@stdout} 2> #{@stdout}"
144
- end
145
- actual_output = File.read(@stdout)
146
- actual_output.should match(/^rake\s+\w+\s+#\s.*/)
147
- end
148
-
149
- Then /^task "rake (.*)" is executed successfully/ do |task|
150
- @stdout.should_not be_nil
151
- actual_output = File.read(@stdout)
152
- actual_output.should_not match(/^Don't know how to build task '#{task}'/)
153
- actual_output.should_not match(/Error/i)
154
- end
155
-
156
- Then /^gem spec key "(.*)" contains \/(.*)\// do |key, regex|
157
- in_project_folder do
158
- gem_file = Dir["pkg/*.gem"].first
159
- gem_spec = Gem::Specification.from_yaml(`gem spec #{gem_file}`)
160
- spec_value = gem_spec.send(key.to_sym)
161
- spec_value.to_s.should match(/#{regex}/)
162
- end
163
- end
@@ -1,92 +0,0 @@
1
- str1 = str2 = block_size = max_distance = distance = dlm = nil
2
-
3
- ###############
4
- #DAMERAU LEVENSHTEIN MOD
5
- ###############
6
-
7
- Given /^strings "([^\"]*)" and "([^\"]*)", transposition block size "([^\"]*)", and a maximum allowed distance "([^\"]*)"$/ do |a,b,c,d|
8
- str1 = a
9
- str2 = b
10
- block_size = c.to_i
11
- max_distance = d.to_i
12
- end
13
-
14
- When /^I run "([^\"]*)" instance function "([^\"]*)"$/ do |arg1, arg2|
15
- dl = eval(arg1 + ".new")
16
- distance = dl.distance(str1, str2, block_size, max_distance)
17
- end
18
-
19
- Then /^I should receive edit distance "([^\"]*)"$/ do |arg1|
20
- distance.should == arg1.to_i
21
- end
22
-
23
- #############
24
- #PARSER
25
- #############
26
-
27
- sci_name = result = nil
28
- parser = Parser.new
29
-
30
- Given /^a name "([^\"]*)"$/ do |arg1|
31
- sci_name = arg1
32
- end
33
-
34
- When /^I run a Parser function parse$/ do
35
- result = parser.parse(sci_name)
36
- end
37
-
38
- Then /^I should receive "([^\"]*)" as genus epithet, "([^\"]*)" as species epithet, "([^\"]*)" and "([^\"]*)" as species authors, "([^\"]*)" as a species year$/ do |gen_val, sp_val, au_val1, au_val2, yr_val|
39
- result[:genus][:epitheton].should == gen_val
40
- result[:species][:epitheton].should == sp_val
41
- result[:species][:authors].include?(au_val1).should be_true
42
- result[:species][:authors].include?(au_val2).should be_true
43
- result[:species][:years].include?(yr_val).should be_true
44
- require 'pp'
45
- print result
46
- end
47
-
48
- #############
49
- # NORMALIZER
50
- #############
51
-
52
- string = normalized_string = nil
53
-
54
- Given /^a string "([^\"]*)"$/ do |arg1|
55
- string = arg1
56
- end
57
-
58
- When /^I run a Normalizer function normalize$/ do
59
- normalized_string = Normalizer.normalize(string)
60
- end
61
-
62
- Then /^I should receive "([^\"]*)" as a normalized form of the string$/ do |arg1|
63
- normalized_string.should == arg1
64
- end
65
-
66
- ######
67
- # PHONETIZER
68
- #####
69
-
70
- word = phonetized_word = nil
71
-
72
- Given /^a word "([^\"]*)"$/ do |arg1|
73
- word = arg1
74
- end
75
-
76
- When /^I run a Phonetizer function near_match$/ do
77
- phonetized_word = Phonetizer.near_match(word)
78
- end
79
-
80
- Then /^I should receive "([^\"]*)" as a phonetic form of the word$/ do |arg1|
81
- phonetized_word.should == arg1
82
- end
83
-
84
-
85
- When /^I run a Phonetizer function near_match with an option normalize_ending$/ do
86
- phonetized_word = Phonetizer.near_match(word,true)
87
- end
88
-
89
- Then /^I should receive "([^\"]*)" as a normalized phonetic form of the word$/ do |arg1|
90
- phonetized_word.should == arg1
91
- end
92
-
@@ -1,29 +0,0 @@
1
- module CommonHelpers
2
- def in_tmp_folder(&block)
3
- FileUtils.chdir(@tmp_root, &block)
4
- end
5
-
6
- def in_project_folder(&block)
7
- project_folder = @active_project_folder || @tmp_root
8
- FileUtils.chdir(project_folder, &block)
9
- end
10
-
11
- def in_home_folder(&block)
12
- FileUtils.chdir(@home_path, &block)
13
- end
14
-
15
- def force_local_lib_override(project_name = @project_name)
16
- rakefile = File.read(File.join(project_name, 'Rakefile'))
17
- File.open(File.join(project_name, 'Rakefile'), "w+") do |f|
18
- f << "$:.unshift('#{@lib_path}')\n"
19
- f << rakefile
20
- end
21
- end
22
-
23
- def setup_active_project_folder project_name
24
- @active_project_folder = File.join(@tmp_root, project_name)
25
- @project_name = project_name
26
- end
27
- end
28
-
29
- World(CommonHelpers)
@@ -1,14 +0,0 @@
1
- require File.dirname(__FILE__) + "/../../lib/taxamatch_rb"
2
-
3
- gem 'cucumber'
4
- require 'cucumber'
5
- gem 'rspec'
6
- require 'spec'
7
-
8
- Before do
9
- @tmp_root = File.dirname(__FILE__) + "/../../tmp"
10
- @home_path = File.expand_path(File.join(@tmp_root, "home"))
11
- FileUtils.rm_rf @tmp_root
12
- FileUtils.mkdir_p @home_path
13
- ENV['HOME'] = @home_path
14
- end
@@ -1,11 +0,0 @@
1
- module Matchers
2
- def contain(expected)
3
- simple_matcher("contain #{expected.inspect}") do |given, matcher|
4
- matcher.failure_message = "expected #{given.inspect} to contain #{expected.inspect}"
5
- matcher.negative_failure_message = "expected #{given.inspect} not to contain #{expected.inspect}"
6
- given.index expected
7
- end
8
- end
9
- end
10
-
11
- World(Matchers)
@@ -1,33 +0,0 @@
1
- Feature: Find if two scientific names are lexical variants of each other
2
-
3
- As a Biodiversity Informatician
4
- I want to be able to compare scientific names to determine if they are variants of the same name.
5
- And I want to be able to combine names that are the same into lexical groups, so they appear together in names list
6
- So I want to implement Tony Rees and Barbara Boehmer taxamatch algorithms http://bit.ly/boWyG
7
-
8
-
9
- Scenario: find edit distance between two unicode (utf8) strings
10
- Given strings "Sjostedt" and "Sojstedt", transposition block size "1", and a maximum allowed distance "4"
11
- When I run "DamerauLevenshteinMod" instance function "distance"
12
- Then I should receive edit distance "1"
13
-
14
- Scenario: find parts of a name in unicode
15
- Given a name "Arthopyrenia hyalospora (Banker) D. Hall 1988 hyalosporis Kutz 1999"
16
- When I run a Parser function parse
17
- Then I should receive "Arthopyrenia" as genus epithet, "hyalospora" as species epithet, "Banker" and "D. Hall" as species authors, "1988" as a species year
18
-
19
- Scenario: normalize a string into ASCII upcase
20
- Given a string "Choriozopella trägårdhi"
21
- When I run a Normalizer function normalize
22
- Then I should receive "CHORIOZOPELLA TRAGARDHI" as a normalized form of the string
23
-
24
- Scenario: create phonetic version of a word
25
- Given a word "bifasciata"
26
- When I run a Phonetizer function near_match
27
- Then I should receive "BIFASATA" as a phonetic form of the word
28
-
29
- Scenario: create phonetic version of a species epithet normalizing ending
30
- Given a word "bifasciatum"
31
- When I run a Phonetizer function near_match with an option normalize_ending
32
- Then I should receive "BIFASATA" as a normalized phonetic form of the word
33
-
File without changes
data/taxamatch_rb.gemspec DELETED
@@ -1,67 +0,0 @@
1
- # -*- encoding: utf-8 -*-
2
-
3
- Gem::Specification.new do |s|
4
- s.name = %q{taxamatch_rb}
5
- s.version = "0.1.2"
6
-
7
- s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
8
- s.authors = ["Dmitry Mozzherin"]
9
- s.date = %q{2009-07-29}
10
- s.email = %q{dmozzherin@eol.org}
11
- s.extra_rdoc_files = [
12
- "LICENSE",
13
- "README.rdoc"
14
- ]
15
- s.files = [
16
- ".document",
17
- ".gitignore",
18
- "LICENSE",
19
- "README.rdoc",
20
- "Rakefile",
21
- "VERSION",
22
- "features/step_definitions/common_steps.rb",
23
- "features/step_definitions/taxamatch_rb.rb",
24
- "features/support/common.rb",
25
- "features/support/env.rb",
26
- "features/support/matchers.rb",
27
- "features/taxamatch_rb.feature",
28
- "lib/taxamatch_rb.rb",
29
- "lib/taxamatch_rb/authormatch.rb",
30
- "lib/taxamatch_rb/damerau_levenshtein_mod.rb",
31
- "lib/taxamatch_rb/normalizer.rb",
32
- "lib/taxamatch_rb/parser.rb",
33
- "lib/taxamatch_rb/phonetizer.rb",
34
- "spec/damerau_levenshtein_mod_test.txt",
35
- "spec/spec.opts",
36
- "spec/spec_helper.rb",
37
- "spec/taxamatch_rb_spec.rb",
38
- "spec/taxamatch_test.txt",
39
- "taxamatch_rb.gemspec"
40
- ]
41
- s.has_rdoc = true
42
- s.homepage = %q{http://github.com/dimus/taxamatch_rb}
43
- s.rdoc_options = ["--charset=UTF-8"]
44
- s.require_paths = ["lib"]
45
- s.rubygems_version = %q{1.3.1}
46
- s.summary = %q{TODO}
47
- s.test_files = [
48
- "spec/spec_helper.rb",
49
- "spec/taxamatch_rb_spec.rb"
50
- ]
51
-
52
- if s.respond_to? :specification_version then
53
- current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
54
- s.specification_version = 2
55
-
56
- if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
57
- s.add_runtime_dependency(%q<RubyInline>, [">= 0"])
58
- s.add_runtime_dependency(%q<dimus-biodiversity>, [">= 0"])
59
- else
60
- s.add_dependency(%q<RubyInline>, [">= 0"])
61
- s.add_dependency(%q<dimus-biodiversity>, [">= 0"])
62
- end
63
- else
64
- s.add_dependency(%q<RubyInline>, [">= 0"])
65
- s.add_dependency(%q<dimus-biodiversity>, [">= 0"])
66
- end
67
- end