taxamatch_rb 0.8.7 → 0.9.0

Sign up to get free protection for your applications and to get access to all the features.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.8.7
1
+ 0.9.0
data/lib/taxamatch_rb.rb CHANGED
@@ -36,7 +36,7 @@ module Taxamatch
36
36
  result = match_uninomial(preparsed_1, preparsed_2) if preparsed_1[:uninomial] && preparsed_2[:uninomial]
37
37
  result = match_multinomial(preparsed_1, preparsed_2) if preparsed_1[:genus] && preparsed_2[:genus]
38
38
  if result && result['match']
39
- result['match'] = match_authors(preparsed_1, preparsed_2) == 0 ? false : true
39
+ result['match'] = match_authors(preparsed_1, preparsed_2) == -1 ? false : true
40
40
  end
41
41
  return result
42
42
  end
@@ -63,12 +63,17 @@ module Taxamatch
63
63
  match_hash
64
64
  end
65
65
 
66
- def match_genera(genus1, genus2)
66
+ def match_genera(genus1, genus2, opts = {})
67
67
  genus1_length = genus1[:normalized].size
68
68
  genus2_length = genus2[:normalized].size
69
+ opts = {:with_phonetic_match => true}.merge(opts)
69
70
  min_length = [genus1_length, genus2_length].min
71
+ unless opts[:with_phonetic_match]
72
+ genus1[:phonetized] = "A"
73
+ genus2[:phonetized] = "B"
74
+ end
70
75
  match = false
71
- ed = @dlm.distance(genus1[:normalized], genus2[:normalized],1,3) #TODO put block = 2
76
+ ed = @dlm.distance(genus1[:normalized], genus2[:normalized], 1, 3) #TODO put block = 2
72
77
  return {'edit_distance' => ed, 'phonetic_match' => false, 'match' => false} if ed/min_length.to_f > 0.2
73
78
  return {'edit_distance' => ed, 'phonetic_match' => true, 'match' => true} if genus1[:phonetized] == genus2[:phonetized]
74
79
 
@@ -76,10 +81,15 @@ module Taxamatch
76
81
  {'edit_distance' => ed, 'match' => match, 'phonetic_match' => false}
77
82
  end
78
83
 
79
- def match_species(sp1, sp2)
84
+ def match_species(sp1, sp2, opts = {})
80
85
  sp1_length = sp1[:normalized].size
81
86
  sp2_length = sp2[:normalized].size
87
+ opts = {:with_phonetic_match => true}.merge(opts)
82
88
  min_length = [sp1_length, sp2_length].min
89
+ unless opts[:with_phonetic_match]
90
+ sp1[:phonetized] = "A"
91
+ sp2[:phonetized] = "B"
92
+ end
83
93
  sp1[:phonetized] = Taxamatch::Phonetizer.normalize_ending sp1[:phonetized]
84
94
  sp2[:phonetized] = Taxamatch::Phonetizer.normalize_ending sp2[:phonetized]
85
95
  match = false
@@ -93,11 +103,25 @@ module Taxamatch
93
103
  end
94
104
 
95
105
  def match_authors(preparsed_1, preparsed_2)
96
- au1 = preparsed_1[:all_authors]
97
- au2 = preparsed_2[:all_authors]
98
- yr1 = preparsed_1[:all_years]
99
- yr2 = preparsed_2[:all_years]
100
- Taxamatch::Authmatch.authmatch(au1, au2, yr1, yr2)
106
+ p1 = { :normalized_authors => [], :years => [] }
107
+ p2 = { :normalized_authors => [], :years => [] }
108
+ if preparsed_1[:infraspecies] || preparsed_2[:infraspecies]
109
+ p1 = preparsed_1[:infraspecies].last if preparsed_1[:infraspecies]
110
+ p2 = preparsed_2[:infraspecies].last if preparsed_2[:infraspecies]
111
+ elsif preparsed_1[:species] || preparsed_2[:species]
112
+ p1 = preparsed_1[:species] if preparsed_1[:species]
113
+ p2 = preparsed_2[:species] if preparsed_2[:species]
114
+ elsif preparsed_1[:uninomial] && preparsed_2[:uninomial]
115
+ p1 = preparsed_1[:uninomial]
116
+ p2 = preparsed_2[:uninomial]
117
+ end
118
+ au1 = p1[:normalized_authors]
119
+ au2 = p2[:normalized_authors]
120
+ yr1 = p1[:years]
121
+ yr2 = p2[:years]
122
+ return 0 if au1.empty? || au2.empty?
123
+ score = Taxamatch::Authmatch.authmatch(au1, au2, yr1, yr2)
124
+ score == 0 ? -1 : 1
101
125
  end
102
126
 
103
127
  def match_matches(genus_match, species_match, infraspecies_match = nil)
@@ -76,7 +76,7 @@ module Taxamatch
76
76
  au1_length = author1.size
77
77
  au2_length = author2.size
78
78
  dlm = DamerauLevenshtein
79
- ed = dlm.distance(author1, author2,2,3) #get around a bug in C code, but it really has to be fixed
79
+ ed = dlm.distance(author1, author2,1,3) #get around a bug in C code, but it really has to be fixed
80
80
  (ed <= 3 && ([au1_length, au2_length].min > ed * 2) && (ed < 2 || author1[0] == author2[0]))
81
81
  end
82
82
 
@@ -75,6 +75,7 @@ describe 'Taxamatch::Base' do
75
75
  g1 = make_taxamatch_hash 'Plantagi'
76
76
  g2 = make_taxamatch_hash 'Plantagy'
77
77
  @tm.match_genera(g1, g2).should == {'phonetic_match' => true, 'edit_distance' => 1, 'match' => true}
78
+ @tm.match_genera(g1, g2, :with_phonetic_match => false).should == {'phonetic_match' => false, 'edit_distance' => 1, 'match' => true}
78
79
  #distance 1 in first letter also matches
79
80
  g1 = make_taxamatch_hash 'Xantheri'
80
81
  g2 = make_taxamatch_hash 'Pantheri'
@@ -83,6 +84,7 @@ describe 'Taxamatch::Base' do
83
84
  g1 = make_taxamatch_hash 'Xaaaaantheriiiiiiiiiiiiiii'
84
85
  g2 = make_taxamatch_hash 'Zaaaaaaaaaaaantheryyyyyyyy'
85
86
  @tm.match_genera(g1, g2).should == {'phonetic_match' => true, 'edit_distance' => 4, 'match' => true}
87
+ @tm.match_genera(g1, g2, :with_phonetic_match => false).should == {'phonetic_match' => false, 'edit_distance' => 4, 'match' => false}
86
88
  #same first letter and distance 2 should match
87
89
  g1 = make_taxamatch_hash 'Xaaaantherii'
88
90
  g2 = make_taxamatch_hash 'Xaaaantherrr'
@@ -106,14 +108,17 @@ describe 'Taxamatch::Base' do
106
108
  s1 = make_taxamatch_hash 'major'
107
109
  s2 = make_taxamatch_hash 'major'
108
110
  @tm.match_species(s1, s2).should == {'phonetic_match' => true, 'match' => true, 'edit_distance' => 0}
111
+ @tm.match_species(s1, s2, :with_phonetic_match => false).should == {'phonetic_match' => false, 'match' => true, 'edit_distance' => 0}
109
112
  #Phonetic match always works
110
113
  s1 = make_taxamatch_hash 'xanteriiieeeeeeeeeeeee'
111
114
  s2 = make_taxamatch_hash 'zantereeeeeeeeeeeeeeee'
112
115
  @tm.match_species(s1, s2).should == {'phonetic_match' => true, 'match' => true, 'edit_distance' => 4}
116
+ @tm.match_species(s1, s2, :with_phonetic_match => false).should == {'phonetic_match' => false, 'match' => false, 'edit_distance' => 4}
113
117
  #Phonetic match works with different endings
114
118
  s1 = make_taxamatch_hash 'majorum'
115
119
  s2 = make_taxamatch_hash 'majoris'
116
120
  @tm.match_species(s1, s2).should == {'phonetic_match' => true, 'match' => true, 'edit_distance' => 2}
121
+ @tm.match_species(s1, s2, :with_phonetic_match => false).should == {'phonetic_match' => false, 'match' => true, 'edit_distance' => 2}
117
122
  #Distance 4 matches if first 3 chars are the same
118
123
  s1 = make_taxamatch_hash 'majjjjorrrrr'
119
124
  s2 = make_taxamatch_hash 'majjjjoraaaa'
@@ -155,7 +160,7 @@ describe 'Taxamatch::Base' do
155
160
  #Should not match if binomial edit distance > 4 NOTE: EVEN with full phonetic match
156
161
  gmatch = {'match' => true, 'phonetic_match' => true, 'edit_distance' => 3}
157
162
  smatch = {'match' => true, 'phonetic_match' => true, 'edit_distance' => 2}
158
- @tm.match_matches(gmatch, smatch).should == {'phonetic_match'=>true, 'edit_distance'=>5, 'match'=>false}
163
+ @tm.match_matches(gmatch, smatch).should == {'phonetic_match' => true, 'edit_distance' => 5, 'match' => false}
159
164
  #Should not have phonetic match if one of the components does not match phonetically
160
165
  gmatch = {'match' => true, 'phonetic_match' => false, 'edit_distance' => 1}
161
166
  smatch = {'match' => true, 'phonetic_match' => true, 'edit_distance' => 1}
@@ -173,6 +178,36 @@ describe 'Taxamatch::Base' do
173
178
  @tm.taxamatch("AJLJljljlj", "sls").should_not be_nil
174
179
  @tm.taxamatch('Olsl','a')
175
180
  end
181
+
182
+ it "should not match authors from different parts of name" do
183
+ parser = Taxamatch::Atomizer.new
184
+ t = Taxamatch::Base.new
185
+ n1 = parser.parse "Betula Linnaeus"
186
+ n2 = parser.parse "Betula alba Linnaeus"
187
+ n3 = parser.parse "Betula alba alba Linnaeus"
188
+ n4 = parser.parse "Betula alba L."
189
+ n5 = parser.parse "Betula alba"
190
+ n6 = parser.parse "Betula olba"
191
+ n7 = parser.parse "Betula alba Linnaeus alba"
192
+ n8 = parser.parse "Betula alba Linnaeus alba Smith"
193
+ n9 = parser.parse "Betula alba Smith alba L."
194
+ n10 = parser.parse "Betula Linn."
195
+ #if one authorship is empty, return 0
196
+ t.match_authors(n1, n5).should == 0
197
+ t.match_authors(n5, n1).should == 0
198
+ t.match_authors(n5, n6).should == 0
199
+ #if authorship matches on different levels ignore
200
+ t.match_authors(n7, n3).should == 0
201
+ t.match_authors(n8, n3).should == -1
202
+ t.match_authors(n2, n8).should == 0
203
+ t.match_authors(n1, n2).should == 0
204
+ # match on infraspecies level
205
+ t.match_authors(n9, n3).should == 1
206
+ # match on species level
207
+ t.match_authors(n2, n4).should == 1
208
+ # match on uninomial level
209
+ t.match_authors(n1, n10).should == 1
210
+ end
176
211
 
177
212
 
178
213
  describe 'Taxamatch::Authmatch' do
@@ -239,9 +274,8 @@ describe 'Taxamatch::Base' do
239
274
  end
240
275
 
241
276
  it 'should fuzzy match authors' do
242
- #TODO: fix the bug revealed by this test
243
- # res = @am.fuzzy_match_authors('L', 'Muller')
244
- # res.should be_false
277
+ res = @am.fuzzy_match_authors('L', 'Muller')
278
+ res.should be_false
245
279
  end
246
280
 
247
281
  end
data/taxamatch_rb.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "taxamatch_rb"
8
- s.version = "0.8.7"
8
+ s.version = "0.9.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Dmitry Mozzherin"]
12
- s.date = "2012-03-02"
12
+ s.date = "2012-03-08"
13
13
  s.description = "This gem implements algorithm for fuzzy matching scientific names developed by Tony Rees"
14
14
  s.email = "dmozzherin@eol.org"
15
15
  s.extra_rdoc_files = [
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: taxamatch_rb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.7
4
+ version: 0.9.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-03-02 00:00:00.000000000Z
12
+ date: 2012-03-08 00:00:00.000000000Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: biodiversity19
16
- requirement: &70203765081460 !ruby/object:Gem::Requirement
16
+ requirement: &70230050526820 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: 1.0.10
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *70203765081460
24
+ version_requirements: *70230050526820
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: damerau-levenshtein
27
- requirement: &70203765080860 !ruby/object:Gem::Requirement
27
+ requirement: &70230050526220 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ! '>='
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: 0.5.4
33
33
  type: :runtime
34
34
  prerelease: false
35
- version_requirements: *70203765080860
35
+ version_requirements: *70230050526220
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: rake-compiler
38
- requirement: &70203765067760 !ruby/object:Gem::Requirement
38
+ requirement: &70230050513120 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ! '>='
@@ -43,10 +43,10 @@ dependencies:
43
43
  version: '0'
44
44
  type: :development
45
45
  prerelease: false
46
- version_requirements: *70203765067760
46
+ version_requirements: *70230050513120
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: rspec
49
- requirement: &70203765067160 !ruby/object:Gem::Requirement
49
+ requirement: &70230050512520 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ~>
@@ -54,10 +54,10 @@ dependencies:
54
54
  version: 2.3.0
55
55
  type: :development
56
56
  prerelease: false
57
- version_requirements: *70203765067160
57
+ version_requirements: *70230050512520
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: cucumber
60
- requirement: &70203765066560 !ruby/object:Gem::Requirement
60
+ requirement: &70230050511860 !ruby/object:Gem::Requirement
61
61
  none: false
62
62
  requirements:
63
63
  - - ! '>='
@@ -65,10 +65,10 @@ dependencies:
65
65
  version: '0'
66
66
  type: :development
67
67
  prerelease: false
68
- version_requirements: *70203765066560
68
+ version_requirements: *70230050511860
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: bundler
71
- requirement: &70203765065960 !ruby/object:Gem::Requirement
71
+ requirement: &70230050511260 !ruby/object:Gem::Requirement
72
72
  none: false
73
73
  requirements:
74
74
  - - ~>
@@ -76,10 +76,10 @@ dependencies:
76
76
  version: 1.0.0
77
77
  type: :development
78
78
  prerelease: false
79
- version_requirements: *70203765065960
79
+ version_requirements: *70230050511260
80
80
  - !ruby/object:Gem::Dependency
81
81
  name: jeweler
82
- requirement: &70203765065380 !ruby/object:Gem::Requirement
82
+ requirement: &70230050510740 !ruby/object:Gem::Requirement
83
83
  none: false
84
84
  requirements:
85
85
  - - ~>
@@ -87,10 +87,10 @@ dependencies:
87
87
  version: 1.6.0
88
88
  type: :development
89
89
  prerelease: false
90
- version_requirements: *70203765065380
90
+ version_requirements: *70230050510740
91
91
  - !ruby/object:Gem::Dependency
92
92
  name: rcov
93
- requirement: &70203765064780 !ruby/object:Gem::Requirement
93
+ requirement: &70230050510140 !ruby/object:Gem::Requirement
94
94
  none: false
95
95
  requirements:
96
96
  - - ! '>='
@@ -98,10 +98,10 @@ dependencies:
98
98
  version: '0'
99
99
  type: :development
100
100
  prerelease: false
101
- version_requirements: *70203765064780
101
+ version_requirements: *70230050510140
102
102
  - !ruby/object:Gem::Dependency
103
103
  name: ruby-debug19
104
- requirement: &70203765064180 !ruby/object:Gem::Requirement
104
+ requirement: &70230050509540 !ruby/object:Gem::Requirement
105
105
  none: false
106
106
  requirements:
107
107
  - - ! '>='
@@ -109,10 +109,10 @@ dependencies:
109
109
  version: '0'
110
110
  type: :development
111
111
  prerelease: false
112
- version_requirements: *70203765064180
112
+ version_requirements: *70230050509540
113
113
  - !ruby/object:Gem::Dependency
114
114
  name: ruby-prof
115
- requirement: &70203765063600 !ruby/object:Gem::Requirement
115
+ requirement: &70230050508960 !ruby/object:Gem::Requirement
116
116
  none: false
117
117
  requirements:
118
118
  - - ! '>='
@@ -120,10 +120,10 @@ dependencies:
120
120
  version: '0'
121
121
  type: :development
122
122
  prerelease: false
123
- version_requirements: *70203765063600
123
+ version_requirements: *70230050508960
124
124
  - !ruby/object:Gem::Dependency
125
125
  name: shoulda
126
- requirement: &70203765063000 !ruby/object:Gem::Requirement
126
+ requirement: &70230050508360 !ruby/object:Gem::Requirement
127
127
  none: false
128
128
  requirements:
129
129
  - - ! '>='
@@ -131,10 +131,10 @@ dependencies:
131
131
  version: '0'
132
132
  type: :development
133
133
  prerelease: false
134
- version_requirements: *70203765063000
134
+ version_requirements: *70230050508360
135
135
  - !ruby/object:Gem::Dependency
136
136
  name: mocha
137
- requirement: &70203765062400 !ruby/object:Gem::Requirement
137
+ requirement: &70230050507760 !ruby/object:Gem::Requirement
138
138
  none: false
139
139
  requirements:
140
140
  - - ! '>='
@@ -142,7 +142,7 @@ dependencies:
142
142
  version: '0'
143
143
  type: :development
144
144
  prerelease: false
145
- version_requirements: *70203765062400
145
+ version_requirements: *70230050507760
146
146
  description: This gem implements algorithm for fuzzy matching scientific names developed
147
147
  by Tony Rees
148
148
  email: dmozzherin@eol.org
@@ -183,7 +183,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
183
183
  version: '0'
184
184
  segments:
185
185
  - 0
186
- hash: 2292985085106246765
186
+ hash: 1938855165172895621
187
187
  required_rubygems_version: !ruby/object:Gem::Requirement
188
188
  none: false
189
189
  requirements: