taxamatch_rb 0.8.7 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.8.7
1
+ 0.9.0
data/lib/taxamatch_rb.rb CHANGED
@@ -36,7 +36,7 @@ module Taxamatch
36
36
  result = match_uninomial(preparsed_1, preparsed_2) if preparsed_1[:uninomial] && preparsed_2[:uninomial]
37
37
  result = match_multinomial(preparsed_1, preparsed_2) if preparsed_1[:genus] && preparsed_2[:genus]
38
38
  if result && result['match']
39
- result['match'] = match_authors(preparsed_1, preparsed_2) == 0 ? false : true
39
+ result['match'] = match_authors(preparsed_1, preparsed_2) == -1 ? false : true
40
40
  end
41
41
  return result
42
42
  end
@@ -63,12 +63,17 @@ module Taxamatch
63
63
  match_hash
64
64
  end
65
65
 
66
- def match_genera(genus1, genus2)
66
+ def match_genera(genus1, genus2, opts = {})
67
67
  genus1_length = genus1[:normalized].size
68
68
  genus2_length = genus2[:normalized].size
69
+ opts = {:with_phonetic_match => true}.merge(opts)
69
70
  min_length = [genus1_length, genus2_length].min
71
+ unless opts[:with_phonetic_match]
72
+ genus1[:phonetized] = "A"
73
+ genus2[:phonetized] = "B"
74
+ end
70
75
  match = false
71
- ed = @dlm.distance(genus1[:normalized], genus2[:normalized],1,3) #TODO put block = 2
76
+ ed = @dlm.distance(genus1[:normalized], genus2[:normalized], 1, 3) #TODO put block = 2
72
77
  return {'edit_distance' => ed, 'phonetic_match' => false, 'match' => false} if ed/min_length.to_f > 0.2
73
78
  return {'edit_distance' => ed, 'phonetic_match' => true, 'match' => true} if genus1[:phonetized] == genus2[:phonetized]
74
79
 
@@ -76,10 +81,15 @@ module Taxamatch
76
81
  {'edit_distance' => ed, 'match' => match, 'phonetic_match' => false}
77
82
  end
78
83
 
79
- def match_species(sp1, sp2)
84
+ def match_species(sp1, sp2, opts = {})
80
85
  sp1_length = sp1[:normalized].size
81
86
  sp2_length = sp2[:normalized].size
87
+ opts = {:with_phonetic_match => true}.merge(opts)
82
88
  min_length = [sp1_length, sp2_length].min
89
+ unless opts[:with_phonetic_match]
90
+ sp1[:phonetized] = "A"
91
+ sp2[:phonetized] = "B"
92
+ end
83
93
  sp1[:phonetized] = Taxamatch::Phonetizer.normalize_ending sp1[:phonetized]
84
94
  sp2[:phonetized] = Taxamatch::Phonetizer.normalize_ending sp2[:phonetized]
85
95
  match = false
@@ -93,11 +103,25 @@ module Taxamatch
93
103
  end
94
104
 
95
105
  def match_authors(preparsed_1, preparsed_2)
96
- au1 = preparsed_1[:all_authors]
97
- au2 = preparsed_2[:all_authors]
98
- yr1 = preparsed_1[:all_years]
99
- yr2 = preparsed_2[:all_years]
100
- Taxamatch::Authmatch.authmatch(au1, au2, yr1, yr2)
106
+ p1 = { :normalized_authors => [], :years => [] }
107
+ p2 = { :normalized_authors => [], :years => [] }
108
+ if preparsed_1[:infraspecies] || preparsed_2[:infraspecies]
109
+ p1 = preparsed_1[:infraspecies].last if preparsed_1[:infraspecies]
110
+ p2 = preparsed_2[:infraspecies].last if preparsed_2[:infraspecies]
111
+ elsif preparsed_1[:species] || preparsed_2[:species]
112
+ p1 = preparsed_1[:species] if preparsed_1[:species]
113
+ p2 = preparsed_2[:species] if preparsed_2[:species]
114
+ elsif preparsed_1[:uninomial] && preparsed_2[:uninomial]
115
+ p1 = preparsed_1[:uninomial]
116
+ p2 = preparsed_2[:uninomial]
117
+ end
118
+ au1 = p1[:normalized_authors]
119
+ au2 = p2[:normalized_authors]
120
+ yr1 = p1[:years]
121
+ yr2 = p2[:years]
122
+ return 0 if au1.empty? || au2.empty?
123
+ score = Taxamatch::Authmatch.authmatch(au1, au2, yr1, yr2)
124
+ score == 0 ? -1 : 1
101
125
  end
102
126
 
103
127
  def match_matches(genus_match, species_match, infraspecies_match = nil)
@@ -76,7 +76,7 @@ module Taxamatch
76
76
  au1_length = author1.size
77
77
  au2_length = author2.size
78
78
  dlm = DamerauLevenshtein
79
- ed = dlm.distance(author1, author2,2,3) #get around a bug in C code, but it really has to be fixed
79
+ ed = dlm.distance(author1, author2,1,3) #get around a bug in C code, but it really has to be fixed
80
80
  (ed <= 3 && ([au1_length, au2_length].min > ed * 2) && (ed < 2 || author1[0] == author2[0]))
81
81
  end
82
82
 
@@ -75,6 +75,7 @@ describe 'Taxamatch::Base' do
75
75
  g1 = make_taxamatch_hash 'Plantagi'
76
76
  g2 = make_taxamatch_hash 'Plantagy'
77
77
  @tm.match_genera(g1, g2).should == {'phonetic_match' => true, 'edit_distance' => 1, 'match' => true}
78
+ @tm.match_genera(g1, g2, :with_phonetic_match => false).should == {'phonetic_match' => false, 'edit_distance' => 1, 'match' => true}
78
79
  #distance 1 in first letter also matches
79
80
  g1 = make_taxamatch_hash 'Xantheri'
80
81
  g2 = make_taxamatch_hash 'Pantheri'
@@ -83,6 +84,7 @@ describe 'Taxamatch::Base' do
83
84
  g1 = make_taxamatch_hash 'Xaaaaantheriiiiiiiiiiiiiii'
84
85
  g2 = make_taxamatch_hash 'Zaaaaaaaaaaaantheryyyyyyyy'
85
86
  @tm.match_genera(g1, g2).should == {'phonetic_match' => true, 'edit_distance' => 4, 'match' => true}
87
+ @tm.match_genera(g1, g2, :with_phonetic_match => false).should == {'phonetic_match' => false, 'edit_distance' => 4, 'match' => false}
86
88
  #same first letter and distance 2 should match
87
89
  g1 = make_taxamatch_hash 'Xaaaantherii'
88
90
  g2 = make_taxamatch_hash 'Xaaaantherrr'
@@ -106,14 +108,17 @@ describe 'Taxamatch::Base' do
106
108
  s1 = make_taxamatch_hash 'major'
107
109
  s2 = make_taxamatch_hash 'major'
108
110
  @tm.match_species(s1, s2).should == {'phonetic_match' => true, 'match' => true, 'edit_distance' => 0}
111
+ @tm.match_species(s1, s2, :with_phonetic_match => false).should == {'phonetic_match' => false, 'match' => true, 'edit_distance' => 0}
109
112
  #Phonetic match always works
110
113
  s1 = make_taxamatch_hash 'xanteriiieeeeeeeeeeeee'
111
114
  s2 = make_taxamatch_hash 'zantereeeeeeeeeeeeeeee'
112
115
  @tm.match_species(s1, s2).should == {'phonetic_match' => true, 'match' => true, 'edit_distance' => 4}
116
+ @tm.match_species(s1, s2, :with_phonetic_match => false).should == {'phonetic_match' => false, 'match' => false, 'edit_distance' => 4}
113
117
  #Phonetic match works with different endings
114
118
  s1 = make_taxamatch_hash 'majorum'
115
119
  s2 = make_taxamatch_hash 'majoris'
116
120
  @tm.match_species(s1, s2).should == {'phonetic_match' => true, 'match' => true, 'edit_distance' => 2}
121
+ @tm.match_species(s1, s2, :with_phonetic_match => false).should == {'phonetic_match' => false, 'match' => true, 'edit_distance' => 2}
117
122
  #Distance 4 matches if first 3 chars are the same
118
123
  s1 = make_taxamatch_hash 'majjjjorrrrr'
119
124
  s2 = make_taxamatch_hash 'majjjjoraaaa'
@@ -155,7 +160,7 @@ describe 'Taxamatch::Base' do
155
160
  #Should not match if binomial edit distance > 4 NOTE: EVEN with full phonetic match
156
161
  gmatch = {'match' => true, 'phonetic_match' => true, 'edit_distance' => 3}
157
162
  smatch = {'match' => true, 'phonetic_match' => true, 'edit_distance' => 2}
158
- @tm.match_matches(gmatch, smatch).should == {'phonetic_match'=>true, 'edit_distance'=>5, 'match'=>false}
163
+ @tm.match_matches(gmatch, smatch).should == {'phonetic_match' => true, 'edit_distance' => 5, 'match' => false}
159
164
  #Should not have phonetic match if one of the components does not match phonetically
160
165
  gmatch = {'match' => true, 'phonetic_match' => false, 'edit_distance' => 1}
161
166
  smatch = {'match' => true, 'phonetic_match' => true, 'edit_distance' => 1}
@@ -173,6 +178,36 @@ describe 'Taxamatch::Base' do
173
178
  @tm.taxamatch("AJLJljljlj", "sls").should_not be_nil
174
179
  @tm.taxamatch('Olsl','a')
175
180
  end
181
+
182
+ it "should not match authors from different parts of name" do
183
+ parser = Taxamatch::Atomizer.new
184
+ t = Taxamatch::Base.new
185
+ n1 = parser.parse "Betula Linnaeus"
186
+ n2 = parser.parse "Betula alba Linnaeus"
187
+ n3 = parser.parse "Betula alba alba Linnaeus"
188
+ n4 = parser.parse "Betula alba L."
189
+ n5 = parser.parse "Betula alba"
190
+ n6 = parser.parse "Betula olba"
191
+ n7 = parser.parse "Betula alba Linnaeus alba"
192
+ n8 = parser.parse "Betula alba Linnaeus alba Smith"
193
+ n9 = parser.parse "Betula alba Smith alba L."
194
+ n10 = parser.parse "Betula Linn."
195
+ #if one authorship is empty, return 0
196
+ t.match_authors(n1, n5).should == 0
197
+ t.match_authors(n5, n1).should == 0
198
+ t.match_authors(n5, n6).should == 0
199
+ #if authorship matches on different levels ignore
200
+ t.match_authors(n7, n3).should == 0
201
+ t.match_authors(n8, n3).should == -1
202
+ t.match_authors(n2, n8).should == 0
203
+ t.match_authors(n1, n2).should == 0
204
+ # match on infraspecies level
205
+ t.match_authors(n9, n3).should == 1
206
+ # match on species level
207
+ t.match_authors(n2, n4).should == 1
208
+ # match on uninomial level
209
+ t.match_authors(n1, n10).should == 1
210
+ end
176
211
 
177
212
 
178
213
  describe 'Taxamatch::Authmatch' do
@@ -239,9 +274,8 @@ describe 'Taxamatch::Base' do
239
274
  end
240
275
 
241
276
  it 'should fuzzy match authors' do
242
- #TODO: fix the bug revealed by this test
243
- # res = @am.fuzzy_match_authors('L', 'Muller')
244
- # res.should be_false
277
+ res = @am.fuzzy_match_authors('L', 'Muller')
278
+ res.should be_false
245
279
  end
246
280
 
247
281
  end
data/taxamatch_rb.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "taxamatch_rb"
8
- s.version = "0.8.7"
8
+ s.version = "0.9.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Dmitry Mozzherin"]
12
- s.date = "2012-03-02"
12
+ s.date = "2012-03-08"
13
13
  s.description = "This gem implements algorithm for fuzzy matching scientific names developed by Tony Rees"
14
14
  s.email = "dmozzherin@eol.org"
15
15
  s.extra_rdoc_files = [
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: taxamatch_rb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.7
4
+ version: 0.9.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-03-02 00:00:00.000000000Z
12
+ date: 2012-03-08 00:00:00.000000000Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: biodiversity19
16
- requirement: &70203765081460 !ruby/object:Gem::Requirement
16
+ requirement: &70230050526820 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: 1.0.10
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *70203765081460
24
+ version_requirements: *70230050526820
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: damerau-levenshtein
27
- requirement: &70203765080860 !ruby/object:Gem::Requirement
27
+ requirement: &70230050526220 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ! '>='
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: 0.5.4
33
33
  type: :runtime
34
34
  prerelease: false
35
- version_requirements: *70203765080860
35
+ version_requirements: *70230050526220
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: rake-compiler
38
- requirement: &70203765067760 !ruby/object:Gem::Requirement
38
+ requirement: &70230050513120 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ! '>='
@@ -43,10 +43,10 @@ dependencies:
43
43
  version: '0'
44
44
  type: :development
45
45
  prerelease: false
46
- version_requirements: *70203765067760
46
+ version_requirements: *70230050513120
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: rspec
49
- requirement: &70203765067160 !ruby/object:Gem::Requirement
49
+ requirement: &70230050512520 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ~>
@@ -54,10 +54,10 @@ dependencies:
54
54
  version: 2.3.0
55
55
  type: :development
56
56
  prerelease: false
57
- version_requirements: *70203765067160
57
+ version_requirements: *70230050512520
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: cucumber
60
- requirement: &70203765066560 !ruby/object:Gem::Requirement
60
+ requirement: &70230050511860 !ruby/object:Gem::Requirement
61
61
  none: false
62
62
  requirements:
63
63
  - - ! '>='
@@ -65,10 +65,10 @@ dependencies:
65
65
  version: '0'
66
66
  type: :development
67
67
  prerelease: false
68
- version_requirements: *70203765066560
68
+ version_requirements: *70230050511860
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: bundler
71
- requirement: &70203765065960 !ruby/object:Gem::Requirement
71
+ requirement: &70230050511260 !ruby/object:Gem::Requirement
72
72
  none: false
73
73
  requirements:
74
74
  - - ~>
@@ -76,10 +76,10 @@ dependencies:
76
76
  version: 1.0.0
77
77
  type: :development
78
78
  prerelease: false
79
- version_requirements: *70203765065960
79
+ version_requirements: *70230050511260
80
80
  - !ruby/object:Gem::Dependency
81
81
  name: jeweler
82
- requirement: &70203765065380 !ruby/object:Gem::Requirement
82
+ requirement: &70230050510740 !ruby/object:Gem::Requirement
83
83
  none: false
84
84
  requirements:
85
85
  - - ~>
@@ -87,10 +87,10 @@ dependencies:
87
87
  version: 1.6.0
88
88
  type: :development
89
89
  prerelease: false
90
- version_requirements: *70203765065380
90
+ version_requirements: *70230050510740
91
91
  - !ruby/object:Gem::Dependency
92
92
  name: rcov
93
- requirement: &70203765064780 !ruby/object:Gem::Requirement
93
+ requirement: &70230050510140 !ruby/object:Gem::Requirement
94
94
  none: false
95
95
  requirements:
96
96
  - - ! '>='
@@ -98,10 +98,10 @@ dependencies:
98
98
  version: '0'
99
99
  type: :development
100
100
  prerelease: false
101
- version_requirements: *70203765064780
101
+ version_requirements: *70230050510140
102
102
  - !ruby/object:Gem::Dependency
103
103
  name: ruby-debug19
104
- requirement: &70203765064180 !ruby/object:Gem::Requirement
104
+ requirement: &70230050509540 !ruby/object:Gem::Requirement
105
105
  none: false
106
106
  requirements:
107
107
  - - ! '>='
@@ -109,10 +109,10 @@ dependencies:
109
109
  version: '0'
110
110
  type: :development
111
111
  prerelease: false
112
- version_requirements: *70203765064180
112
+ version_requirements: *70230050509540
113
113
  - !ruby/object:Gem::Dependency
114
114
  name: ruby-prof
115
- requirement: &70203765063600 !ruby/object:Gem::Requirement
115
+ requirement: &70230050508960 !ruby/object:Gem::Requirement
116
116
  none: false
117
117
  requirements:
118
118
  - - ! '>='
@@ -120,10 +120,10 @@ dependencies:
120
120
  version: '0'
121
121
  type: :development
122
122
  prerelease: false
123
- version_requirements: *70203765063600
123
+ version_requirements: *70230050508960
124
124
  - !ruby/object:Gem::Dependency
125
125
  name: shoulda
126
- requirement: &70203765063000 !ruby/object:Gem::Requirement
126
+ requirement: &70230050508360 !ruby/object:Gem::Requirement
127
127
  none: false
128
128
  requirements:
129
129
  - - ! '>='
@@ -131,10 +131,10 @@ dependencies:
131
131
  version: '0'
132
132
  type: :development
133
133
  prerelease: false
134
- version_requirements: *70203765063000
134
+ version_requirements: *70230050508360
135
135
  - !ruby/object:Gem::Dependency
136
136
  name: mocha
137
- requirement: &70203765062400 !ruby/object:Gem::Requirement
137
+ requirement: &70230050507760 !ruby/object:Gem::Requirement
138
138
  none: false
139
139
  requirements:
140
140
  - - ! '>='
@@ -142,7 +142,7 @@ dependencies:
142
142
  version: '0'
143
143
  type: :development
144
144
  prerelease: false
145
- version_requirements: *70203765062400
145
+ version_requirements: *70230050507760
146
146
  description: This gem implements algorithm for fuzzy matching scientific names developed
147
147
  by Tony Rees
148
148
  email: dmozzherin@eol.org
@@ -183,7 +183,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
183
183
  version: '0'
184
184
  segments:
185
185
  - 0
186
- hash: 2292985085106246765
186
+ hash: 1938855165172895621
187
187
  required_rubygems_version: !ruby/object:Gem::Requirement
188
188
  none: false
189
189
  requirements: