taxamatch_rb 0.6.3 → 0.6.4

Sign up to get free protection for your applications and to get access to all the features.
data/lib/taxamatch_rb.rb CHANGED
@@ -7,7 +7,6 @@ require 'taxamatch_rb/atomizer'
7
7
  require 'taxamatch_rb/normalizer'
8
8
  require 'taxamatch_rb/phonetizer'
9
9
  require 'taxamatch_rb/authmatch'
10
- require 'ruby-debug'
11
10
 
12
11
  $KCODE='u' if RUBY_VERSION.split('.')[1].to_i < 9
13
12
 
@@ -66,27 +65,29 @@ module Taxamatch
66
65
  def match_genera(genus1, genus2)
67
66
  genus1_length = genus1[:normalized].size
68
67
  genus2_length = genus2[:normalized].size
68
+ min_length = [genus1_length, genus2_length].min
69
69
  match = false
70
70
  ed = @dlm.distance(genus1[:normalized], genus2[:normalized],1,3) #TODO put block = 2
71
- return {'edit_distance' => ed, 'phonetic_match' => false, 'match' => false} if ed/[genus1_length, genus2_length].min > 0.2
71
+ return {'edit_distance' => ed, 'phonetic_match' => false, 'match' => false} if ed/min_length.to_f > 0.2
72
72
  return {'edit_distance' => ed, 'phonetic_match' => true, 'match' => true} if genus1[:phonetized] == genus2[:phonetized]
73
73
 
74
- match = true if ed <= 3 && ([genus1_length, genus2_length].min > ed * 2) && (ed < 2 || genus1[0] == genus2[0])
74
+ match = true if ed <= 3 && (min_length > ed * 2) && (ed < 2 || genus1[0] == genus2[0])
75
75
  {'edit_distance' => ed, 'match' => match, 'phonetic_match' => false}
76
76
  end
77
77
 
78
78
  def match_species(sp1, sp2)
79
79
  sp1_length = sp1[:normalized].size
80
80
  sp2_length = sp2[:normalized].size
81
+ min_length = [sp1_length, sp2_length].min
81
82
  sp1[:phonetized] = Taxamatch::Phonetizer.normalize_ending sp1[:phonetized]
82
83
  sp2[:phonetized] = Taxamatch::Phonetizer.normalize_ending sp2[:phonetized]
83
84
  match = false
84
85
  ed = @dlm.distance(sp1[:normalized], sp2[:normalized], 1, 4) #TODO put block 4
85
- return {'edit_distance' => ed, 'phonetic_match' => false, 'match' => false} if ed/[sp1_length, sp2_length].min > 0.3334
86
+ return {'edit_distance' => ed, 'phonetic_match' => false, 'match' => false} if ed/min_length.to_f > 0.3334
86
87
  #puts 's: %s, %s, %s' % [sp1[:normalized], sp2[:normalized], ed]
87
88
  return {'edit_distance' => ed, 'phonetic_match' => true, 'match' => true} if sp1[:phonetized] == sp2[:phonetized]
88
89
 
89
- match = true if ed <= 4 && ([sp1_length, sp2_length].min >= ed * 2) && (ed < 2 || sp1[:normalized][0] == sp2[:normalized][0]) && (ed < 4 || sp1[:normalized][0...3] == sp2[:normalized][0...3])
90
+ match = true if ed <= 4 && (min_length >= ed * 2) && (ed < 2 || sp1[:normalized][0] == sp2[:normalized][0]) && (ed < 4 || sp1[:normalized][0...3] == sp2[:normalized][0...3])
90
91
  { 'edit_distance' => ed, 'match' => match, 'phonetic_match' => false}
91
92
  end
92
93
 
@@ -59,7 +59,7 @@ describe 'Taxamatch::Base' do
59
59
  if y
60
60
  y[2] = y[2] == 'true' ? true : false
61
61
  res = @tm.taxamatch(y[0], y[1], false)
62
- #puts "%s, %s, %s, %s" % [y[0], y[1], y[2], y[3]]
62
+ puts "%s, %s, %s, %s" % [y[0], y[1], y[2], y[3]]
63
63
  res['match'].should == y[2]
64
64
  res['edit_distance'].should == y[3].to_i
65
65
  end
@@ -89,16 +89,16 @@ describe 'Taxamatch::Base' do
89
89
  g2 = make_taxamatch_hash 'Pantheri'
90
90
  @tm.match_genera(g1, g2).should == {'phonetic_match' => false, 'edit_distance' => 1, 'match' => true}
91
91
  #phonetic match trumps everything
92
- g1 = make_taxamatch_hash 'Xantheriiiiiiiiiiiiiii'
93
- g2 = make_taxamatch_hash 'Zanthery'
92
+ g1 = make_taxamatch_hash 'Xaaaaantheriiiiiiiiiiiiiii'
93
+ g2 = make_taxamatch_hash 'Zaaaaaaaaaaaantheryyyyyyyy'
94
94
  @tm.match_genera(g1, g2).should == {'phonetic_match' => true, 'edit_distance' => 4, 'match' => true}
95
95
  #same first letter and distance 2 should match
96
- g1 = make_taxamatch_hash 'Xantherii'
97
- g2 = make_taxamatch_hash 'Xantherrr'
96
+ g1 = make_taxamatch_hash 'Xaaaantherii'
97
+ g2 = make_taxamatch_hash 'Xaaaantherrr'
98
98
  @tm.match_genera(g1, g2).should == {'phonetic_match' => false, 'match' => true, 'edit_distance' => 2}
99
99
  #First letter is the same and distance is 3 should match, no phonetic match
100
- g1 = make_taxamatch_hash 'Xantheriii'
101
- g2 = make_taxamatch_hash 'Xantherrrr'
100
+ g1 = make_taxamatch_hash 'Xaaaaaaaaaaantheriii'
101
+ g2 = make_taxamatch_hash 'Xaaaaaaaaaaantherrrr'
102
102
  @tm.match_genera(g1, g2).should == {'phonetic_match' => false, 'match' => true, 'edit_distance' => 3}
103
103
  #Should not match if one of the words is shorter than 2x edit distance and distance is 2 or 3
104
104
  g1 = make_taxamatch_hash 'Xant'
@@ -116,24 +116,24 @@ describe 'Taxamatch::Base' do
116
116
  s2 = make_taxamatch_hash 'major'
117
117
  @tm.match_species(s1, s2).should == {'phonetic_match' => true, 'match' => true, 'edit_distance' => 0}
118
118
  #Phonetic match always works
119
- s1 = make_taxamatch_hash 'xanteriiiiiiii'
120
- s2 = make_taxamatch_hash 'zantereeeeeeee'
121
- @tm.match_species(s1, s2).should == {'phonetic_match' => true, 'match' => true, 'edit_distance' => 5}
119
+ s1 = make_taxamatch_hash 'xanteriiieeeeeeeeeeeee'
120
+ s2 = make_taxamatch_hash 'zantereeeeeeeeeeeeeeee'
121
+ @tm.match_species(s1, s2).should == {'phonetic_match' => true, 'match' => true, 'edit_distance' => 4}
122
122
  #Phonetic match works with different endings
123
123
  s1 = make_taxamatch_hash 'majorum'
124
124
  s2 = make_taxamatch_hash 'majoris'
125
125
  @tm.match_species(s1, s2).should == {'phonetic_match' => true, 'match' => true, 'edit_distance' => 2}
126
126
  #Distance 4 matches if first 3 chars are the same
127
- s1 = make_taxamatch_hash 'majorrrrr'
128
- s2 = make_taxamatch_hash 'majoraaaa'
127
+ s1 = make_taxamatch_hash 'majjjjorrrrr'
128
+ s2 = make_taxamatch_hash 'majjjjoraaaa'
129
129
  @tm.match_species(s1, s2).should == {'phonetic_match' => false, 'match' => true, 'edit_distance' => 4}
130
130
  #Should not match if Distance 4 matches and first 3 chars are not the same
131
131
  s1 = make_taxamatch_hash 'majorrrrr'
132
132
  s2 = make_taxamatch_hash 'marorraaa'
133
133
  @tm.match_species(s1, s2).should == {'phonetic_match' => false, 'match' => false, 'edit_distance' => 4}
134
134
  #Distance 2 or 3 matches if first 1 char is the same
135
- s1 = make_taxamatch_hash 'morrrr'
136
- s2 = make_taxamatch_hash 'moraaa'
135
+ s1 = make_taxamatch_hash 'moooorrrr'
136
+ s2 = make_taxamatch_hash 'mooooraaa'
137
137
  @tm.match_species(s1, s2).should == {'phonetic_match' => false, 'match' => true, 'edit_distance' => 3}
138
138
  #Should not match if Distance 2 or 3 and first 1 char is not the same
139
139
  s1 = make_taxamatch_hash 'morrrr'
@@ -9,6 +9,7 @@ Pomatomus|Pomatomas|true|1
9
9
  Pomatomus L.|Pomatomas Linn.|true|1
10
10
  Pomatomus Ber|Pomatomas Linn|false|1
11
11
  Pomatomus L. 1753|Pomatomus Linn. 1800|false|0
12
+ Patella|Abbella|false|3
12
13
 
13
14
  ## additional authorship should match
14
15
  Puma concolor|Puma concolor L.|true|0
@@ -17,7 +18,7 @@ Puma concolor|Puma concolor L.|true|0
17
18
  Puma concolor|Puma cancolor|true|1
18
19
  #
19
20
  Pomatomus saltatrix|Pomatomus saltratix|true|2
20
- Pomatomus saltator|Pomatomus saltatrix|true|3
21
+ Pomatomus saltator|Pomatomus saltatrix|false|3 #!!!
21
22
  #
22
23
  Loligo pealeii|Loligo plei|false|3
23
24
  #
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 6
8
- - 3
9
- version: 0.6.3
8
+ - 4
9
+ version: 0.6.4
10
10
  platform: ruby
11
11
  authors:
12
12
  - Dmitry Mozzherin
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2010-03-30 00:00:00 -04:00
17
+ date: 2010-04-08 00:00:00 -04:00
18
18
  default_executable:
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency