dimus-taxamatch_rb 0.5.3 → 0.5.5

Sign up to get free protection for your applications and to get access to all the features.
data/lib/taxamatch_rb.rb CHANGED
@@ -21,10 +21,11 @@ module Taxamatch
21
21
 
22
22
 
23
23
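  #takes two scientific names; by default returns true if the names match and false if they don't; when return_boolean is false, returns the full match result instead (nil if matching raised an error)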
24
- def taxamatch(str1, str2)
24
+ def taxamatch(str1, str2, return_boolean = true)
25
25
  preparsed_1 = @parser.parse(str1)
26
26
  preparsed_2 = @parser.parse(str2)
27
- taxamatch_preparsed(preparsed_1, preparsed_2)['match'] rescue false
27
+ match = taxamatch_preparsed(preparsed_1, preparsed_2) rescue nil
28
+ return_boolean && match ? match['match'] : match
28
29
  end
29
30
 
30
31
  #takes two hashes of parsed scientific names, analyses them and returns back
@@ -34,7 +35,7 @@ module Taxamatch
34
35
  result = match_uninomial(preparsed_1, preparsed_2) if preparsed_1[:uninomial] && preparsed_2[:uninomial]
35
36
  result = match_multinomial(preparsed_1, preparsed_2) if preparsed_1[:genus] && preparsed_2[:genus]
36
37
  if result && result['match']
37
- result['match'] = false if match_authors(preparsed_1, preparsed_2) == 0
38
+ result['match'] = match_authors(preparsed_1, preparsed_2) == 0 ? false : true
38
39
  end
39
40
  return result
40
41
  end
@@ -46,17 +47,16 @@ module Taxamatch
46
47
  def match_multinomial(preparsed_1, preparsed_2)
47
48
  gen_match = match_genera(preparsed_1[:genus], preparsed_2[:genus])
48
49
  sp_match = match_species(preparsed_1[:species], preparsed_2[:species])
49
- au_match = match_authors(preparsed_1, preparsed_2)
50
50
  total_length = preparsed_1[:genus][:epitheton].size + preparsed_2[:genus][:epitheton].size + preparsed_1[:species][:epitheton].size + preparsed_2[:species][:epitheton].size
51
51
  match = match_matches(gen_match, sp_match)
52
- match.merge({'score' => (1- match['edit_distance']/(total_length/2))})
52
+ match.merge({'score' => (1 - match['edit_distance']/(total_length/2))})
53
53
  end
54
54
 
55
55
  def match_genera(genus1, genus2)
56
56
  genus1_length = genus1[:normalized].size
57
57
  genus2_length = genus2[:normalized].size
58
58
  match = false
59
- ed = @dlm.distance(genus1[:normalized], genus2[:normalized],2,3)
59
+ ed = @dlm.distance(genus1[:normalized], genus2[:normalized],1,3) #TODO put block = 2
60
60
  return {'edit_distance' => ed, 'phonetic_match' => true, 'match' => true} if genus1[:phonetized] == genus2[:phonetized]
61
61
 
62
62
  match = true if ed <= 3 && ([genus1_length, genus2_length].min > ed * 2) && (ed < 2 || genus1[0] == genus2[0])
@@ -69,7 +69,8 @@ module Taxamatch
69
69
  sp1[:phonetized] = Taxamatch::Phonetizer.normalize_ending sp1[:phonetized]
70
70
  sp2[:phonetized] = Taxamatch::Phonetizer.normalize_ending sp2[:phonetized]
71
71
  match = false
72
- ed = @dlm.distance(sp1[:normalized], sp2[:normalized], 4, 4)
72
+ ed = @dlm.distance(sp1[:normalized], sp2[:normalized], 1, 4) #TODO put block 4
73
+ #puts 's: %s, %s, %s' % [sp1[:normalized], sp2[:normalized], ed]
73
74
  return {'edit_distance' => ed, 'phonetic_match' => true, 'match' => true} if sp1[:phonetized] == sp2[:phonetized]
74
75
 
75
76
  match = true if ed <= 4 && ([sp1_length, sp2_length].min >= ed * 2) && (ed < 2 || sp1[:normalized][0] == sp2[:normalized][0]) && (ed < 4 || sp1[:normalized][0...3] == sp2[:normalized][0...3])
@@ -58,3 +58,6 @@ trimerophyton|mertriophyton|10|3|3
58
58
  #it should stop trying if distance exceeds maximum allowed distance
59
59
  Pxxxxomus|Pomatomus|10|1|4
60
60
  Pxxxxomus|Pomatomus|2|1|3
61
+
62
+ #
63
+ PUNCTATA|PUNCTATA|10|1|0
@@ -55,12 +55,13 @@ describe 'Taxamatch::Base' do
55
55
 
56
56
  it 'should get txt tests' do
57
57
  dl = Taxamatch::DamerauLevenshteinMod.new
58
- read_test_file(File.expand_path(File.dirname(__FILE__)) + '/taxamatch_test.txt', 3) do |y|
58
+ read_test_file(File.expand_path(File.dirname(__FILE__)) + '/taxamatch_test.txt', 4) do |y|
59
59
  if y
60
60
  y[2] = y[2] == 'true' ? true : false
61
- res = @tm.taxamatch(y[0], y[1])
62
- puts "%s, %s, %s" % [y[0], y[1], y[2]] if res != y[2]
63
- res.should == y[2]
61
+ res = @tm.taxamatch(y[0], y[1], false)
62
+ #puts "%s, %s, %s, %s" % [y[0], y[1], y[2], y[3]] if res != y[2]
63
+ res['match'].should == y[2]
64
+ res['edit_distance'].should == y[3].to_i
64
65
  end
65
66
  end
66
67
  end
@@ -1,28 +1,34 @@
1
1
  ###
2
2
  #
3
3
  # Tests for string comparison by taxamatch algorithm
4
+ # name1|name2|match|edit_distance
4
5
  #
5
6
  ##
6
7
 
7
- # additional authorship should match
8
- Puma concolor|Puma concolor L.|true
9
-
10
- # one-letter misspeling in species epithet should match
11
- Puma concolor|Puma cancolor|true
12
-
13
- Pomatomus saltatrix|Pomatomus saltratix|true
14
- Pomatomus saltator|Pomatomus saltatrix|true
15
-
16
- Loligo pealeii|Loligo plei|false
17
-
18
- # different authors should not match
19
- Puma concolor Linnaeus|Puma concolor Kurtz|false
8
+ ## additional authorship should match
9
+ Puma concolor|Puma concolor L.|true|0
10
+ #
11
+ ## one-letter misspelling in species epithet should match
12
+ Puma concolor|Puma cancolor|true|1
13
+ #
14
+ Pomatomus saltatrix|Pomatomus saltratix|true|2
15
+ Pomatomus saltator|Pomatomus saltatrix|true|3
16
+ #
17
+ Loligo pealeii|Loligo plei|false|3
18
+ #
19
+ ## different authors should not match
20
+ Puma concolor Linnaeus|Puma concolor Kurtz|false|0
21
+ #
22
+ ##real life examples
23
+ Biatora borealis|Bactra borealis Diakonoff 1964|false|3
24
+ #
25
+ Homo sapien|Homo sapiens|true|1
26
+ Homo sapiens Linnaeus|Homo sapens (Linn. 1758) |true|1
27
+ Homo sapiens Mozzherin|Homo sapiens Linneaus|false|0
28
+ #
29
+ Quinqueloculina punctata|Quinqueloculina punctata d'Orbigny 1905|true|0
30
+ Pomatomus saltator (Linnaeus, 1766)|Pomatomus saltatrix (Linnaeus, 1766)|true|0|3
20
31
 
21
- #real life examples
22
- Biatora borealis|Bactra borealis Diakonoff 1964|false
23
32
 
24
- Homo sapien|Homo sapiens|true
25
- Homo sapiens Linnaeus|Homo sapens (Linn. 1758) |true
26
- Homo sapiens Mozzherin|Homo sapiens Linneaus|false
27
33
 
28
34
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dimus-taxamatch_rb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.3
4
+ version: 0.5.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dmitry Mozzherin
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-08-09 00:00:00 -07:00
12
+ date: 2009-08-16 00:00:00 -07:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
@@ -32,7 +32,7 @@ dependencies:
32
32
  - !ruby/object:Gem::Version
33
33
  version: "0"
34
34
  version:
35
- description:
35
+ description: This gem implements algorithms for fuzzy matching scientific names developed by Tony Rees
36
36
  email: dmozzherin@eol.org
37
37
  executables: []
38
38
 
@@ -55,7 +55,7 @@ files:
55
55
  - spec/taxamatch_rb_spec.rb
56
56
  - spec/taxamatch_test.txt
57
57
  - LICENSE
58
- has_rdoc: true
58
+ has_rdoc: false
59
59
  homepage: http://github.com/dimus/taxamatch_rb
60
60
  licenses:
61
61
  post_install_message:
@@ -80,8 +80,8 @@ requirements: []
80
80
  rubyforge_project:
81
81
  rubygems_version: 1.3.5
82
82
  signing_key:
83
- specification_version: 2
84
- summary: TODO
83
+ specification_version: 3
84
+ summary: Implementation of Tony Rees Taxamatch algorithms
85
85
  test_files:
86
86
  - spec/spec_helper.rb
87
87
  - spec/taxamatch_rb_spec.rb