taxamatch_rb 0.6.5 → 0.7.4

Sign up to get free protection for your applications and to get access to all the features.
data/Gemfile.lock ADDED
@@ -0,0 +1,75 @@
1
+ GEM
2
+ remote: http://rubygems.org/
3
+ specs:
4
+ archive-tar-minitar (0.5.2)
5
+ biodiversity (0.5.16)
6
+ json
7
+ treetop
8
+ biodiversity19 (0.5.16)
9
+ json
10
+ treetop
11
+ builder (3.0.0)
12
+ columnize (0.3.3)
13
+ cucumber (1.0.0)
14
+ builder (>= 2.1.2)
15
+ diff-lcs (>= 1.1.2)
16
+ gherkin (~> 2.4.1)
17
+ json (>= 1.4.6)
18
+ term-ansicolor (>= 1.0.5)
19
+ diff-lcs (1.1.2)
20
+ gherkin (2.4.1)
21
+ json (>= 1.4.6)
22
+ git (1.2.5)
23
+ jeweler (1.6.2)
24
+ bundler (~> 1.0)
25
+ git (>= 1.2.5)
26
+ rake
27
+ json (1.5.3)
28
+ linecache19 (0.5.12)
29
+ ruby_core_source (>= 0.1.4)
30
+ mocha (0.9.12)
31
+ polyglot (0.3.1)
32
+ rake (0.9.2)
33
+ rake-compiler (0.7.9)
34
+ rake
35
+ rcov (0.9.9)
36
+ rspec (2.3.0)
37
+ rspec-core (~> 2.3.0)
38
+ rspec-expectations (~> 2.3.0)
39
+ rspec-mocks (~> 2.3.0)
40
+ rspec-core (2.3.1)
41
+ rspec-expectations (2.3.0)
42
+ diff-lcs (~> 1.1.2)
43
+ rspec-mocks (2.3.0)
44
+ ruby-debug-base19 (0.11.25)
45
+ columnize (>= 0.3.1)
46
+ linecache19 (>= 0.5.11)
47
+ ruby_core_source (>= 0.1.4)
48
+ ruby-debug19 (0.11.6)
49
+ columnize (>= 0.3.1)
50
+ linecache19 (>= 0.5.11)
51
+ ruby-debug-base19 (>= 0.11.19)
52
+ ruby-prof (0.10.7)
53
+ ruby_core_source (0.1.5)
54
+ archive-tar-minitar (>= 0.5.2)
55
+ shoulda (2.11.3)
56
+ term-ansicolor (1.0.5)
57
+ treetop (1.4.9)
58
+ polyglot (>= 0.3.1)
59
+
60
+ PLATFORMS
61
+ ruby
62
+
63
+ DEPENDENCIES
64
+ biodiversity (~> 0.5.13)
65
+ biodiversity19 (~> 0.5.13)
66
+ bundler (~> 1.0.0)
67
+ cucumber
68
+ jeweler (~> 1.6.0)
69
+ mocha
70
+ rake-compiler
71
+ rcov
72
+ rspec (~> 2.3.0)
73
+ ruby-debug19
74
+ ruby-prof
75
+ shoulda
data/README.rdoc CHANGED
@@ -14,11 +14,7 @@ Taxamatch_Rb is compatible with ruby versions 1.8.7 and 1.9.1 and higher
14
14
 
15
15
  == Installation
16
16
 
17
- sudo gem install dimus-taxamatch_rb --source http://gems.github.com
18
-
19
- or
20
- sudo gem sources -a http://gems.github.com #(you only have to do this once)
21
- sudo gem install dimus-taxamatch_rb
17
+ sudo gem install taxamatch_rb
22
18
 
23
19
  == Usage
24
20
 
@@ -0,0 +1,11 @@
1
+ # Loads mkmf which is used to make makefiles for Ruby extensions
2
+ require 'mkmf'
3
+
4
+ # Give it a name
5
+ extension_name = 'damerau_levenshtein'
6
+
7
+ # The destination
8
+ dir_config(extension_name)
9
+
10
+ # Do the work
11
+ create_makefile(extension_name)
data/lib/taxamatch_rb.rb CHANGED
@@ -8,38 +8,38 @@ require 'taxamatch_rb/normalizer'
8
8
  require 'taxamatch_rb/phonetizer'
9
9
  require 'taxamatch_rb/authmatch'
10
10
 
11
- $KCODE='u' if RUBY_VERSION.split('.')[1].to_i < 9
11
+ $KCODE='u' if RUBY_VERSION.split('.')[1].to_i < 9
12
12
 
13
13
  module Taxamatch
14
14
 
15
15
  class Base
16
-
16
+
17
17
  def initialize
18
18
  @parser = Taxamatch::Atomizer.new
19
19
  @dlm = Taxamatch::DamerauLevenshteinMod.new
20
20
  end
21
-
22
-
21
+
22
+
23
23
  #takes two scientific names and returns true if names match and false if they don't
24
- def taxamatch(str1, str2, return_boolean = true)
24
+ def taxamatch(str1, str2, return_boolean = true)
25
25
  preparsed_1 = @parser.parse(str1)
26
26
  preparsed_2 = @parser.parse(str2)
27
27
  match = taxamatch_preparsed(preparsed_1, preparsed_2) rescue nil
28
28
  return_boolean ? (!!match && match['match']) : match
29
29
  end
30
-
31
- #takes two hashes of parsed scientific names, analyses them and returns back
30
+
31
+ #takes two hashes of parsed scientific names, analyses them and returns back
32
32
  #this function is useful when species strings are preparsed.
33
33
  def taxamatch_preparsed(preparsed_1, preparsed_2)
34
34
  result = nil
35
- result = match_uninomial(preparsed_1, preparsed_2) if preparsed_1[:uninomial] && preparsed_2[:uninomial]
35
+ result = match_uninomial(preparsed_1, preparsed_2) if preparsed_1[:uninomial] && preparsed_2[:uninomial]
36
36
  result = match_multinomial(preparsed_1, preparsed_2) if preparsed_1[:genus] && preparsed_2[:genus]
37
37
  if result && result['match']
38
- result['match'] = match_authors(preparsed_1, preparsed_2) == 0 ? false : true
38
+ result['match'] = match_authors(preparsed_1, preparsed_2) == 0 ? false : true
39
39
  end
40
40
  return result
41
41
  end
42
-
42
+
43
43
  def match_uninomial(preparsed_1, preparsed_2)
44
44
  match_genera(preparsed_1[:uninomial], preparsed_2[:uninomial])
45
45
  end
@@ -54,14 +54,14 @@ module Taxamatch
54
54
  match_hash = match_matches(gen_match, sp_match, infrasp_match)
55
55
  elsif (preparsed_1[:infraspecies] && !preparsed_2[:infraspecies]) || (!preparsed_1[:infraspecies] && preparsed_2[:infraspecies])
56
56
  match_hash = { 'match' => false, 'edit_distance' => 5, 'phonetic_match' => false }
57
- total_length += preparsed_1[:infraspecies] ? preparsed_1[:infraspecies][0][:string].size : preparsed_2[:infraspecies][0][:string].size
57
+ total_length += preparsed_1[:infraspecies] ? preparsed_1[:infraspecies][0][:string].size : preparsed_2[:infraspecies][0][:string].size
58
58
  else
59
59
  match_hash = match_matches(gen_match, sp_match)
60
60
  end
61
61
  match_hash.merge({'score' => (1 - match_hash['edit_distance']/(total_length/2))})
62
62
  match_hash
63
63
  end
64
-
64
+
65
65
  def match_genera(genus1, genus2)
66
66
  genus1_length = genus1[:normalized].size
67
67
  genus2_length = genus2[:normalized].size
@@ -69,10 +69,10 @@ module Taxamatch
69
69
  match = false
70
70
  ed = @dlm.distance(genus1[:normalized], genus2[:normalized],1,3) #TODO put block = 2
71
71
  return {'edit_distance' => ed, 'phonetic_match' => false, 'match' => false} if ed/min_length.to_f > 0.2
72
- return {'edit_distance' => ed, 'phonetic_match' => true, 'match' => true} if genus1[:phonetized] == genus2[:phonetized]
73
-
72
+ return {'edit_distance' => ed, 'phonetic_match' => true, 'match' => true} if genus1[:phonetized] == genus2[:phonetized]
73
+
74
74
  match = true if ed <= 3 && (min_length > ed * 2) && (ed < 2 || genus1[0] == genus2[0])
75
- {'edit_distance' => ed, 'match' => match, 'phonetic_match' => false}
75
+ {'edit_distance' => ed, 'match' => match, 'phonetic_match' => false}
76
76
  end
77
77
 
78
78
  def match_species(sp1, sp2)
@@ -86,11 +86,11 @@ module Taxamatch
86
86
  return {'edit_distance' => ed, 'phonetic_match' => false, 'match' => false} if ed/min_length.to_f > 0.3334
87
87
  #puts 's: %s, %s, %s' % [sp1[:normalized], sp2[:normalized], ed]
88
88
  return {'edit_distance' => ed, 'phonetic_match' => true, 'match' => true} if sp1[:phonetized] == sp2[:phonetized]
89
-
89
+
90
90
  match = true if ed <= 4 && (min_length >= ed * 2) && (ed < 2 || sp1[:normalized][0] == sp2[:normalized][0]) && (ed < 4 || sp1[:normalized][0...3] == sp2[:normalized][0...3])
91
91
  { 'edit_distance' => ed, 'match' => match, 'phonetic_match' => false}
92
92
  end
93
-
93
+
94
94
  def match_authors(preparsed_1, preparsed_2)
95
95
  au1 = preparsed_1[:all_authors]
96
96
  au2 = preparsed_2[:all_authors]
@@ -98,8 +98,8 @@ module Taxamatch
98
98
  yr2 = preparsed_2[:all_years]
99
99
  Taxamatch::Authmatch.authmatch(au1, au2, yr1, yr2)
100
100
  end
101
-
102
- def match_matches(genus_match, species_match, infraspecies_match = nil)
101
+
102
+ def match_matches(genus_match, species_match, infraspecies_match = nil)
103
103
  match = species_match
104
104
  if infraspecies_match
105
105
  match['edit_distance'] += infraspecies_match['edit_distance']
@@ -1,122 +1,25 @@
1
1
  # encoding: UTF-8
2
- require 'rubygems'
3
- require 'inline'
4
- require 'time'
2
+
3
+ require File.join(File.dirname(__FILE__), 'damerau_levenshtein')
4
+
5
5
  module Taxamatch
6
6
 
7
7
  class DamerauLevenshteinMod
8
+ include DamerauLevenshtein
9
+
8
10
  def distance(str1, str2, block_size=2, max_distance=10)
9
- # puts str1.unpack("U*");
10
11
  distance_utf(str1.unpack("U*"), str2.unpack("U*"), block_size, max_distance)
11
12
  end
12
-
13
- inline do |builder|
14
- builder.c "
15
- static VALUE distance_utf(VALUE _s, VALUE _t, int block_size, int max_distance){
16
- int i, i1, j, j1, k, sl, half_sl, tl, half_tl, cost, *d, distance, del, ins, subs, transp, block;
17
- int stop_execution = 0;
18
- int min = 0;
19
- int current_distance = 0;
20
-
21
- VALUE *sv = RARRAY_PTR(_s);
22
- VALUE *tv = RARRAY_PTR(_t);
23
-
24
- sl = RARRAY_LEN(_s);
25
- tl = RARRAY_LEN(_t);
26
-
27
- if (sl == 0) return INT2NUM(tl);
28
- if (tl == 0) return INT2NUM(sl);
29
- //case of lengths 1 must present or it will break further in the code
30
- if (sl == 1 && tl == 1 && sv[0] != tv[0]) return INT2NUM(1);
31
-
32
- int s[sl];
33
- int t[tl];
34
-
35
- for (i=0; i < sl; i++) s[i] = NUM2INT(sv[i]);
36
- for (i=0; i < tl; i++) t[i] = NUM2INT(tv[i]);
37
-
38
- sl++;
39
- tl++;
40
-
41
- //one-dimentional representation of 2 dimentional array len(s)+1 * len(t)+1
42
- d = malloc((sizeof(int))*(sl)*(tl));
43
- //populate 'vertical' row starting from the 2nd position (first one is filled already)
44
- for(i = 0; i < tl; i++){
45
- d[i*sl] = i;
46
- }
47
-
48
- //fill up array with scores
49
- for(i = 1; i<sl; i++){
50
- d[i] = i;
51
- if (stop_execution == 1) break;
52
- current_distance = 10000;
53
- for(j = 1; j<tl; j++){
54
-
55
- cost = 1;
56
- if(s[i-1] == t[j-1]) cost = 0;
57
-
58
- half_sl = (sl - 1)/2;
59
- half_tl = (tl - 1)/2;
60
-
61
- block = block_size < half_sl ? block_size : half_sl;
62
- block = block < half_tl ? block : half_tl;
63
-
64
- while (block >= 1){
65
- int swap1 = 1;
66
- int swap2 = 1;
67
- i1 = i - (block * 2);
68
- j1 = j - (block * 2);
69
- for (k = i1; k < i1 + block; k++) {
70
- if (s[k] != t[k + block]){
71
- swap1 = 0;
72
- break;
73
- }
74
- }
75
- for (k = j1; k < j1 + block; k++) {
76
- if (t[k] != s[k + block]){
77
- swap2 = 0;
78
- break;
79
- }
80
- }
81
-
82
- del = d[j*sl + i - 1] + 1;
83
- ins = d[(j-1)*sl + i] + 1;
84
- min = del;
85
- if (ins < min) min = ins;
86
- //if (i == 2 && j==2) return INT2NUM(swap2+5);
87
- if (i >= block && j >= block && swap1 == 1 && swap2 == 1){
88
- transp = d[(j - block * 2) * sl + i - block * 2] + cost + block -1;
89
- if (transp < min) min = transp;
90
- block = 0;
91
- } else if (block == 1) {
92
- subs = d[(j-1)*sl + i - 1] + cost;
93
- if (subs < min) min = subs;
94
- }
95
- block--;
96
- }
97
- d[j*sl+i]=min;
98
- if (current_distance > d[j*sl+i]) current_distance = d[j*sl+i];
99
- }
100
- if (current_distance > max_distance) {
101
- stop_execution = 1;
102
- }
103
- }
104
- distance=d[sl * tl - 1];
105
- if (stop_execution == 1) distance = current_distance;
106
-
107
- free(d);
108
- return INT2NUM(distance);
109
- }
110
- "
111
- end
112
13
  end
14
+
113
15
  end
114
16
 
115
17
  if __FILE__ == $0
116
- a=Taxamatch::DamerauLevenshteinMod.new
18
+
19
+ a = Taxamatch::DamerauLevenshteinMod.new
117
20
  s = 'Cedarinia scabra Sjöstedt 1921'.unpack('U*')
118
21
  t = 'Cedarinia scabra Söjstedt 1921'.unpack('U*')
119
-
22
+
120
23
  #puts s.join(",")
121
24
  #puts t.join(",")
122
25
 
@@ -133,7 +36,7 @@ if __FILE__ == $0
133
36
  puts 'utf time: ' + (Time.now - start).to_s + ' sec'
134
37
 
135
38
  #puts a.distance('Cedarinia scabra Sjöstedt 1921','Cedarinia scabra Söjstedt 1921')
136
- #puts a.distance_utf(s, t, 2, 10)
39
+ #puts a.distance_utf(s, t, 2, 10)
137
40
  #puts a.distance('tar','atp',1,10);
138
41
  puts a.distance('sub', 'usb', 1, 10);
139
42
  end
@@ -60,4 +60,4 @@ Pxxxxomus|Pomatomus|10|1|4
60
60
  Pxxxxomus|Pomatomus|2|1|3
61
61
 
62
62
  #
63
- PUNCTATA|PUNCTATA|10|1|0
63
+ PUNCTATA|PUNCTATA|10|1|0
data/spec/spec_helper.rb CHANGED
@@ -1,10 +1,4 @@
1
- begin
2
- require 'spec'
3
- rescue LoadError
4
- require 'rubygems' unless ENV['NO_RUBYGEMS']
5
- gem 'rspec'
6
- require 'spec'
7
- end
1
+ require 'rspec'
8
2
 
9
3
  $:.unshift(File.dirname(__FILE__) + '/../lib')
10
4
  require 'taxamatch_rb'
@@ -1,5 +1,5 @@
1
1
  # encoding: UTF-8
2
- require File.dirname(__FILE__) + '/spec_helper.rb'
2
+ require 'spec_helper'
3
3
 
4
4
  describe 'DamerauLevenshteinMod' do
5
5
  it 'should get tests' do
@@ -7,7 +7,7 @@ describe 'DamerauLevenshteinMod' do
7
7
  dl = Taxamatch::DamerauLevenshteinMod.new
8
8
  if y
9
9
  res = dl.distance(y[0], y[1], y[3].to_i, y[2].to_i)
10
- #puts y if res != y[4].to_i
10
+ puts y if res != y[4].to_i
11
11
  res.should == y[4].to_i
12
12
  end
13
13
  end
@@ -18,17 +18,17 @@ describe 'Atomizer' do
18
18
  before(:all) do
19
19
  @parser = Taxamatch::Atomizer.new
20
20
  end
21
-
21
+
22
22
  it 'should parse uninomials' do
23
23
  @parser.parse('Betula').should == {:all_authors=>[], :all_years=>[], :uninomial=>{:string=>"Betula", :normalized=>"BETULA", :phonetized=>"BITILA", :authors=>[], :years=>[], :normalized_authors=>[]}}
24
24
  @parser.parse('Ærenea Lacordaire, 1872').should == {:all_authors=>["LACORDAIRE"], :all_years=>["1872"], :uninomial=>{:string=>"Aerenea", :normalized=>"AERENEA", :phonetized=>"ERINIA", :authors=>["Lacordaire"], :years=>["1872"], :normalized_authors=>["LACORDAIRE"]}}
25
25
  end
26
-
26
+
27
27
  it 'should parse binomials' do
28
28
  @parser.parse('Leœptura laetifica Dow, 1913').should == {:all_authors=>["DOW"], :all_years=>["1913"], :genus=>{:string=>"Leoeptura", :normalized=>"LEOEPTURA", :phonetized=>"LIPTIRA", :authors=>[], :years=>[], :normalized_authors=>[]}, :species=>{:string=>"laetifica", :normalized=>"LAETIFICA", :phonetized=>"LITIFICA", :authors=>["Dow"], :years=>["1913"], :normalized_authors=>["DOW"]}}
29
29
  end
30
-
31
- it 'should parse trinomials' do
30
+
31
+ it 'should parse trinomials' do
32
32
  @parser.parse('Hydnellum scrobiculatum zonatum (Banker) D. Hall et D.E. Stuntz 1972').should == {:all_authors=>["BANKER", "D HALL", "D E STUNTZ"], :all_years=>["1972"], :genus=>{:string=>"Hydnellum", :normalized=>"HYDNELLUM", :phonetized=>"HIDNILIM", :authors=>[], :years=>[], :normalized_authors=>[]}, :species=>{:string=>"scrobiculatum", :normalized=>"SCROBICULATUM", :phonetized=>"SCRABICILATA", :authors=>[], :years=>[], :normalized_authors=>[]}, :infraspecies=>[{:string=>"zonatum", :normalized=>"ZONATUM", :phonetized=>"ZANATA", :authors=>["Banker", "D. Hall", "D.E. Stuntz"], :years=>["1972"], :normalized_authors=>["BANKER", "D HALL", "D E STUNTZ"]}]}
33
33
  end
34
34
  end
@@ -42,7 +42,7 @@ describe 'Taxamatch::Normalizer' do
42
42
  Taxamatch::Normalizer.normalize('Fallén').should == 'FALLEN'
43
43
  Taxamatch::Normalizer.normalize('Choriozopella trägårdhi').should == 'CHORIOZOPELLA TRAGARDHI'
44
44
  end
45
-
45
+
46
46
  it 'should normalize words' do
47
47
  Taxamatch::Normalizer.normalize_word('L-3eœ|pt[ura$').should == 'L-3EOEPTURA'
48
48
  end
@@ -52,25 +52,25 @@ describe 'Taxamatch::Base' do
52
52
  before(:all) do
53
53
  @tm = Taxamatch::Base.new
54
54
  end
55
-
55
+
56
56
  it 'should get txt tests' do
57
57
  dl = Taxamatch::DamerauLevenshteinMod.new
58
58
  read_test_file(File.expand_path(File.dirname(__FILE__)) + '/taxamatch_test.txt', 4) do |y|
59
59
  if y
60
60
  y[2] = y[2] == 'true' ? true : false
61
61
  res = @tm.taxamatch(y[0], y[1], false)
62
- puts "%s, %s, %s, %s" % [y[0], y[1], y[2], y[3]]
62
+ puts "%s, %s, %s, %s" % [y[0], y[1], y[2], y[3]]
63
63
  res['match'].should == y[2]
64
64
  res['edit_distance'].should == y[3].to_i
65
65
  end
66
66
  end
67
67
  end
68
-
68
+
69
69
  it 'should work with names that cannot be parsed' do
70
70
  res = @tm.taxamatch('Quadraspidiotus ostreaeformis MacGillivray, 1921','Quadraspidiotus ostreaeformis Curtis)')
71
71
  res = false
72
72
  end
73
-
73
+
74
74
  it 'should compare genera' do
75
75
  #edit distance 1 always match
76
76
  g1 = make_taxamatch_hash 'Plantago'
@@ -138,17 +138,17 @@ describe 'Taxamatch::Base' do
138
138
  #Should not match if Distance 2 or 3 and first 1 char is not the same
139
139
  s1 = make_taxamatch_hash 'morrrr'
140
140
  s2 = make_taxamatch_hash 'torraa'
141
- @tm.match_species(s1, s2).should == {'phonetic_match' => false, 'match' => false, 'edit_distance' => 3}
141
+ @tm.match_species(s1, s2).should == {'phonetic_match' => false, 'match' => false, 'edit_distance' => 3}
142
142
  #Distance 1 will match anywhere
143
143
  s1 = make_taxamatch_hash 'major'
144
144
  s2 = make_taxamatch_hash 'rajor'
145
- @tm.match_species(s1, s2).should == {'phonetic_match' => false, 'match' => true, 'edit_distance' => 1}
145
+ @tm.match_species(s1, s2).should == {'phonetic_match' => false, 'match' => true, 'edit_distance' => 1}
146
146
  #Will not match if distance 3 and length is less then twice of the edit distance
147
147
  s1 = make_taxamatch_hash 'marrr'
148
148
  s2 = make_taxamatch_hash 'maaaa'
149
149
  @tm.match_species(s1, s2).should == {'phonetic_match' => false, 'match' => false, 'edit_distance' => 3}
150
150
  end
151
-
151
+
152
152
  it 'should match matches' do
153
153
  #No trobule case
154
154
  gmatch = {'match' => true, 'phonetic_match' => true, 'edit_distance' => 1}
@@ -159,7 +159,7 @@ describe 'Taxamatch::Base' do
159
159
  smatch = {'match' => true, 'phonetic_match' => true, 'edit_distance' => 1}
160
160
  @tm.match_matches(gmatch, smatch).should == {'phonetic_match'=>false, 'edit_distance'=>2, 'match'=>false}
161
161
  gmatch = {'match' => true, 'phonetic_match' => true, 'edit_distance' => 1}
162
- smatch = {'match' => false, 'phonetic_match' => false, 'edit_distance' => 1}
162
+ smatch = {'match' => false, 'phonetic_match' => false, 'edit_distance' => 1}
163
163
  @tm.match_matches(gmatch, smatch).should == {'phonetic_match'=>false, 'edit_distance'=>2, 'match'=>false}
164
164
  #Should not match if binomial edit distance > 4 NOTE: EVEN with full phonetic match
165
165
  gmatch = {'match' => true, 'phonetic_match' => true, 'edit_distance' => 3}
@@ -188,7 +188,7 @@ describe 'Taxamatch::Base' do
188
188
  before(:all) do
189
189
  @am = Taxamatch::Authmatch
190
190
  end
191
-
191
+
192
192
  it 'should calculate score' do
193
193
  res = @am.authmatch(['Linnaeus', 'Muller'], ['L'], [], [1788])
194
194
  res.should == 90
@@ -219,22 +219,22 @@ describe 'Taxamatch::Base' do
219
219
  res = @am.authmatch(['Stepanov', 'Linnaeus', 'Muller'], ['Muller', 'Kurtz', 'Stepanov'], [1766], [1765])
220
220
  res.should == 0
221
221
  end
222
-
222
+
223
223
  it 'should compare years' do
224
224
  @am.compare_years([1882],[1880]).should == 2
225
225
  @am.compare_years([1882],[]).should == nil
226
226
  @am.compare_years([],[]).should == 0
227
227
  @am.compare_years([1788,1798], [1788,1798]).should be_nil
228
228
  end
229
-
230
- it 'should remove duplicate authors' do
229
+
230
+ it 'should remove duplicate authors' do
231
231
  #Li submatches Linnaeus and it its size 3 is big enought to remove Linnaeus
232
232
  #Muller is identical
233
233
  res = @am.remove_duplicate_authors(['Lin', 'Muller'], ['Linnaeus', 'Muller'])
234
234
  res.should == [[], []]
235
235
  #same in different order
236
236
  res = @am.remove_duplicate_authors(['Linnaeus', 'Muller'], ['Linn', 'Muller'])
237
- res.should == [[], []]
237
+ res.should == [[], []]
238
238
  #auth Li submatches Linnaeus, but Li size less then 3 required to remove Linnaeus
239
239
  res = @am.remove_duplicate_authors(['Dem', 'Li'], ['Linnaeus', 'Stepanov'])
240
240
  res.should == [["Dem"], ["Linnaeus", "Stepanov"]]
@@ -252,7 +252,7 @@ describe 'Taxamatch::Base' do
252
252
  # res = @am.fuzzy_match_authors('L', 'Muller')
253
253
  # res.should be_false
254
254
  end
255
-
255
+
256
256
  end
257
257
 
258
258
  end
metadata CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
4
4
  prerelease: false
5
5
  segments:
6
6
  - 0
7
- - 6
8
- - 5
9
- version: 0.6.5
7
+ - 7
8
+ - 4
9
+ version: 0.7.4
10
10
  platform: ruby
11
11
  authors:
12
12
  - Dmitry Mozzherin
@@ -14,13 +14,42 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2010-10-07 00:00:00 -04:00
17
+ date: 2011-06-23 00:00:00 -04:00
18
18
  default_executable:
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
21
- name: RubyInline
22
- prerelease: false
21
+ name: biodiversity
23
22
  requirement: &id001 !ruby/object:Gem::Requirement
23
+ none: false
24
+ requirements:
25
+ - - ~>
26
+ - !ruby/object:Gem::Version
27
+ segments:
28
+ - 0
29
+ - 5
30
+ - 13
31
+ version: 0.5.13
32
+ type: :runtime
33
+ prerelease: false
34
+ version_requirements: *id001
35
+ - !ruby/object:Gem::Dependency
36
+ name: biodiversity19
37
+ requirement: &id002 !ruby/object:Gem::Requirement
38
+ none: false
39
+ requirements:
40
+ - - ~>
41
+ - !ruby/object:Gem::Version
42
+ segments:
43
+ - 0
44
+ - 5
45
+ - 13
46
+ version: 0.5.13
47
+ type: :runtime
48
+ prerelease: false
49
+ version_requirements: *id002
50
+ - !ruby/object:Gem::Dependency
51
+ name: rake-compiler
52
+ requirement: &id003 !ruby/object:Gem::Requirement
24
53
  none: false
25
54
  requirements:
26
55
  - - ">="
@@ -29,11 +58,134 @@ dependencies:
29
58
  - 0
30
59
  version: "0"
31
60
  type: :runtime
32
- version_requirements: *id001
61
+ prerelease: false
62
+ version_requirements: *id003
33
63
  - !ruby/object:Gem::Dependency
34
- name: biodiversity
64
+ name: rspec
65
+ requirement: &id004 !ruby/object:Gem::Requirement
66
+ none: false
67
+ requirements:
68
+ - - ~>
69
+ - !ruby/object:Gem::Version
70
+ segments:
71
+ - 2
72
+ - 3
73
+ - 0
74
+ version: 2.3.0
75
+ type: :development
35
76
  prerelease: false
36
- requirement: &id002 !ruby/object:Gem::Requirement
77
+ version_requirements: *id004
78
+ - !ruby/object:Gem::Dependency
79
+ name: cucumber
80
+ requirement: &id005 !ruby/object:Gem::Requirement
81
+ none: false
82
+ requirements:
83
+ - - ">="
84
+ - !ruby/object:Gem::Version
85
+ segments:
86
+ - 0
87
+ version: "0"
88
+ type: :development
89
+ prerelease: false
90
+ version_requirements: *id005
91
+ - !ruby/object:Gem::Dependency
92
+ name: bundler
93
+ requirement: &id006 !ruby/object:Gem::Requirement
94
+ none: false
95
+ requirements:
96
+ - - ~>
97
+ - !ruby/object:Gem::Version
98
+ segments:
99
+ - 1
100
+ - 0
101
+ - 0
102
+ version: 1.0.0
103
+ type: :development
104
+ prerelease: false
105
+ version_requirements: *id006
106
+ - !ruby/object:Gem::Dependency
107
+ name: jeweler
108
+ requirement: &id007 !ruby/object:Gem::Requirement
109
+ none: false
110
+ requirements:
111
+ - - ~>
112
+ - !ruby/object:Gem::Version
113
+ segments:
114
+ - 1
115
+ - 6
116
+ - 0
117
+ version: 1.6.0
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: *id007
121
+ - !ruby/object:Gem::Dependency
122
+ name: rcov
123
+ requirement: &id008 !ruby/object:Gem::Requirement
124
+ none: false
125
+ requirements:
126
+ - - ">="
127
+ - !ruby/object:Gem::Version
128
+ segments:
129
+ - 0
130
+ version: "0"
131
+ type: :development
132
+ prerelease: false
133
+ version_requirements: *id008
134
+ - !ruby/object:Gem::Dependency
135
+ name: ruby-debug19
136
+ requirement: &id009 !ruby/object:Gem::Requirement
137
+ none: false
138
+ requirements:
139
+ - - ">="
140
+ - !ruby/object:Gem::Version
141
+ segments:
142
+ - 0
143
+ version: "0"
144
+ type: :development
145
+ prerelease: false
146
+ version_requirements: *id009
147
+ - !ruby/object:Gem::Dependency
148
+ name: ruby-prof
149
+ requirement: &id010 !ruby/object:Gem::Requirement
150
+ none: false
151
+ requirements:
152
+ - - ">="
153
+ - !ruby/object:Gem::Version
154
+ segments:
155
+ - 0
156
+ version: "0"
157
+ type: :development
158
+ prerelease: false
159
+ version_requirements: *id010
160
+ - !ruby/object:Gem::Dependency
161
+ name: shoulda
162
+ requirement: &id011 !ruby/object:Gem::Requirement
163
+ none: false
164
+ requirements:
165
+ - - ">="
166
+ - !ruby/object:Gem::Version
167
+ segments:
168
+ - 0
169
+ version: "0"
170
+ type: :development
171
+ prerelease: false
172
+ version_requirements: *id011
173
+ - !ruby/object:Gem::Dependency
174
+ name: mocha
175
+ requirement: &id012 !ruby/object:Gem::Requirement
176
+ none: false
177
+ requirements:
178
+ - - ">="
179
+ - !ruby/object:Gem::Version
180
+ segments:
181
+ - 0
182
+ version: "0"
183
+ type: :development
184
+ prerelease: false
185
+ version_requirements: *id012
186
+ - !ruby/object:Gem::Dependency
187
+ name: biodiversity
188
+ requirement: &id013 !ruby/object:Gem::Requirement
37
189
  none: false
38
190
  requirements:
39
191
  - - ">="
@@ -44,21 +196,37 @@ dependencies:
44
196
  - 13
45
197
  version: 0.5.13
46
198
  type: :runtime
47
- version_requirements: *id002
199
+ prerelease: false
200
+ version_requirements: *id013
201
+ - !ruby/object:Gem::Dependency
202
+ name: rake-compiler
203
+ requirement: &id014 !ruby/object:Gem::Requirement
204
+ none: false
205
+ requirements:
206
+ - - ">="
207
+ - !ruby/object:Gem::Version
208
+ segments:
209
+ - 0
210
+ version: "0"
211
+ type: :runtime
212
+ prerelease: false
213
+ version_requirements: *id014
48
214
  description: This gem implements algorithm for fuzzy matching scientific names developed by Tony Rees
49
215
  email: dmozzherin@eol.org
50
216
  executables: []
51
217
 
52
- extensions: []
53
-
218
+ extensions:
219
+ - ext/damerau_levenshtein/extconf.rb
54
220
  extra_rdoc_files:
55
221
  - LICENSE
56
222
  - README.rdoc
57
223
  files:
224
+ - Gemfile.lock
58
225
  - README.rdoc
59
226
  - lib/taxamatch_rb.rb
60
227
  - lib/taxamatch_rb/atomizer.rb
61
228
  - lib/taxamatch_rb/authmatch.rb
229
+ - lib/taxamatch_rb/damerau_levenshtein.bundle
62
230
  - lib/taxamatch_rb/damerau_levenshtein_mod.rb
63
231
  - lib/taxamatch_rb/normalizer.rb
64
232
  - lib/taxamatch_rb/phonetizer.rb
@@ -68,13 +236,14 @@ files:
68
236
  - spec/taxamatch_rb_spec.rb
69
237
  - spec/taxamatch_test.txt
70
238
  - LICENSE
239
+ - ext/damerau_levenshtein/extconf.rb
71
240
  has_rdoc: true
72
241
  homepage: http://github.com/GlobalNamesArchitecture/taxamatch_rb
73
242
  licenses: []
74
243
 
75
244
  post_install_message:
76
- rdoc_options:
77
- - --charset=UTF-8
245
+ rdoc_options: []
246
+
78
247
  require_paths:
79
248
  - lib
80
249
  required_ruby_version: !ruby/object:Gem::Requirement
@@ -82,6 +251,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
82
251
  requirements:
83
252
  - - ">="
84
253
  - !ruby/object:Gem::Version
254
+ hash: -2865757795593253659
85
255
  segments:
86
256
  - 0
87
257
  version: "0"
@@ -100,6 +270,5 @@ rubygems_version: 1.3.7
100
270
  signing_key:
101
271
  specification_version: 3
102
272
  summary: Implementation of Tony Rees Taxamatch algorithms
103
- test_files:
104
- - spec/spec_helper.rb
105
- - spec/taxamatch_rb_spec.rb
273
+ test_files: []
274
+