taxamatch_rb 0.6.5 → 0.7.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Gemfile.lock ADDED
@@ -0,0 +1,75 @@
1
+ GEM
2
+ remote: http://rubygems.org/
3
+ specs:
4
+ archive-tar-minitar (0.5.2)
5
+ biodiversity (0.5.16)
6
+ json
7
+ treetop
8
+ biodiversity19 (0.5.16)
9
+ json
10
+ treetop
11
+ builder (3.0.0)
12
+ columnize (0.3.3)
13
+ cucumber (1.0.0)
14
+ builder (>= 2.1.2)
15
+ diff-lcs (>= 1.1.2)
16
+ gherkin (~> 2.4.1)
17
+ json (>= 1.4.6)
18
+ term-ansicolor (>= 1.0.5)
19
+ diff-lcs (1.1.2)
20
+ gherkin (2.4.1)
21
+ json (>= 1.4.6)
22
+ git (1.2.5)
23
+ jeweler (1.6.2)
24
+ bundler (~> 1.0)
25
+ git (>= 1.2.5)
26
+ rake
27
+ json (1.5.3)
28
+ linecache19 (0.5.12)
29
+ ruby_core_source (>= 0.1.4)
30
+ mocha (0.9.12)
31
+ polyglot (0.3.1)
32
+ rake (0.9.2)
33
+ rake-compiler (0.7.9)
34
+ rake
35
+ rcov (0.9.9)
36
+ rspec (2.3.0)
37
+ rspec-core (~> 2.3.0)
38
+ rspec-expectations (~> 2.3.0)
39
+ rspec-mocks (~> 2.3.0)
40
+ rspec-core (2.3.1)
41
+ rspec-expectations (2.3.0)
42
+ diff-lcs (~> 1.1.2)
43
+ rspec-mocks (2.3.0)
44
+ ruby-debug-base19 (0.11.25)
45
+ columnize (>= 0.3.1)
46
+ linecache19 (>= 0.5.11)
47
+ ruby_core_source (>= 0.1.4)
48
+ ruby-debug19 (0.11.6)
49
+ columnize (>= 0.3.1)
50
+ linecache19 (>= 0.5.11)
51
+ ruby-debug-base19 (>= 0.11.19)
52
+ ruby-prof (0.10.7)
53
+ ruby_core_source (0.1.5)
54
+ archive-tar-minitar (>= 0.5.2)
55
+ shoulda (2.11.3)
56
+ term-ansicolor (1.0.5)
57
+ treetop (1.4.9)
58
+ polyglot (>= 0.3.1)
59
+
60
+ PLATFORMS
61
+ ruby
62
+
63
+ DEPENDENCIES
64
+ biodiversity (~> 0.5.13)
65
+ biodiversity19 (~> 0.5.13)
66
+ bundler (~> 1.0.0)
67
+ cucumber
68
+ jeweler (~> 1.6.0)
69
+ mocha
70
+ rake-compiler
71
+ rcov
72
+ rspec (~> 2.3.0)
73
+ ruby-debug19
74
+ ruby-prof
75
+ shoulda
data/README.rdoc CHANGED
@@ -14,11 +14,7 @@ Taxamatch_Rb is compatible with ruby versions 1.8.7 and 1.9.1 and higher
14
14
 
15
15
  == Installation
16
16
 
17
- sudo gem install dimus-taxamatch_rb --source http://gems.github.com
18
-
19
- or
20
- sudo gem sources -a http://gems.github.com #(you only have to do this once)
21
- sudo gem install dimus-taxamatch_rb
17
+ sudo gem install taxamatch_rb
22
18
 
23
19
  == Usage
24
20
 
@@ -0,0 +1,11 @@
1
+ # Loads mkmf which is used to make makefiles for Ruby extensions
2
+ require 'mkmf'
3
+
4
+ # Give it a name
5
+ extension_name = 'damerau_levenshtein'
6
+
7
+ # The destination
8
+ dir_config(extension_name)
9
+
10
+ # Do the work
11
+ create_makefile(extension_name)
data/lib/taxamatch_rb.rb CHANGED
@@ -8,38 +8,38 @@ require 'taxamatch_rb/normalizer'
8
8
  require 'taxamatch_rb/phonetizer'
9
9
  require 'taxamatch_rb/authmatch'
10
10
 
11
- $KCODE='u' if RUBY_VERSION.split('.')[1].to_i < 9
11
+ $KCODE='u' if RUBY_VERSION.split('.')[1].to_i < 9
12
12
 
13
13
  module Taxamatch
14
14
 
15
15
  class Base
16
-
16
+
17
17
  def initialize
18
18
  @parser = Taxamatch::Atomizer.new
19
19
  @dlm = Taxamatch::DamerauLevenshteinMod.new
20
20
  end
21
-
22
-
21
+
22
+
23
23
  #takes two scientific names and returns true if names match and false if they don't
24
- def taxamatch(str1, str2, return_boolean = true)
24
+ def taxamatch(str1, str2, return_boolean = true)
25
25
  preparsed_1 = @parser.parse(str1)
26
26
  preparsed_2 = @parser.parse(str2)
27
27
  match = taxamatch_preparsed(preparsed_1, preparsed_2) rescue nil
28
28
  return_boolean ? (!!match && match['match']) : match
29
29
  end
30
-
31
- #takes two hashes of parsed scientific names, analyses them and returns back
30
+
31
+ #takes two hashes of parsed scientific names, analyses them and returns back
32
32
  #this function is useful when species strings are preparsed.
33
33
  def taxamatch_preparsed(preparsed_1, preparsed_2)
34
34
  result = nil
35
- result = match_uninomial(preparsed_1, preparsed_2) if preparsed_1[:uninomial] && preparsed_2[:uninomial]
35
+ result = match_uninomial(preparsed_1, preparsed_2) if preparsed_1[:uninomial] && preparsed_2[:uninomial]
36
36
  result = match_multinomial(preparsed_1, preparsed_2) if preparsed_1[:genus] && preparsed_2[:genus]
37
37
  if result && result['match']
38
- result['match'] = match_authors(preparsed_1, preparsed_2) == 0 ? false : true
38
+ result['match'] = match_authors(preparsed_1, preparsed_2) == 0 ? false : true
39
39
  end
40
40
  return result
41
41
  end
42
-
42
+
43
43
  def match_uninomial(preparsed_1, preparsed_2)
44
44
  match_genera(preparsed_1[:uninomial], preparsed_2[:uninomial])
45
45
  end
@@ -54,14 +54,14 @@ module Taxamatch
54
54
  match_hash = match_matches(gen_match, sp_match, infrasp_match)
55
55
  elsif (preparsed_1[:infraspecies] && !preparsed_2[:infraspecies]) || (!preparsed_1[:infraspecies] && preparsed_2[:infraspecies])
56
56
  match_hash = { 'match' => false, 'edit_distance' => 5, 'phonetic_match' => false }
57
- total_length += preparsed_1[:infraspecies] ? preparsed_1[:infraspecies][0][:string].size : preparsed_2[:infraspecies][0][:string].size
57
+ total_length += preparsed_1[:infraspecies] ? preparsed_1[:infraspecies][0][:string].size : preparsed_2[:infraspecies][0][:string].size
58
58
  else
59
59
  match_hash = match_matches(gen_match, sp_match)
60
60
  end
61
61
  match_hash.merge({'score' => (1 - match_hash['edit_distance']/(total_length/2))})
62
62
  match_hash
63
63
  end
64
-
64
+
65
65
  def match_genera(genus1, genus2)
66
66
  genus1_length = genus1[:normalized].size
67
67
  genus2_length = genus2[:normalized].size
@@ -69,10 +69,10 @@ module Taxamatch
69
69
  match = false
70
70
  ed = @dlm.distance(genus1[:normalized], genus2[:normalized],1,3) #TODO put block = 2
71
71
  return {'edit_distance' => ed, 'phonetic_match' => false, 'match' => false} if ed/min_length.to_f > 0.2
72
- return {'edit_distance' => ed, 'phonetic_match' => true, 'match' => true} if genus1[:phonetized] == genus2[:phonetized]
73
-
72
+ return {'edit_distance' => ed, 'phonetic_match' => true, 'match' => true} if genus1[:phonetized] == genus2[:phonetized]
73
+
74
74
  match = true if ed <= 3 && (min_length > ed * 2) && (ed < 2 || genus1[0] == genus2[0])
75
- {'edit_distance' => ed, 'match' => match, 'phonetic_match' => false}
75
+ {'edit_distance' => ed, 'match' => match, 'phonetic_match' => false}
76
76
  end
77
77
 
78
78
  def match_species(sp1, sp2)
@@ -86,11 +86,11 @@ module Taxamatch
86
86
  return {'edit_distance' => ed, 'phonetic_match' => false, 'match' => false} if ed/min_length.to_f > 0.3334
87
87
  #puts 's: %s, %s, %s' % [sp1[:normalized], sp2[:normalized], ed]
88
88
  return {'edit_distance' => ed, 'phonetic_match' => true, 'match' => true} if sp1[:phonetized] == sp2[:phonetized]
89
-
89
+
90
90
  match = true if ed <= 4 && (min_length >= ed * 2) && (ed < 2 || sp1[:normalized][0] == sp2[:normalized][0]) && (ed < 4 || sp1[:normalized][0...3] == sp2[:normalized][0...3])
91
91
  { 'edit_distance' => ed, 'match' => match, 'phonetic_match' => false}
92
92
  end
93
-
93
+
94
94
  def match_authors(preparsed_1, preparsed_2)
95
95
  au1 = preparsed_1[:all_authors]
96
96
  au2 = preparsed_2[:all_authors]
@@ -98,8 +98,8 @@ module Taxamatch
98
98
  yr2 = preparsed_2[:all_years]
99
99
  Taxamatch::Authmatch.authmatch(au1, au2, yr1, yr2)
100
100
  end
101
-
102
- def match_matches(genus_match, species_match, infraspecies_match = nil)
101
+
102
+ def match_matches(genus_match, species_match, infraspecies_match = nil)
103
103
  match = species_match
104
104
  if infraspecies_match
105
105
  match['edit_distance'] += infraspecies_match['edit_distance']
@@ -1,122 +1,25 @@
1
1
  # encoding: UTF-8
2
- require 'rubygems'
3
- require 'inline'
4
- require 'time'
2
+
3
+ require File.join(File.dirname(__FILE__), 'damerau_levenshtein')
4
+
5
5
  module Taxamatch
6
6
 
7
7
  class DamerauLevenshteinMod
8
+ include DamerauLevenshtein
9
+
8
10
  def distance(str1, str2, block_size=2, max_distance=10)
9
- # puts str1.unpack("U*");
10
11
  distance_utf(str1.unpack("U*"), str2.unpack("U*"), block_size, max_distance)
11
12
  end
12
-
13
- inline do |builder|
14
- builder.c "
15
- static VALUE distance_utf(VALUE _s, VALUE _t, int block_size, int max_distance){
16
- int i, i1, j, j1, k, sl, half_sl, tl, half_tl, cost, *d, distance, del, ins, subs, transp, block;
17
- int stop_execution = 0;
18
- int min = 0;
19
- int current_distance = 0;
20
-
21
- VALUE *sv = RARRAY_PTR(_s);
22
- VALUE *tv = RARRAY_PTR(_t);
23
-
24
- sl = RARRAY_LEN(_s);
25
- tl = RARRAY_LEN(_t);
26
-
27
- if (sl == 0) return INT2NUM(tl);
28
- if (tl == 0) return INT2NUM(sl);
29
- //case of lengths 1 must present or it will break further in the code
30
- if (sl == 1 && tl == 1 && sv[0] != tv[0]) return INT2NUM(1);
31
-
32
- int s[sl];
33
- int t[tl];
34
-
35
- for (i=0; i < sl; i++) s[i] = NUM2INT(sv[i]);
36
- for (i=0; i < tl; i++) t[i] = NUM2INT(tv[i]);
37
-
38
- sl++;
39
- tl++;
40
-
41
- //one-dimentional representation of 2 dimentional array len(s)+1 * len(t)+1
42
- d = malloc((sizeof(int))*(sl)*(tl));
43
- //populate 'vertical' row starting from the 2nd position (first one is filled already)
44
- for(i = 0; i < tl; i++){
45
- d[i*sl] = i;
46
- }
47
-
48
- //fill up array with scores
49
- for(i = 1; i<sl; i++){
50
- d[i] = i;
51
- if (stop_execution == 1) break;
52
- current_distance = 10000;
53
- for(j = 1; j<tl; j++){
54
-
55
- cost = 1;
56
- if(s[i-1] == t[j-1]) cost = 0;
57
-
58
- half_sl = (sl - 1)/2;
59
- half_tl = (tl - 1)/2;
60
-
61
- block = block_size < half_sl ? block_size : half_sl;
62
- block = block < half_tl ? block : half_tl;
63
-
64
- while (block >= 1){
65
- int swap1 = 1;
66
- int swap2 = 1;
67
- i1 = i - (block * 2);
68
- j1 = j - (block * 2);
69
- for (k = i1; k < i1 + block; k++) {
70
- if (s[k] != t[k + block]){
71
- swap1 = 0;
72
- break;
73
- }
74
- }
75
- for (k = j1; k < j1 + block; k++) {
76
- if (t[k] != s[k + block]){
77
- swap2 = 0;
78
- break;
79
- }
80
- }
81
-
82
- del = d[j*sl + i - 1] + 1;
83
- ins = d[(j-1)*sl + i] + 1;
84
- min = del;
85
- if (ins < min) min = ins;
86
- //if (i == 2 && j==2) return INT2NUM(swap2+5);
87
- if (i >= block && j >= block && swap1 == 1 && swap2 == 1){
88
- transp = d[(j - block * 2) * sl + i - block * 2] + cost + block -1;
89
- if (transp < min) min = transp;
90
- block = 0;
91
- } else if (block == 1) {
92
- subs = d[(j-1)*sl + i - 1] + cost;
93
- if (subs < min) min = subs;
94
- }
95
- block--;
96
- }
97
- d[j*sl+i]=min;
98
- if (current_distance > d[j*sl+i]) current_distance = d[j*sl+i];
99
- }
100
- if (current_distance > max_distance) {
101
- stop_execution = 1;
102
- }
103
- }
104
- distance=d[sl * tl - 1];
105
- if (stop_execution == 1) distance = current_distance;
106
-
107
- free(d);
108
- return INT2NUM(distance);
109
- }
110
- "
111
- end
112
13
  end
14
+
113
15
  end
114
16
 
115
17
  if __FILE__ == $0
116
- a=Taxamatch::DamerauLevenshteinMod.new
18
+
19
+ a = Taxamatch::DamerauLevenshteinMod.new
117
20
  s = 'Cedarinia scabra Sjöstedt 1921'.unpack('U*')
118
21
  t = 'Cedarinia scabra Söjstedt 1921'.unpack('U*')
119
-
22
+
120
23
  #puts s.join(",")
121
24
  #puts t.join(",")
122
25
 
@@ -133,7 +36,7 @@ if __FILE__ == $0
133
36
  puts 'utf time: ' + (Time.now - start).to_s + ' sec'
134
37
 
135
38
  #puts a.distance('Cedarinia scabra Sjöstedt 1921','Cedarinia scabra Söjstedt 1921')
136
- #puts a.distance_utf(s, t, 2, 10)
39
+ #puts a.distance_utf(s, t, 2, 10)
137
40
  #puts a.distance('tar','atp',1,10);
138
41
  puts a.distance('sub', 'usb', 1, 10);
139
42
  end
@@ -60,4 +60,4 @@ Pxxxxomus|Pomatomus|10|1|4
60
60
  Pxxxxomus|Pomatomus|2|1|3
61
61
 
62
62
  #
63
- PUNCTATA|PUNCTATA|10|1|0
63
+ PUNCTATA|PUNCTATA|10|1|0
data/spec/spec_helper.rb CHANGED
@@ -1,10 +1,4 @@
1
- begin
2
- require 'spec'
3
- rescue LoadError
4
- require 'rubygems' unless ENV['NO_RUBYGEMS']
5
- gem 'rspec'
6
- require 'spec'
7
- end
1
+ require 'rspec'
8
2
 
9
3
  $:.unshift(File.dirname(__FILE__) + '/../lib')
10
4
  require 'taxamatch_rb'
@@ -1,5 +1,5 @@
1
1
  # encoding: UTF-8
2
- require File.dirname(__FILE__) + '/spec_helper.rb'
2
+ require 'spec_helper'
3
3
 
4
4
  describe 'DamerauLevenshteinMod' do
5
5
  it 'should get tests' do
@@ -7,7 +7,7 @@ describe 'DamerauLevenshteinMod' do
7
7
  dl = Taxamatch::DamerauLevenshteinMod.new
8
8
  if y
9
9
  res = dl.distance(y[0], y[1], y[3].to_i, y[2].to_i)
10
- #puts y if res != y[4].to_i
10
+ puts y if res != y[4].to_i
11
11
  res.should == y[4].to_i
12
12
  end
13
13
  end
@@ -18,17 +18,17 @@ describe 'Atomizer' do
18
18
  before(:all) do
19
19
  @parser = Taxamatch::Atomizer.new
20
20
  end
21
-
21
+
22
22
  it 'should parse uninomials' do
23
23
  @parser.parse('Betula').should == {:all_authors=>[], :all_years=>[], :uninomial=>{:string=>"Betula", :normalized=>"BETULA", :phonetized=>"BITILA", :authors=>[], :years=>[], :normalized_authors=>[]}}
24
24
  @parser.parse('Ærenea Lacordaire, 1872').should == {:all_authors=>["LACORDAIRE"], :all_years=>["1872"], :uninomial=>{:string=>"Aerenea", :normalized=>"AERENEA", :phonetized=>"ERINIA", :authors=>["Lacordaire"], :years=>["1872"], :normalized_authors=>["LACORDAIRE"]}}
25
25
  end
26
-
26
+
27
27
  it 'should parse binomials' do
28
28
  @parser.parse('Leœptura laetifica Dow, 1913').should == {:all_authors=>["DOW"], :all_years=>["1913"], :genus=>{:string=>"Leoeptura", :normalized=>"LEOEPTURA", :phonetized=>"LIPTIRA", :authors=>[], :years=>[], :normalized_authors=>[]}, :species=>{:string=>"laetifica", :normalized=>"LAETIFICA", :phonetized=>"LITIFICA", :authors=>["Dow"], :years=>["1913"], :normalized_authors=>["DOW"]}}
29
29
  end
30
-
31
- it 'should parse trinomials' do
30
+
31
+ it 'should parse trinomials' do
32
32
  @parser.parse('Hydnellum scrobiculatum zonatum (Banker) D. Hall et D.E. Stuntz 1972').should == {:all_authors=>["BANKER", "D HALL", "D E STUNTZ"], :all_years=>["1972"], :genus=>{:string=>"Hydnellum", :normalized=>"HYDNELLUM", :phonetized=>"HIDNILIM", :authors=>[], :years=>[], :normalized_authors=>[]}, :species=>{:string=>"scrobiculatum", :normalized=>"SCROBICULATUM", :phonetized=>"SCRABICILATA", :authors=>[], :years=>[], :normalized_authors=>[]}, :infraspecies=>[{:string=>"zonatum", :normalized=>"ZONATUM", :phonetized=>"ZANATA", :authors=>["Banker", "D. Hall", "D.E. Stuntz"], :years=>["1972"], :normalized_authors=>["BANKER", "D HALL", "D E STUNTZ"]}]}
33
33
  end
34
34
  end
@@ -42,7 +42,7 @@ describe 'Taxamatch::Normalizer' do
42
42
  Taxamatch::Normalizer.normalize('Fallén').should == 'FALLEN'
43
43
  Taxamatch::Normalizer.normalize('Choriozopella trägårdhi').should == 'CHORIOZOPELLA TRAGARDHI'
44
44
  end
45
-
45
+
46
46
  it 'should normalize words' do
47
47
  Taxamatch::Normalizer.normalize_word('L-3eœ|pt[ura$').should == 'L-3EOEPTURA'
48
48
  end
@@ -52,25 +52,25 @@ describe 'Taxamatch::Base' do
52
52
  before(:all) do
53
53
  @tm = Taxamatch::Base.new
54
54
  end
55
-
55
+
56
56
  it 'should get txt tests' do
57
57
  dl = Taxamatch::DamerauLevenshteinMod.new
58
58
  read_test_file(File.expand_path(File.dirname(__FILE__)) + '/taxamatch_test.txt', 4) do |y|
59
59
  if y
60
60
  y[2] = y[2] == 'true' ? true : false
61
61
  res = @tm.taxamatch(y[0], y[1], false)
62
- puts "%s, %s, %s, %s" % [y[0], y[1], y[2], y[3]]
62
+ puts "%s, %s, %s, %s" % [y[0], y[1], y[2], y[3]]
63
63
  res['match'].should == y[2]
64
64
  res['edit_distance'].should == y[3].to_i
65
65
  end
66
66
  end
67
67
  end
68
-
68
+
69
69
  it 'should work with names that cannot be parsed' do
70
70
  res = @tm.taxamatch('Quadraspidiotus ostreaeformis MacGillivray, 1921','Quadraspidiotus ostreaeformis Curtis)')
71
71
  res = false
72
72
  end
73
-
73
+
74
74
  it 'should compare genera' do
75
75
  #edit distance 1 always match
76
76
  g1 = make_taxamatch_hash 'Plantago'
@@ -138,17 +138,17 @@ describe 'Taxamatch::Base' do
138
138
  #Should not match if Distance 2 or 3 and first 1 char is not the same
139
139
  s1 = make_taxamatch_hash 'morrrr'
140
140
  s2 = make_taxamatch_hash 'torraa'
141
- @tm.match_species(s1, s2).should == {'phonetic_match' => false, 'match' => false, 'edit_distance' => 3}
141
+ @tm.match_species(s1, s2).should == {'phonetic_match' => false, 'match' => false, 'edit_distance' => 3}
142
142
  #Distance 1 will match anywhere
143
143
  s1 = make_taxamatch_hash 'major'
144
144
  s2 = make_taxamatch_hash 'rajor'
145
- @tm.match_species(s1, s2).should == {'phonetic_match' => false, 'match' => true, 'edit_distance' => 1}
145
+ @tm.match_species(s1, s2).should == {'phonetic_match' => false, 'match' => true, 'edit_distance' => 1}
146
146
  #Will not match if distance 3 and length is less then twice of the edit distance
147
147
  s1 = make_taxamatch_hash 'marrr'
148
148
  s2 = make_taxamatch_hash 'maaaa'
149
149
  @tm.match_species(s1, s2).should == {'phonetic_match' => false, 'match' => false, 'edit_distance' => 3}
150
150
  end
151
-
151
+
152
152
  it 'should match matches' do
153
153
  #No trobule case
154
154
  gmatch = {'match' => true, 'phonetic_match' => true, 'edit_distance' => 1}
@@ -159,7 +159,7 @@ describe 'Taxamatch::Base' do
159
159
  smatch = {'match' => true, 'phonetic_match' => true, 'edit_distance' => 1}
160
160
  @tm.match_matches(gmatch, smatch).should == {'phonetic_match'=>false, 'edit_distance'=>2, 'match'=>false}
161
161
  gmatch = {'match' => true, 'phonetic_match' => true, 'edit_distance' => 1}
162
- smatch = {'match' => false, 'phonetic_match' => false, 'edit_distance' => 1}
162
+ smatch = {'match' => false, 'phonetic_match' => false, 'edit_distance' => 1}
163
163
  @tm.match_matches(gmatch, smatch).should == {'phonetic_match'=>false, 'edit_distance'=>2, 'match'=>false}
164
164
  #Should not match if binomial edit distance > 4 NOTE: EVEN with full phonetic match
165
165
  gmatch = {'match' => true, 'phonetic_match' => true, 'edit_distance' => 3}
@@ -188,7 +188,7 @@ describe 'Taxamatch::Base' do
188
188
  before(:all) do
189
189
  @am = Taxamatch::Authmatch
190
190
  end
191
-
191
+
192
192
  it 'should calculate score' do
193
193
  res = @am.authmatch(['Linnaeus', 'Muller'], ['L'], [], [1788])
194
194
  res.should == 90
@@ -219,22 +219,22 @@ describe 'Taxamatch::Base' do
219
219
  res = @am.authmatch(['Stepanov', 'Linnaeus', 'Muller'], ['Muller', 'Kurtz', 'Stepanov'], [1766], [1765])
220
220
  res.should == 0
221
221
  end
222
-
222
+
223
223
  it 'should compare years' do
224
224
  @am.compare_years([1882],[1880]).should == 2
225
225
  @am.compare_years([1882],[]).should == nil
226
226
  @am.compare_years([],[]).should == 0
227
227
  @am.compare_years([1788,1798], [1788,1798]).should be_nil
228
228
  end
229
-
230
- it 'should remove duplicate authors' do
229
+
230
+ it 'should remove duplicate authors' do
231
231
  #Li submatches Linnaeus and it its size 3 is big enought to remove Linnaeus
232
232
  #Muller is identical
233
233
  res = @am.remove_duplicate_authors(['Lin', 'Muller'], ['Linnaeus', 'Muller'])
234
234
  res.should == [[], []]
235
235
  #same in different order
236
236
  res = @am.remove_duplicate_authors(['Linnaeus', 'Muller'], ['Linn', 'Muller'])
237
- res.should == [[], []]
237
+ res.should == [[], []]
238
238
  #auth Li submatches Linnaeus, but Li size less then 3 required to remove Linnaeus
239
239
  res = @am.remove_duplicate_authors(['Dem', 'Li'], ['Linnaeus', 'Stepanov'])
240
240
  res.should == [["Dem"], ["Linnaeus", "Stepanov"]]
@@ -252,7 +252,7 @@ describe 'Taxamatch::Base' do
252
252
  # res = @am.fuzzy_match_authors('L', 'Muller')
253
253
  # res.should be_false
254
254
  end
255
-
255
+
256
256
  end
257
257
 
258
258
  end
metadata CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
4
4
  prerelease: false
5
5
  segments:
6
6
  - 0
7
- - 6
8
- - 5
9
- version: 0.6.5
7
+ - 7
8
+ - 4
9
+ version: 0.7.4
10
10
  platform: ruby
11
11
  authors:
12
12
  - Dmitry Mozzherin
@@ -14,13 +14,42 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2010-10-07 00:00:00 -04:00
17
+ date: 2011-06-23 00:00:00 -04:00
18
18
  default_executable:
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
21
- name: RubyInline
22
- prerelease: false
21
+ name: biodiversity
23
22
  requirement: &id001 !ruby/object:Gem::Requirement
23
+ none: false
24
+ requirements:
25
+ - - ~>
26
+ - !ruby/object:Gem::Version
27
+ segments:
28
+ - 0
29
+ - 5
30
+ - 13
31
+ version: 0.5.13
32
+ type: :runtime
33
+ prerelease: false
34
+ version_requirements: *id001
35
+ - !ruby/object:Gem::Dependency
36
+ name: biodiversity19
37
+ requirement: &id002 !ruby/object:Gem::Requirement
38
+ none: false
39
+ requirements:
40
+ - - ~>
41
+ - !ruby/object:Gem::Version
42
+ segments:
43
+ - 0
44
+ - 5
45
+ - 13
46
+ version: 0.5.13
47
+ type: :runtime
48
+ prerelease: false
49
+ version_requirements: *id002
50
+ - !ruby/object:Gem::Dependency
51
+ name: rake-compiler
52
+ requirement: &id003 !ruby/object:Gem::Requirement
24
53
  none: false
25
54
  requirements:
26
55
  - - ">="
@@ -29,11 +58,134 @@ dependencies:
29
58
  - 0
30
59
  version: "0"
31
60
  type: :runtime
32
- version_requirements: *id001
61
+ prerelease: false
62
+ version_requirements: *id003
33
63
  - !ruby/object:Gem::Dependency
34
- name: biodiversity
64
+ name: rspec
65
+ requirement: &id004 !ruby/object:Gem::Requirement
66
+ none: false
67
+ requirements:
68
+ - - ~>
69
+ - !ruby/object:Gem::Version
70
+ segments:
71
+ - 2
72
+ - 3
73
+ - 0
74
+ version: 2.3.0
75
+ type: :development
35
76
  prerelease: false
36
- requirement: &id002 !ruby/object:Gem::Requirement
77
+ version_requirements: *id004
78
+ - !ruby/object:Gem::Dependency
79
+ name: cucumber
80
+ requirement: &id005 !ruby/object:Gem::Requirement
81
+ none: false
82
+ requirements:
83
+ - - ">="
84
+ - !ruby/object:Gem::Version
85
+ segments:
86
+ - 0
87
+ version: "0"
88
+ type: :development
89
+ prerelease: false
90
+ version_requirements: *id005
91
+ - !ruby/object:Gem::Dependency
92
+ name: bundler
93
+ requirement: &id006 !ruby/object:Gem::Requirement
94
+ none: false
95
+ requirements:
96
+ - - ~>
97
+ - !ruby/object:Gem::Version
98
+ segments:
99
+ - 1
100
+ - 0
101
+ - 0
102
+ version: 1.0.0
103
+ type: :development
104
+ prerelease: false
105
+ version_requirements: *id006
106
+ - !ruby/object:Gem::Dependency
107
+ name: jeweler
108
+ requirement: &id007 !ruby/object:Gem::Requirement
109
+ none: false
110
+ requirements:
111
+ - - ~>
112
+ - !ruby/object:Gem::Version
113
+ segments:
114
+ - 1
115
+ - 6
116
+ - 0
117
+ version: 1.6.0
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: *id007
121
+ - !ruby/object:Gem::Dependency
122
+ name: rcov
123
+ requirement: &id008 !ruby/object:Gem::Requirement
124
+ none: false
125
+ requirements:
126
+ - - ">="
127
+ - !ruby/object:Gem::Version
128
+ segments:
129
+ - 0
130
+ version: "0"
131
+ type: :development
132
+ prerelease: false
133
+ version_requirements: *id008
134
+ - !ruby/object:Gem::Dependency
135
+ name: ruby-debug19
136
+ requirement: &id009 !ruby/object:Gem::Requirement
137
+ none: false
138
+ requirements:
139
+ - - ">="
140
+ - !ruby/object:Gem::Version
141
+ segments:
142
+ - 0
143
+ version: "0"
144
+ type: :development
145
+ prerelease: false
146
+ version_requirements: *id009
147
+ - !ruby/object:Gem::Dependency
148
+ name: ruby-prof
149
+ requirement: &id010 !ruby/object:Gem::Requirement
150
+ none: false
151
+ requirements:
152
+ - - ">="
153
+ - !ruby/object:Gem::Version
154
+ segments:
155
+ - 0
156
+ version: "0"
157
+ type: :development
158
+ prerelease: false
159
+ version_requirements: *id010
160
+ - !ruby/object:Gem::Dependency
161
+ name: shoulda
162
+ requirement: &id011 !ruby/object:Gem::Requirement
163
+ none: false
164
+ requirements:
165
+ - - ">="
166
+ - !ruby/object:Gem::Version
167
+ segments:
168
+ - 0
169
+ version: "0"
170
+ type: :development
171
+ prerelease: false
172
+ version_requirements: *id011
173
+ - !ruby/object:Gem::Dependency
174
+ name: mocha
175
+ requirement: &id012 !ruby/object:Gem::Requirement
176
+ none: false
177
+ requirements:
178
+ - - ">="
179
+ - !ruby/object:Gem::Version
180
+ segments:
181
+ - 0
182
+ version: "0"
183
+ type: :development
184
+ prerelease: false
185
+ version_requirements: *id012
186
+ - !ruby/object:Gem::Dependency
187
+ name: biodiversity
188
+ requirement: &id013 !ruby/object:Gem::Requirement
37
189
  none: false
38
190
  requirements:
39
191
  - - ">="
@@ -44,21 +196,37 @@ dependencies:
44
196
  - 13
45
197
  version: 0.5.13
46
198
  type: :runtime
47
- version_requirements: *id002
199
+ prerelease: false
200
+ version_requirements: *id013
201
+ - !ruby/object:Gem::Dependency
202
+ name: rake-compiler
203
+ requirement: &id014 !ruby/object:Gem::Requirement
204
+ none: false
205
+ requirements:
206
+ - - ">="
207
+ - !ruby/object:Gem::Version
208
+ segments:
209
+ - 0
210
+ version: "0"
211
+ type: :runtime
212
+ prerelease: false
213
+ version_requirements: *id014
48
214
  description: This gem implements algorithm for fuzzy matching scientific names developed by Tony Rees
49
215
  email: dmozzherin@eol.org
50
216
  executables: []
51
217
 
52
- extensions: []
53
-
218
+ extensions:
219
+ - ext/damerau_levenshtein/extconf.rb
54
220
  extra_rdoc_files:
55
221
  - LICENSE
56
222
  - README.rdoc
57
223
  files:
224
+ - Gemfile.lock
58
225
  - README.rdoc
59
226
  - lib/taxamatch_rb.rb
60
227
  - lib/taxamatch_rb/atomizer.rb
61
228
  - lib/taxamatch_rb/authmatch.rb
229
+ - lib/taxamatch_rb/damerau_levenshtein.bundle
62
230
  - lib/taxamatch_rb/damerau_levenshtein_mod.rb
63
231
  - lib/taxamatch_rb/normalizer.rb
64
232
  - lib/taxamatch_rb/phonetizer.rb
@@ -68,13 +236,14 @@ files:
68
236
  - spec/taxamatch_rb_spec.rb
69
237
  - spec/taxamatch_test.txt
70
238
  - LICENSE
239
+ - ext/damerau_levenshtein/extconf.rb
71
240
  has_rdoc: true
72
241
  homepage: http://github.com/GlobalNamesArchitecture/taxamatch_rb
73
242
  licenses: []
74
243
 
75
244
  post_install_message:
76
- rdoc_options:
77
- - --charset=UTF-8
245
+ rdoc_options: []
246
+
78
247
  require_paths:
79
248
  - lib
80
249
  required_ruby_version: !ruby/object:Gem::Requirement
@@ -82,6 +251,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
82
251
  requirements:
83
252
  - - ">="
84
253
  - !ruby/object:Gem::Version
254
+ hash: -2865757795593253659
85
255
  segments:
86
256
  - 0
87
257
  version: "0"
@@ -100,6 +270,5 @@ rubygems_version: 1.3.7
100
270
  signing_key:
101
271
  specification_version: 3
102
272
  summary: Implementation of Tony Rees Taxamatch algorithms
103
- test_files:
104
- - spec/spec_helper.rb
105
- - spec/taxamatch_rb_spec.rb
273
+ test_files: []
274
+