taxamatch_rb 0.6.0 → 0.6.1
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/taxamatch_rb.rb +3 -3
- data/lib/taxamatch_rb/atomizer.rb +6 -6
- data/spec/spec_helper.rb +2 -2
- data/spec/taxamatch_rb_spec.rb +6 -6
- metadata +3 -3
data/lib/taxamatch_rb.rb
CHANGED
@@ -48,14 +48,14 @@ module Taxamatch
|
|
48
48
|
def match_multinomial(preparsed_1, preparsed_2)
|
49
49
|
gen_match = match_genera(preparsed_1[:genus], preparsed_2[:genus])
|
50
50
|
sp_match = match_species(preparsed_1[:species], preparsed_2[:species])
|
51
|
-
total_length = preparsed_1[:genus][:
|
51
|
+
total_length = preparsed_1[:genus][:string].size + preparsed_2[:genus][:string].size + preparsed_1[:species][:string].size + preparsed_2[:species][:string].size
|
52
52
|
if preparsed_1[:infraspecies] && preparsed_2[:infraspecies]
|
53
53
|
infrasp_match = match_species(preparsed_1[:infraspecies][0], preparsed_2[:infraspecies][0])
|
54
|
-
total_length += preparsed_1[:infraspecies][0][:
|
54
|
+
total_length += preparsed_1[:infraspecies][0][:string].size + preparsed_2[:infraspecies][0][:string].size
|
55
55
|
match_hash = match_matches(gen_match, sp_match, infrasp_match)
|
56
56
|
elsif (preparsed_1[:infraspecies] && !preparsed_2[:infraspecies]) || (!preparsed_1[:infraspecies] && preparsed_2[:infraspecies])
|
57
57
|
match_hash = { 'match' => false, 'edit_distance' => 5, 'phonetic_match' => false }
|
58
|
-
total_length += preparsed_1[:infraspecies] ? preparsed_1[:infraspecies][0][:
|
58
|
+
total_length += preparsed_1[:infraspecies] ? preparsed_1[:infraspecies][0][:string].size : preparsed_2[:infraspecies][0][:string].size
|
59
59
|
else
|
60
60
|
match_hash = match_matches(gen_match, sp_match)
|
61
61
|
end
|
data/lib/taxamatch_rb/atomizer.rb
CHANGED
@@ -38,9 +38,9 @@ module Taxamatch
|
|
38
38
|
def process_node(name, node, is_species = false)
|
39
39
|
return unless node
|
40
40
|
@res[name] = {}
|
41
|
-
@res[name][:
|
42
|
-
@res[name][:normalized] = Taxamatch::Normalizer.normalize(node[:
|
43
|
-
@res[name][:phonetized] = Taxamatch::Phonetizer.near_match(node[:
|
41
|
+
@res[name][:string] = node[:string]
|
42
|
+
@res[name][:normalized] = Taxamatch::Normalizer.normalize(node[:string])
|
43
|
+
@res[name][:phonetized] = Taxamatch::Phonetizer.near_match(node[:string], is_species)
|
44
44
|
get_authors_years(node, @res[name])
|
45
45
|
end
|
46
46
|
|
@@ -49,9 +49,9 @@ module Taxamatch
|
|
49
49
|
@res[:infraspecies] = []
|
50
50
|
node.each do |infr|
|
51
51
|
hsh = {}
|
52
|
-
hsh[:
|
53
|
-
hsh[:normalized] = Taxamatch::Normalizer.normalize(infr[:
|
54
|
-
hsh[:phonetized] = Taxamatch::Phonetizer.near_match(infr[:
|
52
|
+
hsh[:string] = infr[:string]
|
53
|
+
hsh[:normalized] = Taxamatch::Normalizer.normalize(infr[:string])
|
54
|
+
hsh[:phonetized] = Taxamatch::Phonetizer.near_match(infr[:string], true)
|
55
55
|
get_authors_years(infr,hsh)
|
56
56
|
@res[:infraspecies] << hsh
|
57
57
|
end
|
data/spec/spec_helper.rb
CHANGED
@@ -24,5 +24,5 @@ end
|
|
24
24
|
|
25
25
|
def make_taxamatch_hash(string)
|
26
26
|
normalized = Taxamatch::Normalizer.normalize(string)
|
27
|
-
{:
|
28
|
-
end
|
27
|
+
{:string => string, :normalized => normalized, :phonetized => Taxamatch::Phonetizer.near_match(normalized)}
|
28
|
+
end
|
data/spec/taxamatch_rb_spec.rb
CHANGED
@@ -7,7 +7,7 @@ describe 'DamerauLevenshteinMod' do
|
|
7
7
|
dl = Taxamatch::DamerauLevenshteinMod.new
|
8
8
|
if y
|
9
9
|
res = dl.distance(y[0], y[1], y[3].to_i, y[2].to_i)
|
10
|
-
puts y if res != y[4].to_i
|
10
|
+
#puts y if res != y[4].to_i
|
11
11
|
res.should == y[4].to_i
|
12
12
|
end
|
13
13
|
end
|
@@ -20,16 +20,16 @@ describe 'Atomizer' do
|
|
20
20
|
end
|
21
21
|
|
22
22
|
it 'should parse uninomials' do
|
23
|
-
@parser.parse('Betula').should == {:all_authors=>[], :all_years=>[], :uninomial=>{:
|
24
|
-
@parser.parse('Ærenea Lacordaire, 1872').should == {:all_authors=>["LACORDAIRE"], :all_years=>["1872"], :uninomial=>{:
|
23
|
+
@parser.parse('Betula').should == {:all_authors=>[], :all_years=>[], :uninomial=>{:string=>"Betula", :normalized=>"BETULA", :phonetized=>"BITILA", :authors=>[], :years=>[], :normalized_authors=>[]}}
|
24
|
+
@parser.parse('Ærenea Lacordaire, 1872').should == {:all_authors=>["LACORDAIRE"], :all_years=>["1872"], :uninomial=>{:string=>"Aerenea", :normalized=>"AERENEA", :phonetized=>"ERINIA", :authors=>["Lacordaire"], :years=>["1872"], :normalized_authors=>["LACORDAIRE"]}}
|
25
25
|
end
|
26
26
|
|
27
27
|
it 'should parse binomials' do
|
28
|
-
@parser.parse('Leœptura laetifica Dow, 1913').should == {:all_authors=>["DOW"], :all_years=>["1913"], :genus=>{:
|
28
|
+
@parser.parse('Leœptura laetifica Dow, 1913').should == {:all_authors=>["DOW"], :all_years=>["1913"], :genus=>{:string=>"Leoeptura", :normalized=>"LEOEPTURA", :phonetized=>"LIPTIRA", :authors=>[], :years=>[], :normalized_authors=>[]}, :species=>{:string=>"laetifica", :normalized=>"LAETIFICA", :phonetized=>"LITIFICA", :authors=>["Dow"], :years=>["1913"], :normalized_authors=>["DOW"]}}
|
29
29
|
end
|
30
30
|
|
31
31
|
it 'should parse trinomials' do
|
32
|
-
@parser.parse('Hydnellum scrobiculatum zonatum (Banker) D. Hall et D.E. Stuntz 1972').should == {:all_authors=>["BANKER", "D HALL", "D E STUNTZ"], :all_years=>["1972"], :genus=>{:
|
32
|
+
@parser.parse('Hydnellum scrobiculatum zonatum (Banker) D. Hall et D.E. Stuntz 1972').should == {:all_authors=>["BANKER", "D HALL", "D E STUNTZ"], :all_years=>["1972"], :genus=>{:string=>"Hydnellum", :normalized=>"HYDNELLUM", :phonetized=>"HIDNILIM", :authors=>[], :years=>[], :normalized_authors=>[]}, :species=>{:string=>"scrobiculatum", :normalized=>"SCROBICULATUM", :phonetized=>"SCRABICILATA", :authors=>[], :years=>[], :normalized_authors=>[]}, :infraspecies=>[{:string=>"zonatum", :normalized=>"ZONATUM", :phonetized=>"ZANATA", :authors=>["Banker", "D. Hall", "D.E. Stuntz"], :years=>["1972"], :normalized_authors=>["BANKER", "D HALL", "D E STUNTZ"]}]}
|
33
33
|
end
|
34
34
|
end
|
35
35
|
|
@@ -59,7 +59,7 @@ describe 'Taxamatch::Base' do
|
|
59
59
|
if y
|
60
60
|
y[2] = y[2] == 'true' ? true : false
|
61
61
|
res = @tm.taxamatch(y[0], y[1], false)
|
62
|
-
puts "%s, %s, %s, %s" % [y[0], y[1], y[2], y[3]]
|
62
|
+
#puts "%s, %s, %s, %s" % [y[0], y[1], y[2], y[3]]
|
63
63
|
res['match'].should == y[2]
|
64
64
|
res['edit_distance'].should == y[3].to_i
|
65
65
|
end
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 6
|
8
|
-
- 0
|
9
|
-
version: 0.6.0
|
8
|
+
- 1
|
9
|
+
version: 0.6.1
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Dmitry Mozzherin
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2010-03-
|
17
|
+
date: 2010-03-30 00:00:00 -04:00
|
18
18
|
default_executable:
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|