taxamatch_rb 0.6.0 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/taxamatch_rb.rb +3 -3
- data/lib/taxamatch_rb/atomizer.rb +6 -6
- data/spec/spec_helper.rb +2 -2
- data/spec/taxamatch_rb_spec.rb +6 -6
- metadata +3 -3
data/lib/taxamatch_rb.rb
CHANGED
@@ -48,14 +48,14 @@ module Taxamatch
|
|
48
48
|
def match_multinomial(preparsed_1, preparsed_2)
|
49
49
|
gen_match = match_genera(preparsed_1[:genus], preparsed_2[:genus])
|
50
50
|
sp_match = match_species(preparsed_1[:species], preparsed_2[:species])
|
51
|
-
total_length = preparsed_1[:genus][:
|
51
|
+
total_length = preparsed_1[:genus][:string].size + preparsed_2[:genus][:string].size + preparsed_1[:species][:string].size + preparsed_2[:species][:string].size
|
52
52
|
if preparsed_1[:infraspecies] && preparsed_2[:infraspecies]
|
53
53
|
infrasp_match = match_species(preparsed_1[:infraspecies][0], preparsed_2[:infraspecies][0])
|
54
|
-
total_length += preparsed_1[:infraspecies][0][:
|
54
|
+
total_length += preparsed_1[:infraspecies][0][:string].size + preparsed_2[:infraspecies][0][:string].size
|
55
55
|
match_hash = match_matches(gen_match, sp_match, infrasp_match)
|
56
56
|
elsif (preparsed_1[:infraspecies] && !preparsed_2[:infraspecies]) || (!preparsed_1[:infraspecies] && preparsed_2[:infraspecies])
|
57
57
|
match_hash = { 'match' => false, 'edit_distance' => 5, 'phonetic_match' => false }
|
58
|
-
total_length += preparsed_1[:infraspecies] ? preparsed_1[:infraspecies][0][:
|
58
|
+
total_length += preparsed_1[:infraspecies] ? preparsed_1[:infraspecies][0][:string].size : preparsed_2[:infraspecies][0][:string].size
|
59
59
|
else
|
60
60
|
match_hash = match_matches(gen_match, sp_match)
|
61
61
|
end
|
@@ -38,9 +38,9 @@ module Taxamatch
|
|
38
38
|
def process_node(name, node, is_species = false)
|
39
39
|
return unless node
|
40
40
|
@res[name] = {}
|
41
|
-
@res[name][:
|
42
|
-
@res[name][:normalized] = Taxamatch::Normalizer.normalize(node[:
|
43
|
-
@res[name][:phonetized] = Taxamatch::Phonetizer.near_match(node[:
|
41
|
+
@res[name][:string] = node[:string]
|
42
|
+
@res[name][:normalized] = Taxamatch::Normalizer.normalize(node[:string])
|
43
|
+
@res[name][:phonetized] = Taxamatch::Phonetizer.near_match(node[:string], is_species)
|
44
44
|
get_authors_years(node, @res[name])
|
45
45
|
end
|
46
46
|
|
@@ -49,9 +49,9 @@ module Taxamatch
|
|
49
49
|
@res[:infraspecies] = []
|
50
50
|
node.each do |infr|
|
51
51
|
hsh = {}
|
52
|
-
hsh[:
|
53
|
-
hsh[:normalized] = Taxamatch::Normalizer.normalize(infr[:
|
54
|
-
hsh[:phonetized] = Taxamatch::Phonetizer.near_match(infr[:
|
52
|
+
hsh[:string] = infr[:string]
|
53
|
+
hsh[:normalized] = Taxamatch::Normalizer.normalize(infr[:string])
|
54
|
+
hsh[:phonetized] = Taxamatch::Phonetizer.near_match(infr[:string], true)
|
55
55
|
get_authors_years(infr,hsh)
|
56
56
|
@res[:infraspecies] << hsh
|
57
57
|
end
|
data/spec/spec_helper.rb
CHANGED
@@ -24,5 +24,5 @@ end
|
|
24
24
|
|
25
25
|
def make_taxamatch_hash(string)
|
26
26
|
normalized = Taxamatch::Normalizer.normalize(string)
|
27
|
-
{:
|
28
|
-
end
|
27
|
+
{:string => string, :normalized => normalized, :phonetized => Taxamatch::Phonetizer.near_match(normalized)}
|
28
|
+
end
|
data/spec/taxamatch_rb_spec.rb
CHANGED
@@ -7,7 +7,7 @@ describe 'DamerauLevenshteinMod' do
|
|
7
7
|
dl = Taxamatch::DamerauLevenshteinMod.new
|
8
8
|
if y
|
9
9
|
res = dl.distance(y[0], y[1], y[3].to_i, y[2].to_i)
|
10
|
-
puts y if res != y[4].to_i
|
10
|
+
#puts y if res != y[4].to_i
|
11
11
|
res.should == y[4].to_i
|
12
12
|
end
|
13
13
|
end
|
@@ -20,16 +20,16 @@ describe 'Atomizer' do
|
|
20
20
|
end
|
21
21
|
|
22
22
|
it 'should parse uninomials' do
|
23
|
-
@parser.parse('Betula').should == {:all_authors=>[], :all_years=>[], :uninomial=>{:
|
24
|
-
@parser.parse('Ærenea Lacordaire, 1872').should == {:all_authors=>["LACORDAIRE"], :all_years=>["1872"], :uninomial=>{:
|
23
|
+
@parser.parse('Betula').should == {:all_authors=>[], :all_years=>[], :uninomial=>{:string=>"Betula", :normalized=>"BETULA", :phonetized=>"BITILA", :authors=>[], :years=>[], :normalized_authors=>[]}}
|
24
|
+
@parser.parse('Ærenea Lacordaire, 1872').should == {:all_authors=>["LACORDAIRE"], :all_years=>["1872"], :uninomial=>{:string=>"Aerenea", :normalized=>"AERENEA", :phonetized=>"ERINIA", :authors=>["Lacordaire"], :years=>["1872"], :normalized_authors=>["LACORDAIRE"]}}
|
25
25
|
end
|
26
26
|
|
27
27
|
it 'should parse binomials' do
|
28
|
-
@parser.parse('Leœptura laetifica Dow, 1913').should == {:all_authors=>["DOW"], :all_years=>["1913"], :genus=>{:
|
28
|
+
@parser.parse('Leœptura laetifica Dow, 1913').should == {:all_authors=>["DOW"], :all_years=>["1913"], :genus=>{:string=>"Leoeptura", :normalized=>"LEOEPTURA", :phonetized=>"LIPTIRA", :authors=>[], :years=>[], :normalized_authors=>[]}, :species=>{:string=>"laetifica", :normalized=>"LAETIFICA", :phonetized=>"LITIFICA", :authors=>["Dow"], :years=>["1913"], :normalized_authors=>["DOW"]}}
|
29
29
|
end
|
30
30
|
|
31
31
|
it 'should parse trinomials' do
|
32
|
-
@parser.parse('Hydnellum scrobiculatum zonatum (Banker) D. Hall et D.E. Stuntz 1972').should == {:all_authors=>["BANKER", "D HALL", "D E STUNTZ"], :all_years=>["1972"], :genus=>{:
|
32
|
+
@parser.parse('Hydnellum scrobiculatum zonatum (Banker) D. Hall et D.E. Stuntz 1972').should == {:all_authors=>["BANKER", "D HALL", "D E STUNTZ"], :all_years=>["1972"], :genus=>{:string=>"Hydnellum", :normalized=>"HYDNELLUM", :phonetized=>"HIDNILIM", :authors=>[], :years=>[], :normalized_authors=>[]}, :species=>{:string=>"scrobiculatum", :normalized=>"SCROBICULATUM", :phonetized=>"SCRABICILATA", :authors=>[], :years=>[], :normalized_authors=>[]}, :infraspecies=>[{:string=>"zonatum", :normalized=>"ZONATUM", :phonetized=>"ZANATA", :authors=>["Banker", "D. Hall", "D.E. Stuntz"], :years=>["1972"], :normalized_authors=>["BANKER", "D HALL", "D E STUNTZ"]}]}
|
33
33
|
end
|
34
34
|
end
|
35
35
|
|
@@ -59,7 +59,7 @@ describe 'Taxamatch::Base' do
|
|
59
59
|
if y
|
60
60
|
y[2] = y[2] == 'true' ? true : false
|
61
61
|
res = @tm.taxamatch(y[0], y[1], false)
|
62
|
-
puts "%s, %s, %s, %s" % [y[0], y[1], y[2], y[3]]
|
62
|
+
#puts "%s, %s, %s, %s" % [y[0], y[1], y[2], y[3]]
|
63
63
|
res['match'].should == y[2]
|
64
64
|
res['edit_distance'].should == y[3].to_i
|
65
65
|
end
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 6
|
8
|
-
-
|
9
|
-
version: 0.6.
|
8
|
+
- 1
|
9
|
+
version: 0.6.1
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Dmitry Mozzherin
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2010-03-
|
17
|
+
date: 2010-03-30 00:00:00 -04:00
|
18
18
|
default_executable:
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|