taxamatch_rb 0.9.10 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +5 -2
- data/Gemfile +14 -16
- data/Gemfile.lock +18 -19
- data/LICENSE +1 -1
- data/{README.rdoc → README.md} +26 -7
- data/Rakefile +11 -9
- data/VERSION +1 -1
- data/lib/taxamatch_rb.rb +76 -43
- data/lib/taxamatch_rb/atomizer.rb +19 -10
- data/lib/taxamatch_rb/authmatch.rb +29 -16
- data/lib/taxamatch_rb/normalizer.rb +4 -4
- data/lib/taxamatch_rb/phonetizer.rb +9 -8
- data/spec/taxamatch_rb_spec.rb +223 -109
- data/taxamatch_rb.gemspec +11 -41
- metadata +11 -171
data/CHANGELOG
CHANGED
@@ -1,11 +1,14 @@
|
|
1
|
+
1.0.0 - fixed a parsing problem with infraspecies without string,
|
2
|
+
upgraded version to 1 because the signature of the gem has stabilized
|
3
|
+
|
1
4
|
0.9.8 - fixed a parsing problem with species nodes without name
|
2
5
|
|
3
6
|
0.9.4 - updated parser (to 1.0.16), updated code to ruby 1.9.3
|
4
7
|
|
5
|
-
0.9.3 - Taxamatch::Normalizer substitutes multiplication sign to 'x'
|
8
|
+
0.9.3 - Taxamatch::Normalizer substitutes multiplication sign to 'x'
|
6
9
|
(lowercase) instead of '?'
|
7
10
|
|
8
|
-
0.9.2 - Taxamatch::Normalizer.normalize always returns only ASCII
|
11
|
+
0.9.2 - Taxamatch::Normalizer.normalize always returns only ASCII
|
9
12
|
characters, all utf-8 characters unknown to normalizer are becoming '?'
|
10
13
|
|
11
14
|
0.9.1 - updated gems
|
data/Gemfile
CHANGED
@@ -1,21 +1,19 @@
|
|
1
|
-
source
|
1
|
+
source 'https://rubygems.org'
|
2
2
|
require 'yaml'
|
3
|
-
# YAML::ENGINE.yamler= 'syck'
|
4
3
|
|
5
|
-
gem
|
6
|
-
gem
|
4
|
+
gem 'biodiversity','~> 3.0.1'
|
5
|
+
gem 'damerau-levenshtein', '~> 0.5.4'
|
7
6
|
gem 'json', '~> 1.7.7'
|
8
7
|
|
9
|
-
|
10
|
-
|
11
|
-
gem
|
12
|
-
gem
|
13
|
-
gem
|
14
|
-
gem
|
15
|
-
gem
|
16
|
-
gem
|
17
|
-
gem
|
18
|
-
gem
|
19
|
-
gem
|
20
|
-
gem "mocha"
|
8
|
+
group :test do
|
9
|
+
gem 'rake', '~> 10.0'
|
10
|
+
gem 'rake-compiler', '~> 0.8'
|
11
|
+
gem 'rspec', '~> 2.13'
|
12
|
+
gem 'cucumber', '~> 1.3'
|
13
|
+
gem 'bundler', '~> 1.3'
|
14
|
+
gem 'jeweler', '~> 1.8'
|
15
|
+
gem 'debugger', '~> 1.5'
|
16
|
+
gem 'ruby-prof', '~> 0.13'
|
17
|
+
gem 'shoulda', '~> 3.5'
|
18
|
+
gem 'mocha', '~> 0.13'
|
21
19
|
end
|
data/Gemfile.lock
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
GEM
|
2
|
-
remote:
|
2
|
+
remote: https://rubygems.org/
|
3
3
|
specs:
|
4
4
|
activesupport (3.2.13)
|
5
5
|
i18n (= 0.6.1)
|
6
6
|
multi_json (~> 1.0)
|
7
|
-
|
7
|
+
biodiversity (3.0.1)
|
8
8
|
parallel
|
9
9
|
parallel (~> 0.6)
|
10
10
|
rake (~> 10.0)
|
@@ -18,11 +18,7 @@ GEM
|
|
18
18
|
diff-lcs (>= 1.1.3)
|
19
19
|
gherkin (~> 2.12.0)
|
20
20
|
multi_json (~> 1.3)
|
21
|
-
damerau-levenshtein (
|
22
|
-
bundler (~> 1)
|
23
|
-
jeweler (~> 1)
|
24
|
-
rake (~> 10)
|
25
|
-
rake-compiler (~> 0.8)
|
21
|
+
damerau-levenshtein (0.5.4)
|
26
22
|
debugger (1.5.0)
|
27
23
|
columnize (>= 0.3.1)
|
28
24
|
debugger-linecache (~> 1.2.0)
|
@@ -34,10 +30,11 @@ GEM
|
|
34
30
|
multi_json (~> 1.3)
|
35
31
|
git (1.2.5)
|
36
32
|
i18n (0.6.1)
|
37
|
-
jeweler (1.
|
33
|
+
jeweler (1.8.4)
|
38
34
|
bundler (~> 1.0)
|
39
35
|
git (>= 1.2.5)
|
40
36
|
rake
|
37
|
+
rdoc
|
41
38
|
json (1.7.7)
|
42
39
|
metaclass (0.0.1)
|
43
40
|
mocha (0.13.3)
|
@@ -48,6 +45,8 @@ GEM
|
|
48
45
|
rake (10.0.4)
|
49
46
|
rake-compiler (0.8.3)
|
50
47
|
rake
|
48
|
+
rdoc (4.0.1)
|
49
|
+
json (~> 1.4)
|
51
50
|
rspec (2.13.0)
|
52
51
|
rspec-core (~> 2.13.0)
|
53
52
|
rspec-expectations (~> 2.13.0)
|
@@ -72,16 +71,16 @@ PLATFORMS
|
|
72
71
|
ruby
|
73
72
|
|
74
73
|
DEPENDENCIES
|
75
|
-
|
74
|
+
biodiversity (~> 3.0.1)
|
76
75
|
bundler (~> 1.3)
|
77
|
-
cucumber
|
78
|
-
damerau-levenshtein (
|
79
|
-
debugger
|
80
|
-
jeweler (~> 1.
|
76
|
+
cucumber (~> 1.3)
|
77
|
+
damerau-levenshtein (~> 0.5.4)
|
78
|
+
debugger (~> 1.5)
|
79
|
+
jeweler (~> 1.8)
|
81
80
|
json (~> 1.7.7)
|
82
|
-
mocha
|
83
|
-
rake
|
84
|
-
rake-compiler
|
85
|
-
rspec
|
86
|
-
ruby-prof
|
87
|
-
shoulda
|
81
|
+
mocha (~> 0.13)
|
82
|
+
rake (~> 10.0)
|
83
|
+
rake-compiler (~> 0.8)
|
84
|
+
rspec (~> 2.13)
|
85
|
+
ruby-prof (~> 0.13)
|
86
|
+
shoulda (~> 3.5)
|
data/LICENSE
CHANGED
data/{README.rdoc → README.md}
RENAMED
@@ -1,8 +1,16 @@
|
|
1
|
-
|
1
|
+
Taxamatch_Rb
|
2
|
+
============
|
2
3
|
|
3
|
-
|
4
|
+
[![Gem Version][1]][2]
|
5
|
+
[![Continuous Integration Status][3]][4]
|
6
|
+
[![Dependency Status][5]][6]
|
4
7
|
|
5
|
-
|
8
|
+
Taxamatch_Rb is a ruby implementation of Taxamatch algorithms
|
9
|
+
[developed by Tony Rees][7]:
|
10
|
+
|
11
|
+
The purpose of Taxamatch gem is to facilitate fuzzy comparison of
|
12
|
+
two scientific name renderings to find out if they actually point to
|
13
|
+
the same scientific name.
|
6
14
|
|
7
15
|
require 'taxamatch_rb'
|
8
16
|
tm = Taxamatch::Base.new
|
@@ -12,11 +20,13 @@ The purpose of Taxamatch gem is to facilitate fuzzy comparison of two scientific
|
|
12
20
|
|
13
21
|
Taxamatch_Rb is compatible with ruby versions 1.9.1 and higher
|
14
22
|
|
15
|
-
|
23
|
+
Installation
|
24
|
+
------------
|
16
25
|
|
17
26
|
sudo gem install taxamatch_rb
|
18
27
|
|
19
|
-
|
28
|
+
Usage
|
29
|
+
-----
|
20
30
|
|
21
31
|
require 'taxamatch_rb'
|
22
32
|
|
@@ -51,6 +61,15 @@ Taxamatch_Rb is compatible with ruby versions 1.9.1 and higher
|
|
51
61
|
|
52
62
|
You can find more examples in spec section of the code
|
53
63
|
|
54
|
-
|
64
|
+
Copyright
|
65
|
+
---------
|
66
|
+
|
67
|
+
Copyright (c) 2009-2013 Marine Biological Laboratory. See LICENSE for details.
|
55
68
|
|
56
|
-
|
69
|
+
[1]: https://badge.fury.io/rb/taxamatch_rb.png
|
70
|
+
[2]: http://badge.fury.io/rb/taxamatch_rb
|
71
|
+
[3]: https://secure.travis-ci.org/GlobalNamesArchitecture/taxamatch_rb.png
|
72
|
+
[4]: http://travis-ci.org/GlobalNamesArchitecture/taxamatch_rb
|
73
|
+
[5]: https://gemnasium.com/GlobalNamesArchitecture/taxamatch_rb.png
|
74
|
+
[6]: https://gemnasium.com/GlobalNamesArchitecture/taxamatch_rb
|
75
|
+
[7]: http://www.cmar.csiro.au/datacentre/taxamatch.htm
|
data/Rakefile
CHANGED
@@ -5,7 +5,7 @@ begin
|
|
5
5
|
Bundler.setup(:default, :development)
|
6
6
|
rescue Bundler::BundlerError => e
|
7
7
|
$stderr.puts e.message
|
8
|
-
$stderr.puts
|
8
|
+
$stderr.puts 'Run `bundle install` to install missing gems'
|
9
9
|
exit e.status_code
|
10
10
|
end
|
11
11
|
|
@@ -14,21 +14,23 @@ require 'rake'
|
|
14
14
|
begin
|
15
15
|
require 'jeweler'
|
16
16
|
Jeweler::Tasks.new do |gem|
|
17
|
-
gem.name =
|
17
|
+
gem.name = 'taxamatch_rb'
|
18
18
|
gem.summary = 'Implementation of Tony Rees Taxamatch algorithms'
|
19
|
-
gem.description = 'This gem implements algorithm
|
20
|
-
|
21
|
-
gem.
|
22
|
-
gem.
|
23
|
-
gem.
|
19
|
+
gem.description = 'This gem implements algorithm ' +
|
20
|
+
'for fuzzy matching scientific names developed by Tony Rees'
|
21
|
+
gem.email = 'dmozzherin@gmail.com'
|
22
|
+
gem.homepage = 'http://github.com/GlobalNamesArchitecture/taxamatch_rb'
|
23
|
+
gem.authors = ['Dmitry Mozzherin']
|
24
|
+
gem.files = FileList['[A-Z]*',
|
25
|
+
'*.gemspec', '{bin,generators,lib,spec}/**/*']
|
24
26
|
gem.files -= FileList['lib/**/*.bundle', 'lib/**/*.dll', 'lib/**/*.so']
|
25
27
|
gem.files += FileList['ext/**/*.c']
|
26
28
|
gem.extensions = FileList['ext/**/extconf.rb']
|
27
|
-
# gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
|
28
29
|
end
|
29
30
|
|
30
31
|
rescue LoadError
|
31
|
-
puts
|
32
|
+
puts 'Jeweler (or a dependency) not available.' +
|
33
|
+
' Install it with: sudo gem install jeweler'
|
32
34
|
end
|
33
35
|
|
34
36
|
require 'rspec/core'
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.
|
1
|
+
1.0.0
|
data/lib/taxamatch_rb.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
# encoding: UTF-8
|
2
2
|
$:.unshift(File.dirname(__FILE__)) unless
|
3
|
-
$:.include?(File.dirname(__FILE__)) ||
|
3
|
+
$:.include?(File.dirname(__FILE__)) ||
|
4
|
+
$:.include?(File.expand_path(File.dirname(__FILE__)))
|
4
5
|
# $:.unshift('taxamatch_rb')
|
5
6
|
require 'damerau-levenshtein'
|
6
7
|
require 'taxamatch_rb/atomizer'
|
@@ -8,8 +9,9 @@ require 'taxamatch_rb/normalizer'
|
|
8
9
|
require 'taxamatch_rb/phonetizer'
|
9
10
|
require 'taxamatch_rb/authmatch'
|
10
11
|
|
11
|
-
|
12
|
-
|
12
|
+
if RUBY_VERSION < '1.9.1'
|
13
|
+
raise 'IMPORTANT: Parsley-store gem requires ruby >= 1.9.1'
|
14
|
+
end
|
13
15
|
|
14
16
|
module Taxamatch
|
15
17
|
|
@@ -21,7 +23,8 @@ module Taxamatch
|
|
21
23
|
end
|
22
24
|
|
23
25
|
|
24
|
-
#takes two scientific names and returns true
|
26
|
+
# takes two scientific names and returns true
|
27
|
+
# if names match and false if they don't
|
25
28
|
def taxamatch(str1, str2, return_boolean = true)
|
26
29
|
preparsed_1 = @parser.parse(str1)
|
27
30
|
preparsed_2 = @parser.parse(str2)
|
@@ -29,14 +32,19 @@ module Taxamatch
|
|
29
32
|
return_boolean ? (!!match && match['match']) : match
|
30
33
|
end
|
31
34
|
|
32
|
-
#takes two hashes of parsed scientific names, analyses them and
|
33
|
-
#this function is useful when species strings are preparsed.
|
35
|
+
# takes two hashes of parsed scientific names, analyses them and
|
36
|
+
# returns the result; this function is useful when species strings are preparsed.
|
34
37
|
def taxamatch_preparsed(preparsed_1, preparsed_2)
|
35
38
|
result = nil
|
36
|
-
|
37
|
-
|
39
|
+
if preparsed_1[:uninomial] && preparsed_2[:uninomial]
|
40
|
+
result = match_uninomial(preparsed_1, preparsed_2)
|
41
|
+
end
|
42
|
+
if preparsed_1[:genus] && preparsed_2[:genus]
|
43
|
+
result = match_multinomial(preparsed_1, preparsed_2)
|
44
|
+
end
|
38
45
|
if result && result['match']
|
39
|
-
result['match'] = match_authors(preparsed_1, preparsed_2) == -1 ?
|
46
|
+
result['match'] = match_authors(preparsed_1, preparsed_2) == -1 ?
|
47
|
+
false : true
|
40
48
|
end
|
41
49
|
return result
|
42
50
|
end
|
@@ -48,65 +56,89 @@ module Taxamatch
|
|
48
56
|
def match_multinomial(preparsed_1, preparsed_2)
|
49
57
|
gen_match = match_genera(preparsed_1[:genus], preparsed_2[:genus])
|
50
58
|
sp_match = match_species(preparsed_1[:species], preparsed_2[:species])
|
51
|
-
total_length = preparsed_1[:genus][:string].size +
|
59
|
+
total_length = preparsed_1[:genus][:string].size +
|
60
|
+
preparsed_2[:genus][:string].size +
|
61
|
+
preparsed_1[:species][:string].size +
|
62
|
+
preparsed_2[:species][:string].size
|
52
63
|
if preparsed_1[:infraspecies] && preparsed_2[:infraspecies]
|
53
|
-
infrasp_match = match_species(preparsed_1[:infraspecies][0],
|
54
|
-
|
64
|
+
infrasp_match = match_species(preparsed_1[:infraspecies][0],
|
65
|
+
preparsed_2[:infraspecies][0])
|
66
|
+
total_length += preparsed_1[:infraspecies][0][:string].size +
|
67
|
+
preparsed_2[:infraspecies][0][:string].size
|
55
68
|
match_hash = match_matches(gen_match, sp_match, infrasp_match)
|
56
|
-
elsif (preparsed_1[:infraspecies] && !preparsed_2[:infraspecies]) ||
|
57
|
-
|
58
|
-
|
69
|
+
elsif (preparsed_1[:infraspecies] && !preparsed_2[:infraspecies]) ||
|
70
|
+
(!preparsed_1[:infraspecies] && preparsed_2[:infraspecies])
|
71
|
+
match_hash = { 'match' => false,
|
72
|
+
'edit_distance' => 5,
|
73
|
+
'phonetic_match' => false }
|
74
|
+
total_length += preparsed_1[:infraspecies] ?
|
75
|
+
preparsed_1[:infraspecies][0][:string].size :
|
76
|
+
preparsed_2[:infraspecies][0][:string].size
|
59
77
|
else
|
60
78
|
match_hash = match_matches(gen_match, sp_match)
|
61
79
|
end
|
62
|
-
match_hash.merge({'score' =>
|
80
|
+
match_hash.merge({ 'score' =>
|
81
|
+
(1 - match_hash['edit_distance']/(total_length/2)) })
|
63
82
|
match_hash
|
64
83
|
end
|
65
84
|
|
66
85
|
def match_genera(genus1, genus2, opts = {})
|
67
86
|
genus1_length = genus1[:normalized].size
|
68
87
|
genus2_length = genus2[:normalized].size
|
69
|
-
opts = {:
|
88
|
+
opts = { with_phonetic_match: true }.merge(opts)
|
70
89
|
min_length = [genus1_length, genus2_length].min
|
71
|
-
unless opts[:with_phonetic_match]
|
72
|
-
genus1[:phonetized] =
|
73
|
-
genus2[:phonetized] =
|
90
|
+
unless opts[:with_phonetic_match]
|
91
|
+
genus1[:phonetized] = 'A'
|
92
|
+
genus2[:phonetized] = 'B'
|
74
93
|
end
|
75
94
|
match = false
|
76
|
-
ed = @dlm.distance(genus1[:normalized],
|
77
|
-
|
78
|
-
return {'edit_distance' => ed,
|
79
|
-
|
80
|
-
|
81
|
-
{'edit_distance' => ed,
|
95
|
+
ed = @dlm.distance(genus1[:normalized],
|
96
|
+
genus2[:normalized], 1, 3) #TODO put block = 2
|
97
|
+
return { 'edit_distance' => ed,
|
98
|
+
'phonetic_match' => false,
|
99
|
+
'match' => false } if ed/min_length.to_f > 0.2
|
100
|
+
return { 'edit_distance' => ed,
|
101
|
+
'phonetic_match' => true,
|
102
|
+
'match' => true } if genus1[:phonetized] == genus2[:phonetized]
|
103
|
+
|
104
|
+
match = true if ed <= 3 && (min_length > ed * 2) &&
|
105
|
+
(ed < 2 || genus1[0] == genus2[0])
|
106
|
+
{ 'edit_distance' => ed, 'match' => match, 'phonetic_match' => false }
|
82
107
|
end
|
83
108
|
|
84
109
|
def match_species(sp1, sp2, opts = {})
|
85
110
|
sp1_length = sp1[:normalized].size
|
86
111
|
sp2_length = sp2[:normalized].size
|
87
|
-
opts = {:
|
112
|
+
opts = { with_phonetic_match: true }.merge(opts)
|
88
113
|
min_length = [sp1_length, sp2_length].min
|
89
114
|
unless opts[:with_phonetic_match]
|
90
|
-
sp1[:phonetized] =
|
91
|
-
sp2[:phonetized] =
|
92
|
-
end
|
115
|
+
sp1[:phonetized] = 'A'
|
116
|
+
sp2[:phonetized] = 'B'
|
117
|
+
end
|
93
118
|
sp1[:phonetized] = Taxamatch::Phonetizer.normalize_ending sp1[:phonetized]
|
94
119
|
sp2[:phonetized] = Taxamatch::Phonetizer.normalize_ending sp2[:phonetized]
|
95
120
|
match = false
|
96
|
-
ed = @dlm.distance(sp1[:normalized],
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
121
|
+
ed = @dlm.distance(sp1[:normalized],
|
122
|
+
sp2[:normalized], 1, 4) #TODO put block 4
|
123
|
+
return { 'edit_distance' => ed,
|
124
|
+
'phonetic_match' => false,
|
125
|
+
'match' => false } if ed/min_length.to_f > 0.3334
|
126
|
+
return {'edit_distance' => ed,
|
127
|
+
'phonetic_match' => true,
|
128
|
+
'match' => true} if sp1[:phonetized] == sp2[:phonetized]
|
129
|
+
|
130
|
+
match = true if ed <= 4 &&
|
131
|
+
(min_length >= ed * 2) &&
|
132
|
+
(ed < 2 || sp1[:normalized][0] == sp2[:normalized][0]) &&
|
133
|
+
(ed < 4 || sp1[:normalized][0...3] == sp2[:normalized][0...3])
|
134
|
+
{ 'edit_distance' => ed, 'match' => match, 'phonetic_match' => false }
|
103
135
|
end
|
104
136
|
|
105
137
|
def match_authors(preparsed_1, preparsed_2)
|
106
|
-
p1 = { :
|
107
|
-
p2 = { :
|
138
|
+
p1 = { normalized_authors: [], years: [] }
|
139
|
+
p2 = { normalized_authors: [], years: [] }
|
108
140
|
if preparsed_1[:infraspecies] || preparsed_2[:infraspecies]
|
109
|
-
p1 = preparsed_1[:infraspecies].last if preparsed_1[:infraspecies]
|
141
|
+
p1 = preparsed_1[:infraspecies].last if preparsed_1[:infraspecies]
|
110
142
|
p2 = preparsed_2[:infraspecies].last if preparsed_2[:infraspecies]
|
111
143
|
elsif preparsed_1[:species] || preparsed_2[:species]
|
112
144
|
p1 = preparsed_1[:species] if preparsed_1[:species]
|
@@ -119,7 +151,7 @@ module Taxamatch
|
|
119
151
|
au2 = p2[:normalized_authors]
|
120
152
|
yr1 = p1[:years]
|
121
153
|
yr2 = p2[:years]
|
122
|
-
return 0 if au1.empty? || au2.empty?
|
154
|
+
return 0 if au1.empty? || au2.empty?
|
123
155
|
score = Taxamatch::Authmatch.authmatch(au1, au2, yr1, yr2)
|
124
156
|
score == 0 ? -1 : 1
|
125
157
|
end
|
@@ -132,12 +164,13 @@ module Taxamatch
|
|
132
164
|
match['phonetic_match'] &&= infraspecies_match['phonetic_match']
|
133
165
|
end
|
134
166
|
match['edit_distance'] += genus_match['edit_distance']
|
135
|
-
|
167
|
+
if match['edit_distance'] > (infraspecies_match ? 6 : 4)
|
168
|
+
match['match'] = false
|
169
|
+
end
|
136
170
|
match['match'] &&= genus_match['match']
|
137
171
|
match['phonetic_match'] &&= genus_match['phonetic_match']
|
138
172
|
match
|
139
173
|
end
|
140
174
|
|
141
175
|
end
|
142
|
-
|
143
176
|
end
|
@@ -9,12 +9,12 @@ module Taxamatch
|
|
9
9
|
@parsed_raw = nil
|
10
10
|
@res = {}
|
11
11
|
end
|
12
|
-
|
12
|
+
|
13
13
|
def parse(name)
|
14
14
|
@parsed_raw = @parser.parse(name)[:scientificName]
|
15
15
|
organize_results(@parsed_raw)
|
16
16
|
end
|
17
|
-
|
17
|
+
|
18
18
|
def parsed_raw
|
19
19
|
return @parsed_raw
|
20
20
|
end
|
@@ -29,11 +29,13 @@ module Taxamatch
|
|
29
29
|
process_node(:genus, d[:genus])
|
30
30
|
process_node(:species, d[:species], true)
|
31
31
|
process_infraspecies(d[:infraspecies])
|
32
|
-
@res[:all_authors] = @res[:all_authors].uniq.map
|
32
|
+
@res[:all_authors] = @res[:all_authors].uniq.map do |a|
|
33
|
+
Taxamatch::Normalizer.normalize(a)
|
34
|
+
end
|
33
35
|
@res[:all_years].uniq!
|
34
36
|
@res.keys.size > 2 ? @res : nil
|
35
37
|
end
|
36
|
-
|
38
|
+
|
37
39
|
private
|
38
40
|
|
39
41
|
def process_node(name, node, is_species = false)
|
@@ -41,14 +43,16 @@ module Taxamatch
|
|
41
43
|
@res[name] = {}
|
42
44
|
@res[name][:string] = node[:string]
|
43
45
|
@res[name][:normalized] = Taxamatch::Normalizer.normalize(node[:string])
|
44
|
-
@res[name][:phonetized] =
|
46
|
+
@res[name][:phonetized] =
|
47
|
+
Taxamatch::Phonetizer.near_match(node[:string], is_species)
|
45
48
|
get_authors_years(node, @res[name])
|
46
49
|
end
|
47
|
-
|
50
|
+
|
48
51
|
def process_infraspecies(node)
|
49
52
|
return unless node
|
50
53
|
@res[:infraspecies] = []
|
51
54
|
node.each do |infr|
|
55
|
+
next unless infr[:string]
|
52
56
|
hsh = {}
|
53
57
|
hsh[:string] = infr[:string]
|
54
58
|
hsh[:normalized] = Taxamatch::Normalizer.normalize(infr[:string])
|
@@ -57,7 +61,7 @@ module Taxamatch
|
|
57
61
|
@res[:infraspecies] << hsh
|
58
62
|
end
|
59
63
|
end
|
60
|
-
|
64
|
+
|
61
65
|
def get_authors_years(node, res)
|
62
66
|
res[:authors] = []
|
63
67
|
res[:years] = []
|
@@ -71,16 +75,21 @@ module Taxamatch
|
|
71
75
|
if node[au][:exAuthorTeam]
|
72
76
|
res[:authors] += node[au][:exAuthorTeam][:author]
|
73
77
|
if node[au][:exAuthorTeam][:year]
|
74
|
-
year =
|
78
|
+
year = node[au][:exAuthorTeam][:year]
|
79
|
+
year = Taxamatch::Normalizer.normalize_year(year)
|
75
80
|
res[:years] << year if year
|
76
81
|
end
|
77
82
|
end
|
78
83
|
end
|
79
84
|
end
|
80
85
|
res[:authors].uniq!
|
81
|
-
res[:normalized_authors] = res[:authors].map
|
86
|
+
res[:normalized_authors] = res[:authors].map do |a|
|
87
|
+
Taxamatch::Normalizer.normalize_author(a)
|
88
|
+
end
|
82
89
|
res[:years].uniq!
|
83
|
-
|
90
|
+
if res[:normalized_authors].size > 0
|
91
|
+
@res[:all_authors] += res[:normalized_authors]
|
92
|
+
end
|
84
93
|
@res[:all_years] += res[:years] if res[:years].size > 0
|
85
94
|
end
|
86
95
|
|