taxamatch_rb 0.9.10 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +5 -2
- data/Gemfile +14 -16
- data/Gemfile.lock +18 -19
- data/LICENSE +1 -1
- data/{README.rdoc → README.md} +26 -7
- data/Rakefile +11 -9
- data/VERSION +1 -1
- data/lib/taxamatch_rb.rb +76 -43
- data/lib/taxamatch_rb/atomizer.rb +19 -10
- data/lib/taxamatch_rb/authmatch.rb +29 -16
- data/lib/taxamatch_rb/normalizer.rb +4 -4
- data/lib/taxamatch_rb/phonetizer.rb +9 -8
- data/spec/taxamatch_rb_spec.rb +223 -109
- data/taxamatch_rb.gemspec +11 -41
- metadata +11 -171
data/CHANGELOG
CHANGED
@@ -1,11 +1,14 @@
|
|
1
|
+
1.0.0 - fixed a parsing problem with infraspecies without string,
|
2
|
+
upgraded version to 1 because the signature of the gem has stabilized
|
3
|
+
|
1
4
|
0.9.8 - fixed a parsing problem with species nodes without name
|
2
5
|
|
3
6
|
0.9.4 - updated parser (to 1.0.16), updated code to ruby 1.9.3
|
4
7
|
|
5
|
-
0.9.3 - Taxamatch::Normalizer substitutes multiplication sign to 'x'
|
8
|
+
0.9.3 - Taxamatch::Normalizer substitutes multiplication sign to 'x'
|
6
9
|
(lowercase) instead of '?'
|
7
10
|
|
8
|
-
0.9.2 - Taxamatch::Normalizer.normalize always returns only ASCII
|
11
|
+
0.9.2 - Taxamatch::Normalizer.normalize always returns only ASCII
|
9
12
|
characters; all UTF-8 characters unknown to the normalizer become '?'
|
10
13
|
|
11
14
|
0.9.1 - updated gems
|
data/Gemfile
CHANGED
@@ -1,21 +1,19 @@
|
|
1
|
-
source
|
1
|
+
source 'https://rubygems.org'
|
2
2
|
require 'yaml'
|
3
|
-
# YAML::ENGINE.yamler= 'syck'
|
4
3
|
|
5
|
-
gem
|
6
|
-
gem
|
4
|
+
gem 'biodiversity','~> 3.0.1'
|
5
|
+
gem 'damerau-levenshtein', '~> 0.5.4'
|
7
6
|
gem 'json', '~> 1.7.7'
|
8
7
|
|
9
|
-
|
10
|
-
|
11
|
-
gem
|
12
|
-
gem
|
13
|
-
gem
|
14
|
-
gem
|
15
|
-
gem
|
16
|
-
gem
|
17
|
-
gem
|
18
|
-
gem
|
19
|
-
gem
|
20
|
-
gem "mocha"
|
8
|
+
group :test do
|
9
|
+
gem 'rake', '~> 10.0'
|
10
|
+
gem 'rake-compiler', '~> 0.8'
|
11
|
+
gem 'rspec', '~> 2.13'
|
12
|
+
gem 'cucumber', '~> 1.3'
|
13
|
+
gem 'bundler', '~> 1.3'
|
14
|
+
gem 'jeweler', '~> 1.8'
|
15
|
+
gem 'debugger', '~> 1.5'
|
16
|
+
gem 'ruby-prof', '~> 0.13'
|
17
|
+
gem 'shoulda', '~> 3.5'
|
18
|
+
gem 'mocha', '~> 0.13'
|
21
19
|
end
|
data/Gemfile.lock
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
GEM
|
2
|
-
remote:
|
2
|
+
remote: https://rubygems.org/
|
3
3
|
specs:
|
4
4
|
activesupport (3.2.13)
|
5
5
|
i18n (= 0.6.1)
|
6
6
|
multi_json (~> 1.0)
|
7
|
-
|
7
|
+
biodiversity (3.0.1)
|
8
8
|
parallel
|
9
9
|
parallel (~> 0.6)
|
10
10
|
rake (~> 10.0)
|
@@ -18,11 +18,7 @@ GEM
|
|
18
18
|
diff-lcs (>= 1.1.3)
|
19
19
|
gherkin (~> 2.12.0)
|
20
20
|
multi_json (~> 1.3)
|
21
|
-
damerau-levenshtein (
|
22
|
-
bundler (~> 1)
|
23
|
-
jeweler (~> 1)
|
24
|
-
rake (~> 10)
|
25
|
-
rake-compiler (~> 0.8)
|
21
|
+
damerau-levenshtein (0.5.4)
|
26
22
|
debugger (1.5.0)
|
27
23
|
columnize (>= 0.3.1)
|
28
24
|
debugger-linecache (~> 1.2.0)
|
@@ -34,10 +30,11 @@ GEM
|
|
34
30
|
multi_json (~> 1.3)
|
35
31
|
git (1.2.5)
|
36
32
|
i18n (0.6.1)
|
37
|
-
jeweler (1.
|
33
|
+
jeweler (1.8.4)
|
38
34
|
bundler (~> 1.0)
|
39
35
|
git (>= 1.2.5)
|
40
36
|
rake
|
37
|
+
rdoc
|
41
38
|
json (1.7.7)
|
42
39
|
metaclass (0.0.1)
|
43
40
|
mocha (0.13.3)
|
@@ -48,6 +45,8 @@ GEM
|
|
48
45
|
rake (10.0.4)
|
49
46
|
rake-compiler (0.8.3)
|
50
47
|
rake
|
48
|
+
rdoc (4.0.1)
|
49
|
+
json (~> 1.4)
|
51
50
|
rspec (2.13.0)
|
52
51
|
rspec-core (~> 2.13.0)
|
53
52
|
rspec-expectations (~> 2.13.0)
|
@@ -72,16 +71,16 @@ PLATFORMS
|
|
72
71
|
ruby
|
73
72
|
|
74
73
|
DEPENDENCIES
|
75
|
-
|
74
|
+
biodiversity (~> 3.0.1)
|
76
75
|
bundler (~> 1.3)
|
77
|
-
cucumber
|
78
|
-
damerau-levenshtein (
|
79
|
-
debugger
|
80
|
-
jeweler (~> 1.
|
76
|
+
cucumber (~> 1.3)
|
77
|
+
damerau-levenshtein (~> 0.5.4)
|
78
|
+
debugger (~> 1.5)
|
79
|
+
jeweler (~> 1.8)
|
81
80
|
json (~> 1.7.7)
|
82
|
-
mocha
|
83
|
-
rake
|
84
|
-
rake-compiler
|
85
|
-
rspec
|
86
|
-
ruby-prof
|
87
|
-
shoulda
|
81
|
+
mocha (~> 0.13)
|
82
|
+
rake (~> 10.0)
|
83
|
+
rake-compiler (~> 0.8)
|
84
|
+
rspec (~> 2.13)
|
85
|
+
ruby-prof (~> 0.13)
|
86
|
+
shoulda (~> 3.5)
|
data/LICENSE
CHANGED
data/{README.rdoc → README.md}
RENAMED
@@ -1,8 +1,16 @@
|
|
1
|
-
|
1
|
+
Taxamatch_Rb
|
2
|
+
============
|
2
3
|
|
3
|
-
|
4
|
+
[![Gem Version][1]][2]
|
5
|
+
[![Continuous Integration Status][3]][4]
|
6
|
+
[![Dependency Status][5]][6]
|
4
7
|
|
5
|
-
|
8
|
+
Taxamatch_Rb is a ruby implementation of Taxamatch algorithms
|
9
|
+
[developed by Tony Rees][7]:
|
10
|
+
|
11
|
+
The purpose of Taxamatch gem is to facilitate fuzzy comparison of
|
12
|
+
two scientific name renderings to find out if they actually point to
|
13
|
+
the same scientific name.
|
6
14
|
|
7
15
|
require 'taxamatch_rb'
|
8
16
|
tm = Taxamatch::Base.new
|
@@ -12,11 +20,13 @@ The purpose of Taxamatch gem is to facilitate fuzzy comparison of two scientific
|
|
12
20
|
|
13
21
|
Taxamatch_Rb is compatible with ruby versions 1.9.1 and higher
|
14
22
|
|
15
|
-
|
23
|
+
Installation
|
24
|
+
------------
|
16
25
|
|
17
26
|
sudo gem install taxamatch_rb
|
18
27
|
|
19
|
-
|
28
|
+
Usage
|
29
|
+
-----
|
20
30
|
|
21
31
|
require 'taxamatch_rb'
|
22
32
|
|
@@ -51,6 +61,15 @@ Taxamatch_Rb is compatible with ruby versions 1.9.1 and higher
|
|
51
61
|
|
52
62
|
You can find more examples in spec section of the code
|
53
63
|
|
54
|
-
|
64
|
+
Copyright
|
65
|
+
---------
|
66
|
+
|
67
|
+
Copyright (c) 2009-2013 Marine Biological Laboratory. See LICENSE for details.
|
55
68
|
|
56
|
-
|
69
|
+
[1]: https://badge.fury.io/rb/taxamatch_rb.png
|
70
|
+
[2]: http://badge.fury.io/rb/taxamatch_rb
|
71
|
+
[3]: https://secure.travis-ci.org/GlobalNamesArchitecture/taxamatch_rb.png
|
72
|
+
[4]: http://travis-ci.org/GlobalNamesArchitecture/taxamatch_rb
|
73
|
+
[5]: https://gemnasium.com/GlobalNamesArchitecture/taxamatch_rb.png
|
74
|
+
[6]: https://gemnasium.com/GlobalNamesArchitecture/taxamatch_rb
|
75
|
+
[7]: http://www.cmar.csiro.au/datacentre/taxamatch.htm
|
data/Rakefile
CHANGED
@@ -5,7 +5,7 @@ begin
|
|
5
5
|
Bundler.setup(:default, :development)
|
6
6
|
rescue Bundler::BundlerError => e
|
7
7
|
$stderr.puts e.message
|
8
|
-
$stderr.puts
|
8
|
+
$stderr.puts 'Run `bundle install` to install missing gems'
|
9
9
|
exit e.status_code
|
10
10
|
end
|
11
11
|
|
@@ -14,21 +14,23 @@ require 'rake'
|
|
14
14
|
begin
|
15
15
|
require 'jeweler'
|
16
16
|
Jeweler::Tasks.new do |gem|
|
17
|
-
gem.name =
|
17
|
+
gem.name = 'taxamatch_rb'
|
18
18
|
gem.summary = 'Implementation of Tony Rees Taxamatch algorithms'
|
19
|
-
gem.description = 'This gem implements algorithm
|
20
|
-
|
21
|
-
gem.
|
22
|
-
gem.
|
23
|
-
gem.
|
19
|
+
gem.description = 'This gem implements algorithm ' +
|
20
|
+
'for fuzzy matching scientific names developed by Tony Rees'
|
21
|
+
gem.email = 'dmozzherin@gmail.com'
|
22
|
+
gem.homepage = 'http://github.com/GlobalNamesArchitecture/taxamatch_rb'
|
23
|
+
gem.authors = ['Dmitry Mozzherin']
|
24
|
+
gem.files = FileList['[A-Z]*',
|
25
|
+
'*.gemspec', '{bin,generators,lib,spec}/**/*']
|
24
26
|
gem.files -= FileList['lib/**/*.bundle', 'lib/**/*.dll', 'lib/**/*.so']
|
25
27
|
gem.files += FileList['ext/**/*.c']
|
26
28
|
gem.extensions = FileList['ext/**/extconf.rb']
|
27
|
-
# gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
|
28
29
|
end
|
29
30
|
|
30
31
|
rescue LoadError
|
31
|
-
puts
|
32
|
+
puts 'Jeweler (or a dependency) not available.' +
|
33
|
+
' Install it with: sudo gem install jeweler'
|
32
34
|
end
|
33
35
|
|
34
36
|
require 'rspec/core'
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.
|
1
|
+
1.0.0
|
data/lib/taxamatch_rb.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
# encoding: UTF-8
|
2
2
|
$:.unshift(File.dirname(__FILE__)) unless
|
3
|
-
$:.include?(File.dirname(__FILE__)) ||
|
3
|
+
$:.include?(File.dirname(__FILE__)) ||
|
4
|
+
$:.include?(File.expand_path(File.dirname(__FILE__)))
|
4
5
|
# $:.unshift('taxamatch_rb')
|
5
6
|
require 'damerau-levenshtein'
|
6
7
|
require 'taxamatch_rb/atomizer'
|
@@ -8,8 +9,9 @@ require 'taxamatch_rb/normalizer'
|
|
8
9
|
require 'taxamatch_rb/phonetizer'
|
9
10
|
require 'taxamatch_rb/authmatch'
|
10
11
|
|
11
|
-
|
12
|
-
|
12
|
+
if RUBY_VERSION < '1.9.1'
|
13
|
+
raise 'IMPORTANT: Parsley-store gem requires ruby >= 1.9.1'
|
14
|
+
end
|
13
15
|
|
14
16
|
module Taxamatch
|
15
17
|
|
@@ -21,7 +23,8 @@ module Taxamatch
|
|
21
23
|
end
|
22
24
|
|
23
25
|
|
24
|
-
#takes two scientific names and returns true
|
26
|
+
# takes two scientific names and returns true
|
27
|
+
# if names match and false if they don't
|
25
28
|
def taxamatch(str1, str2, return_boolean = true)
|
26
29
|
preparsed_1 = @parser.parse(str1)
|
27
30
|
preparsed_2 = @parser.parse(str2)
|
@@ -29,14 +32,19 @@ module Taxamatch
|
|
29
32
|
return_boolean ? (!!match && match['match']) : match
|
30
33
|
end
|
31
34
|
|
32
|
-
#takes two hashes of parsed scientific names, analyses them and
|
33
|
-
#this function is useful when species strings are preparsed.
|
35
|
+
# takes two hashes of parsed scientific names, analyses them and
|
36
|
+
# returns the result; this function is useful when species strings are preparsed.
|
34
37
|
def taxamatch_preparsed(preparsed_1, preparsed_2)
|
35
38
|
result = nil
|
36
|
-
|
37
|
-
|
39
|
+
if preparsed_1[:uninomial] && preparsed_2[:uninomial]
|
40
|
+
result = match_uninomial(preparsed_1, preparsed_2)
|
41
|
+
end
|
42
|
+
if preparsed_1[:genus] && preparsed_2[:genus]
|
43
|
+
result = match_multinomial(preparsed_1, preparsed_2)
|
44
|
+
end
|
38
45
|
if result && result['match']
|
39
|
-
result['match'] = match_authors(preparsed_1, preparsed_2) == -1 ?
|
46
|
+
result['match'] = match_authors(preparsed_1, preparsed_2) == -1 ?
|
47
|
+
false : true
|
40
48
|
end
|
41
49
|
return result
|
42
50
|
end
|
@@ -48,65 +56,89 @@ module Taxamatch
|
|
48
56
|
def match_multinomial(preparsed_1, preparsed_2)
|
49
57
|
gen_match = match_genera(preparsed_1[:genus], preparsed_2[:genus])
|
50
58
|
sp_match = match_species(preparsed_1[:species], preparsed_2[:species])
|
51
|
-
total_length = preparsed_1[:genus][:string].size +
|
59
|
+
total_length = preparsed_1[:genus][:string].size +
|
60
|
+
preparsed_2[:genus][:string].size +
|
61
|
+
preparsed_1[:species][:string].size +
|
62
|
+
preparsed_2[:species][:string].size
|
52
63
|
if preparsed_1[:infraspecies] && preparsed_2[:infraspecies]
|
53
|
-
infrasp_match = match_species(preparsed_1[:infraspecies][0],
|
54
|
-
|
64
|
+
infrasp_match = match_species(preparsed_1[:infraspecies][0],
|
65
|
+
preparsed_2[:infraspecies][0])
|
66
|
+
total_length += preparsed_1[:infraspecies][0][:string].size +
|
67
|
+
preparsed_2[:infraspecies][0][:string].size
|
55
68
|
match_hash = match_matches(gen_match, sp_match, infrasp_match)
|
56
|
-
elsif (preparsed_1[:infraspecies] && !preparsed_2[:infraspecies]) ||
|
57
|
-
|
58
|
-
|
69
|
+
elsif (preparsed_1[:infraspecies] && !preparsed_2[:infraspecies]) ||
|
70
|
+
(!preparsed_1[:infraspecies] && preparsed_2[:infraspecies])
|
71
|
+
match_hash = { 'match' => false,
|
72
|
+
'edit_distance' => 5,
|
73
|
+
'phonetic_match' => false }
|
74
|
+
total_length += preparsed_1[:infraspecies] ?
|
75
|
+
preparsed_1[:infraspecies][0][:string].size :
|
76
|
+
preparsed_2[:infraspecies][0][:string].size
|
59
77
|
else
|
60
78
|
match_hash = match_matches(gen_match, sp_match)
|
61
79
|
end
|
62
|
-
match_hash.merge({'score' =>
|
80
|
+
match_hash.merge({ 'score' =>
|
81
|
+
(1 - match_hash['edit_distance']/(total_length/2)) })
|
63
82
|
match_hash
|
64
83
|
end
|
65
84
|
|
66
85
|
def match_genera(genus1, genus2, opts = {})
|
67
86
|
genus1_length = genus1[:normalized].size
|
68
87
|
genus2_length = genus2[:normalized].size
|
69
|
-
opts = {:
|
88
|
+
opts = { with_phonetic_match: true }.merge(opts)
|
70
89
|
min_length = [genus1_length, genus2_length].min
|
71
|
-
unless opts[:with_phonetic_match]
|
72
|
-
genus1[:phonetized] =
|
73
|
-
genus2[:phonetized] =
|
90
|
+
unless opts[:with_phonetic_match]
|
91
|
+
genus1[:phonetized] = 'A'
|
92
|
+
genus2[:phonetized] = 'B'
|
74
93
|
end
|
75
94
|
match = false
|
76
|
-
ed = @dlm.distance(genus1[:normalized],
|
77
|
-
|
78
|
-
return {'edit_distance' => ed,
|
79
|
-
|
80
|
-
|
81
|
-
{'edit_distance' => ed,
|
95
|
+
ed = @dlm.distance(genus1[:normalized],
|
96
|
+
genus2[:normalized], 1, 3) #TODO put block = 2
|
97
|
+
return { 'edit_distance' => ed,
|
98
|
+
'phonetic_match' => false,
|
99
|
+
'match' => false } if ed/min_length.to_f > 0.2
|
100
|
+
return { 'edit_distance' => ed,
|
101
|
+
'phonetic_match' => true,
|
102
|
+
'match' => true } if genus1[:phonetized] == genus2[:phonetized]
|
103
|
+
|
104
|
+
match = true if ed <= 3 && (min_length > ed * 2) &&
|
105
|
+
(ed < 2 || genus1[0] == genus2[0])
|
106
|
+
{ 'edit_distance' => ed, 'match' => match, 'phonetic_match' => false }
|
82
107
|
end
|
83
108
|
|
84
109
|
def match_species(sp1, sp2, opts = {})
|
85
110
|
sp1_length = sp1[:normalized].size
|
86
111
|
sp2_length = sp2[:normalized].size
|
87
|
-
opts = {:
|
112
|
+
opts = { with_phonetic_match: true }.merge(opts)
|
88
113
|
min_length = [sp1_length, sp2_length].min
|
89
114
|
unless opts[:with_phonetic_match]
|
90
|
-
sp1[:phonetized] =
|
91
|
-
sp2[:phonetized] =
|
92
|
-
end
|
115
|
+
sp1[:phonetized] = 'A'
|
116
|
+
sp2[:phonetized] = 'B'
|
117
|
+
end
|
93
118
|
sp1[:phonetized] = Taxamatch::Phonetizer.normalize_ending sp1[:phonetized]
|
94
119
|
sp2[:phonetized] = Taxamatch::Phonetizer.normalize_ending sp2[:phonetized]
|
95
120
|
match = false
|
96
|
-
ed = @dlm.distance(sp1[:normalized],
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
121
|
+
ed = @dlm.distance(sp1[:normalized],
|
122
|
+
sp2[:normalized], 1, 4) #TODO put block 4
|
123
|
+
return { 'edit_distance' => ed,
|
124
|
+
'phonetic_match' => false,
|
125
|
+
'match' => false } if ed/min_length.to_f > 0.3334
|
126
|
+
return {'edit_distance' => ed,
|
127
|
+
'phonetic_match' => true,
|
128
|
+
'match' => true} if sp1[:phonetized] == sp2[:phonetized]
|
129
|
+
|
130
|
+
match = true if ed <= 4 &&
|
131
|
+
(min_length >= ed * 2) &&
|
132
|
+
(ed < 2 || sp1[:normalized][0] == sp2[:normalized][0]) &&
|
133
|
+
(ed < 4 || sp1[:normalized][0...3] == sp2[:normalized][0...3])
|
134
|
+
{ 'edit_distance' => ed, 'match' => match, 'phonetic_match' => false }
|
103
135
|
end
|
104
136
|
|
105
137
|
def match_authors(preparsed_1, preparsed_2)
|
106
|
-
p1 = { :
|
107
|
-
p2 = { :
|
138
|
+
p1 = { normalized_authors: [], years: [] }
|
139
|
+
p2 = { normalized_authors: [], years: [] }
|
108
140
|
if preparsed_1[:infraspecies] || preparsed_2[:infraspecies]
|
109
|
-
p1 = preparsed_1[:infraspecies].last if preparsed_1[:infraspecies]
|
141
|
+
p1 = preparsed_1[:infraspecies].last if preparsed_1[:infraspecies]
|
110
142
|
p2 = preparsed_2[:infraspecies].last if preparsed_2[:infraspecies]
|
111
143
|
elsif preparsed_1[:species] || preparsed_2[:species]
|
112
144
|
p1 = preparsed_1[:species] if preparsed_1[:species]
|
@@ -119,7 +151,7 @@ module Taxamatch
|
|
119
151
|
au2 = p2[:normalized_authors]
|
120
152
|
yr1 = p1[:years]
|
121
153
|
yr2 = p2[:years]
|
122
|
-
return 0 if au1.empty? || au2.empty?
|
154
|
+
return 0 if au1.empty? || au2.empty?
|
123
155
|
score = Taxamatch::Authmatch.authmatch(au1, au2, yr1, yr2)
|
124
156
|
score == 0 ? -1 : 1
|
125
157
|
end
|
@@ -132,12 +164,13 @@ module Taxamatch
|
|
132
164
|
match['phonetic_match'] &&= infraspecies_match['phonetic_match']
|
133
165
|
end
|
134
166
|
match['edit_distance'] += genus_match['edit_distance']
|
135
|
-
|
167
|
+
if match['edit_distance'] > (infraspecies_match ? 6 : 4)
|
168
|
+
match['match'] = false
|
169
|
+
end
|
136
170
|
match['match'] &&= genus_match['match']
|
137
171
|
match['phonetic_match'] &&= genus_match['phonetic_match']
|
138
172
|
match
|
139
173
|
end
|
140
174
|
|
141
175
|
end
|
142
|
-
|
143
176
|
end
|
@@ -9,12 +9,12 @@ module Taxamatch
|
|
9
9
|
@parsed_raw = nil
|
10
10
|
@res = {}
|
11
11
|
end
|
12
|
-
|
12
|
+
|
13
13
|
def parse(name)
|
14
14
|
@parsed_raw = @parser.parse(name)[:scientificName]
|
15
15
|
organize_results(@parsed_raw)
|
16
16
|
end
|
17
|
-
|
17
|
+
|
18
18
|
def parsed_raw
|
19
19
|
return @parsed_raw
|
20
20
|
end
|
@@ -29,11 +29,13 @@ module Taxamatch
|
|
29
29
|
process_node(:genus, d[:genus])
|
30
30
|
process_node(:species, d[:species], true)
|
31
31
|
process_infraspecies(d[:infraspecies])
|
32
|
-
@res[:all_authors] = @res[:all_authors].uniq.map
|
32
|
+
@res[:all_authors] = @res[:all_authors].uniq.map do |a|
|
33
|
+
Taxamatch::Normalizer.normalize(a)
|
34
|
+
end
|
33
35
|
@res[:all_years].uniq!
|
34
36
|
@res.keys.size > 2 ? @res : nil
|
35
37
|
end
|
36
|
-
|
38
|
+
|
37
39
|
private
|
38
40
|
|
39
41
|
def process_node(name, node, is_species = false)
|
@@ -41,14 +43,16 @@ module Taxamatch
|
|
41
43
|
@res[name] = {}
|
42
44
|
@res[name][:string] = node[:string]
|
43
45
|
@res[name][:normalized] = Taxamatch::Normalizer.normalize(node[:string])
|
44
|
-
@res[name][:phonetized] =
|
46
|
+
@res[name][:phonetized] =
|
47
|
+
Taxamatch::Phonetizer.near_match(node[:string], is_species)
|
45
48
|
get_authors_years(node, @res[name])
|
46
49
|
end
|
47
|
-
|
50
|
+
|
48
51
|
def process_infraspecies(node)
|
49
52
|
return unless node
|
50
53
|
@res[:infraspecies] = []
|
51
54
|
node.each do |infr|
|
55
|
+
next unless infr[:string]
|
52
56
|
hsh = {}
|
53
57
|
hsh[:string] = infr[:string]
|
54
58
|
hsh[:normalized] = Taxamatch::Normalizer.normalize(infr[:string])
|
@@ -57,7 +61,7 @@ module Taxamatch
|
|
57
61
|
@res[:infraspecies] << hsh
|
58
62
|
end
|
59
63
|
end
|
60
|
-
|
64
|
+
|
61
65
|
def get_authors_years(node, res)
|
62
66
|
res[:authors] = []
|
63
67
|
res[:years] = []
|
@@ -71,16 +75,21 @@ module Taxamatch
|
|
71
75
|
if node[au][:exAuthorTeam]
|
72
76
|
res[:authors] += node[au][:exAuthorTeam][:author]
|
73
77
|
if node[au][:exAuthorTeam][:year]
|
74
|
-
year =
|
78
|
+
year = node[au][:exAuthorTeam][:year]
|
79
|
+
year = Taxamatch::Normalizer.normalize_year(year)
|
75
80
|
res[:years] << year if year
|
76
81
|
end
|
77
82
|
end
|
78
83
|
end
|
79
84
|
end
|
80
85
|
res[:authors].uniq!
|
81
|
-
res[:normalized_authors] = res[:authors].map
|
86
|
+
res[:normalized_authors] = res[:authors].map do |a|
|
87
|
+
Taxamatch::Normalizer.normalize_author(a)
|
88
|
+
end
|
82
89
|
res[:years].uniq!
|
83
|
-
|
90
|
+
if res[:normalized_authors].size > 0
|
91
|
+
@res[:all_authors] += res[:normalized_authors]
|
92
|
+
end
|
84
93
|
@res[:all_years] += res[:years] if res[:years].size > 0
|
85
94
|
end
|
86
95
|
|