biodiversity 3.1.10 → 3.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rspec +3 -0
- data/.ruby-version +1 -1
- data/CHANGELOG +5 -0
- data/README.md +95 -71
- data/biodiversity.gemspec +1 -0
- data/lib/biodiversity/parser.rb +33 -30
- data/lib/biodiversity/parser/scientific_name_clean.rb +45 -36
- data/lib/biodiversity/parser/scientific_name_clean.treetop +1 -1
- data/lib/biodiversity/version.rb +1 -1
- data/spec/biodiversity_spec.rb +0 -2
- data/spec/files/t.rb +15 -0
- data/spec/files/test_data.txt +345 -335
- data/spec/files/test_data.txt.new +463 -0
- data/spec/guid/lsid.spec.rb +0 -2
- data/spec/parser/scientific_name_canonical_spec.rb +0 -1
- data/spec/parser/scientific_name_clean_spec.rb +0 -2
- data/spec/parser/scientific_name_dirty_spec.rb +0 -1
- data/spec/parser/scientific_name_spec.rb +5 -4
- metadata +20 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: d7bb0304f5e151933f5350780677b9a47a099716
|
|
4
|
+
data.tar.gz: 7adaf2c1bfce44db79bc2c04d75c584d53957fd0
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 1a450a93fb07f985b5f1e7761e669ed772c8add2c01955e4a8e70a575f3b5f0bc86b1b215507260c48a1604b386b60543f27048c7a65119f71a4b0ddfd7bcefe
|
|
7
|
+
data.tar.gz: 19090297f99d64580b4b6012a06729ede74fd3ccf7fb9ffdfeecd6480d2a1a0bd0ce8dc8654747f51a60301464cbaebd1f9693ceb54f2827140163d72314f62b
|
data/.rspec
ADDED
data/.ruby-version
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
2.1.
|
|
1
|
+
2.1.6
|
data/CHANGELOG
CHANGED
|
@@ -1,3 +1,8 @@
|
|
|
1
|
+
3.2.0 -- added UUID version 5 identifiers for every name string, better
|
|
2
|
+
normalizing for the names with apostrophes, underscore-formatted names are
|
|
3
|
+
supported. Minor version increase because of change in the output format ("id"
|
|
4
|
+
field)
|
|
5
|
+
|
|
1
6
|
3.1.10 -- NPV viruses added
|
|
2
7
|
|
|
3
8
|
3.1.9 -- more virus keywords, better handling of apostrophes in
|
data/README.md
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
Biodiversity
|
|
2
2
|
============
|
|
3
3
|
|
|
4
|
-
[![Gem Version][
|
|
5
|
-
[![Continuous Integration Status][
|
|
6
|
-
[![CodePolice][
|
|
7
|
-
[![Dependency Status][
|
|
4
|
+
[![Gem Version][gem_svg]][gem_link]
|
|
5
|
+
[![Continuous Integration Status][ci_svg]][ci_link]
|
|
6
|
+
[![CodePolice][cc_svg]][cc_link]
|
|
7
|
+
[![Dependency Status][deps_svg]][deps_link]
|
|
8
8
|
|
|
9
9
|
Parses taxonomic scientific name and breaks it into semantic elements.
|
|
10
10
|
|
|
@@ -12,10 +12,12 @@ Parses taxonomic scientific name and breaks it into semantic elements.
|
|
|
12
12
|
Support for Ruby 1.8.7 IS DROPPED. Both biodiversity and
|
|
13
13
|
biodiversity19 will be for Ruby > 1.9.1 and will be identical gems.
|
|
14
14
|
|
|
15
|
-
biodiversity19 is now deprecated and will be
|
|
15
|
+
biodiversity19 is now deprecated and will not be updated anymore.
|
|
16
16
|
You are strongly encouraged to change your dependencies from
|
|
17
17
|
biodiversity19 to biodiversity
|
|
18
18
|
|
|
19
|
+
Follow [biodiversity issues][waffle] on waffle.io
|
|
20
|
+
|
|
19
21
|
Installation
|
|
20
22
|
------------
|
|
21
23
|
|
|
@@ -46,7 +48,7 @@ you can use a socket server
|
|
|
46
48
|
parserver -h
|
|
47
49
|
Usage: parserver [options]
|
|
48
50
|
|
|
49
|
-
-r, --canonical_with_rank Adds infraspecies rank
|
|
51
|
+
-r, --canonical_with_rank Adds infraspecies rank
|
|
50
52
|
to canonical forms
|
|
51
53
|
|
|
52
54
|
-o, --output=output Specifies the type of the output:
|
|
@@ -65,7 +67,7 @@ you can use a socket server
|
|
|
65
67
|
|
|
66
68
|
With default settings you can access parserver via port 4334 using a
|
|
67
69
|
socket client library of your programming language. You can find
|
|
68
|
-
[socket client script example][
|
|
70
|
+
[socket client script example][socket_example] in the examples directory of the gem.
|
|
69
71
|
|
|
70
72
|
If you want to check if socket server works for you:
|
|
71
73
|
|
|
@@ -93,76 +95,94 @@ of scientific name
|
|
|
93
95
|
|
|
94
96
|
You can use it as a library in Ruby, JRuby etc.
|
|
95
97
|
|
|
96
|
-
require 'biodiversity'
|
|
97
98
|
|
|
98
|
-
|
|
99
|
+
```ruby
|
|
100
|
+
require 'biodiversity'
|
|
101
|
+
|
|
102
|
+
parser = ScientificNameParser.new
|
|
99
103
|
|
|
100
|
-
|
|
101
|
-
|
|
104
|
+
#to find version number
|
|
105
|
+
ScientificNameParser.version
|
|
102
106
|
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
107
|
+
# to fix capitalization in canonicals
|
|
108
|
+
ScientificNameParser.fix_case("QUERCUS (QUERCUS) ALBA")
|
|
109
|
+
# Output: Quercus (Quercus) alba
|
|
106
110
|
|
|
107
|
-
|
|
108
|
-
|
|
111
|
+
# to parse a scientific name into a ruby hash
|
|
112
|
+
parser.parse("Plantago major")
|
|
109
113
|
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
114
|
+
#to get json representation
|
|
115
|
+
parser.parse("Plantago").to_json
|
|
116
|
+
#or
|
|
117
|
+
parser.parse("Plantago")
|
|
118
|
+
parser.all_json
|
|
115
119
|
|
|
116
|
-
|
|
117
|
-
|
|
120
|
+
# to clean name up
|
|
121
|
+
parser.parse(" Plantago major ")[:scientificName][:normalized]
|
|
118
122
|
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
123
|
+
# to get only cleaned up latin part of the name
|
|
124
|
+
parser.parse("Pseudocercospora dendrobii (H.C. Burnett) U. \
|
|
125
|
+
Braun & Crous 2003")[:scientificName][:canonical]
|
|
122
126
|
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
127
|
+
# to get detailed information about elements of the name
|
|
128
|
+
parser.parse("Pseudocercospora dendrobii (H.C. Burnett 1883) U. \
|
|
129
|
+
Braun & Crous 2003")[:scientificName][:details]
|
|
130
|
+
```
|
|
126
131
|
|
|
127
132
|
Returned result is not always linear, if name is complex. To get simple linear
|
|
128
133
|
representation of the name you can use:
|
|
129
134
|
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
135
|
+
|
|
136
|
+
```ruby
|
|
137
|
+
parser.parse("Pseudocercospora dendrobii (H.C. Burnett) \
|
|
138
|
+
U. Braun & Crous 2003")[:scientificName][:position]
|
|
139
|
+
# returns {0=>["genus", 16], 17=>["species", 26],
|
|
140
|
+
# 28=>["author_word", 32], 33=>["author_word", 40],
|
|
141
|
+
# 42=>["author_word", 44], 45=>["author_word", 50],
|
|
142
|
+
# 53=>["author_word", 58], 59=>["year", 63]}
|
|
143
|
+
# where the key is the char index of the start of
|
|
144
|
+
# a word, first element of the value is a semantic meaning
|
|
145
|
+
# of the word, second element of the value is the character index
|
|
146
|
+
# of end of the word
|
|
147
|
+
```
|
|
140
148
|
|
|
141
149
|
'Surrogate' is a broad group which includes 'Barcode of Life' names, and various
|
|
142
150
|
undetermined names with cf. sp. spp. nr. in them:
|
|
143
|
-
|
|
144
|
-
parser.parse("Coleoptera BOLD:1234567")[:scientificName][:surrogate]
|
|
145
151
|
|
|
146
|
-
|
|
152
|
+
```ruby
|
|
153
|
+
parser.parse("Coleoptera BOLD:1234567")[:scientificName][:surrogate]
|
|
154
|
+
```
|
|
155
|
+
### What is "id" in the parsed results?
|
|
156
|
+
|
|
157
|
+
The ID field contains a UUID v5 hexadecimal string. The ID is generated out of bytes
|
|
158
|
+
from the name string itself, and identical id can be generated using [any
|
|
159
|
+
popular programming language][uuid_examples]. You can read more about UUID
|
|
160
|
+
version 5 in a [blog post][uuid_blog].
|
|
161
|
+
|
|
162
|
+
### Parse using several CPUs (4 threads seem to be optimal)
|
|
147
163
|
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
164
|
+
```ruby
|
|
165
|
+
parser = ParallelParser.new
|
|
166
|
+
# ParallelParser.new(4) will try to run 4 processes if hardware allows
|
|
167
|
+
array_of_names = ["Betula alba", "Homo sapiens"....]
|
|
168
|
+
parser.parse(array_of_names)
|
|
169
|
+
# Output: {"Betula alba" => {:scientificName...},
|
|
170
|
+
# "Homo sapiens" => {:scientificName...}, ...}
|
|
171
|
+
```
|
|
154
172
|
|
|
155
|
-
parallel parser takes list of names and returns back a hash with names as
|
|
173
|
+
The parallel parser takes a list of names and returns a hash with names as
|
|
156
174
|
keys and parsed data as values
|
|
157
175
|
|
|
158
|
-
|
|
176
|
+
### Canonicals with ranks for infraspecific epithets:
|
|
159
177
|
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
178
|
+
```ruby
|
|
179
|
+
parser = ScientificNameParser.new(canonical_with_rank: true)
|
|
180
|
+
parser.parse('Cola cordifolia var. puberula \
|
|
181
|
+
A. Chev.')[:scientificName][:canonical]
|
|
182
|
+
# Output: Cola cordifolia var. puberula
|
|
183
|
+
```
|
|
164
184
|
|
|
165
|
-
|
|
185
|
+
### Resolving lsid and getting back RDF file
|
|
166
186
|
|
|
167
187
|
LsidResolver.resolve("urn:lsid:ubio.org:classificationbank:2232671")
|
|
168
188
|
|
|
@@ -174,7 +194,7 @@ If nnparse or parserver do not start -- try to run
|
|
|
174
194
|
gem uninstall biodiversity
|
|
175
195
|
gem uninstall biodiversity19
|
|
176
196
|
|
|
177
|
-
and make sure you remove all versions and all nnparse and parserver scripts.
|
|
197
|
+
and make sure you remove all versions and all nnparse and parserver scripts.
|
|
178
198
|
Then install biodiversity again
|
|
179
199
|
|
|
180
200
|
gem install biodiversity
|
|
@@ -184,18 +204,22 @@ It should fix the problem.
|
|
|
184
204
|
Copyright
|
|
185
205
|
---------
|
|
186
206
|
|
|
187
|
-
Authors: [Dmitry Mozzherin][
|
|
188
|
-
|
|
189
|
-
Copyright (c) 2008-2015 Marine Biological Laboratory. See LICENSE
|
|
190
|
-
further details.
|
|
191
|
-
|
|
192
|
-
[
|
|
193
|
-
[
|
|
194
|
-
[
|
|
195
|
-
[
|
|
196
|
-
[
|
|
197
|
-
[
|
|
198
|
-
[
|
|
199
|
-
[
|
|
200
|
-
[
|
|
201
|
-
[
|
|
207
|
+
Authors: [Dmitry Mozzherin][dimus]
|
|
208
|
+
|
|
209
|
+
Copyright (c) 2008-2015 Marine Biological Laboratory. See [LICENSE][license]
|
|
210
|
+
for further details.
|
|
211
|
+
|
|
212
|
+
[gem_svg]: https://badge.fury.io/rb/biodiversity.svg
|
|
213
|
+
[gem_link]: http://badge.fury.io/rb/biodiversity
|
|
214
|
+
[ci_svg]: https://secure.travis-ci.org/GlobalNamesArchitecture/biodiversity.svg
|
|
215
|
+
[ci_link]: http://travis-ci.org/GlobalNamesArchitecture/biodiversity
|
|
216
|
+
[cc_svg]: https://codeclimate.com/github/GlobalNamesArchitecture/biodiversity.svg
|
|
217
|
+
[cc_link]: https://codeclimate.com/github/GlobalNamesArchitecture/biodiversity
|
|
218
|
+
[deps_svg]: https://gemnasium.com/GlobalNamesArchitecture/biodiversity.svg
|
|
219
|
+
[deps_link]: https://gemnasium.com/GlobalNamesArchitecture/biodiversity
|
|
220
|
+
[socket_example]: http://bit.ly/149iLm5
|
|
221
|
+
[dimus]: https://github.com/dimus
|
|
222
|
+
[license]: https://github.com/GlobalNamesArchitecture/biodiversity/blob/master/LICENSE
|
|
223
|
+
[waffle]: https://waffle.io/GlobalNamesArchitecture/biodiversity
|
|
224
|
+
[uuid_examples]: https://github.com/GlobalNamesArchitecture/gn_uuid_examples
|
|
225
|
+
[uuid_blog]: http://globalnamesarchitecture.github.io/crossmap/gna/2015/05/31/gn-uuid-0-5-0.html
|
data/biodiversity.gemspec
CHANGED
|
@@ -19,6 +19,7 @@ Gem::Specification.new do |gem|
|
|
|
19
19
|
gem.add_runtime_dependency "treetop", "~> 1.4.1"
|
|
20
20
|
gem.add_runtime_dependency "parallel", "~> 1.4"
|
|
21
21
|
gem.add_runtime_dependency "unicode_utils", "~> 1.4"
|
|
22
|
+
gem.add_runtime_dependency "gn_uuid", "~> 0.5"
|
|
22
23
|
|
|
23
24
|
gem.add_development_dependency "bundler", "~> 1.6"
|
|
24
25
|
gem.add_development_dependency "rake", "~> 10.4"
|
data/lib/biodiversity/parser.rb
CHANGED
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
# encoding: UTF-8
|
|
2
|
-
|
|
3
|
-
require_relative
|
|
4
|
-
require_relative
|
|
2
|
+
require "gn_uuid"
|
|
3
|
+
require_relative "parser/scientific_name_clean"
|
|
4
|
+
require_relative "parser/scientific_name_dirty"
|
|
5
|
+
require_relative "parser/scientific_name_canonical"
|
|
5
6
|
|
|
6
7
|
module PreProcessor
|
|
7
8
|
NOTES = /\s+(species\s+group|species\s+complex|group|author)\b.*$/i
|
|
@@ -24,9 +25,10 @@ module PreProcessor
|
|
|
24
25
|
def self.clean(a_string)
|
|
25
26
|
[NOTES, TAXON_CONCEPTS1, TAXON_CONCEPTS2,
|
|
26
27
|
TAXON_CONCEPTS3, NOMEN_CONCEPTS, LAST_WORD_JUNK].each do |i|
|
|
27
|
-
a_string = a_string.gsub(i,
|
|
28
|
+
a_string = a_string.gsub(i, "")
|
|
28
29
|
end
|
|
29
|
-
a_string = a_string.tr(
|
|
30
|
+
a_string = a_string.tr("ſ","s") #old "s"
|
|
31
|
+
a_string = a_string.tr("_", " ") if a_string.strip.match(/\s/).nil?
|
|
30
32
|
a_string
|
|
31
33
|
end
|
|
32
34
|
end
|
|
@@ -36,7 +38,7 @@ end
|
|
|
36
38
|
# Examples
|
|
37
39
|
#
|
|
38
40
|
# parser = ParallelParser.new(4)
|
|
39
|
-
# parser.parse([
|
|
41
|
+
# parser.parse(["Betula L.", "Pardosa moesta"])
|
|
40
42
|
class ParallelParser
|
|
41
43
|
|
|
42
44
|
# Public: Initialize ParallelParser.
|
|
@@ -45,7 +47,7 @@ class ParallelParser
|
|
|
45
47
|
# If processes number is not set it will be determined
|
|
46
48
|
# automatically.
|
|
47
49
|
def initialize(processes_num = nil)
|
|
48
|
-
require
|
|
50
|
+
require "parallel"
|
|
49
51
|
cpu_num
|
|
50
52
|
if processes_num.to_i > 0
|
|
51
53
|
@processes_num = [processes_num, cpu_num - 1].min
|
|
@@ -66,7 +68,7 @@ class ParallelParser
|
|
|
66
68
|
# Examples
|
|
67
69
|
#
|
|
68
70
|
# parser = ParallelParser.new(4)
|
|
69
|
-
# parser.parse([
|
|
71
|
+
# parser.parse(["Homo sapiens L.", "Quercus quercus"])
|
|
70
72
|
#
|
|
71
73
|
# Returns a Hash with scientific names as a key, and parsing results as
|
|
72
74
|
# a value.
|
|
@@ -108,7 +110,8 @@ class ScientificNameParser
|
|
|
108
110
|
|
|
109
111
|
FAILED_RESULT = ->(name) do
|
|
110
112
|
{ scientificName:
|
|
111
|
-
{ parsed: false, verbatim: name
|
|
113
|
+
{ id: GnUUID.uuid(name), parsed: false, verbatim: name,
|
|
114
|
+
error: "Parser internal error" }
|
|
112
115
|
}
|
|
113
116
|
end
|
|
114
117
|
|
|
@@ -121,7 +124,7 @@ class ScientificNameParser
|
|
|
121
124
|
words_num = name_ary.size
|
|
122
125
|
res = nil
|
|
123
126
|
if words_num == 1
|
|
124
|
-
res = name_ary[0].gsub(/[\(\)\{\}]/,
|
|
127
|
+
res = name_ary[0].gsub(/[\(\)\{\}]/, "")
|
|
125
128
|
if res.size > 1
|
|
126
129
|
res = UnicodeUtils.upcase(res[0]) + UnicodeUtils.downcase(res[1..-1])
|
|
127
130
|
else
|
|
@@ -135,15 +138,15 @@ class ScientificNameParser
|
|
|
135
138
|
word1 = name_ary[0]
|
|
136
139
|
end
|
|
137
140
|
if name_ary[1].match(/^\(/)
|
|
138
|
-
word2 = name_ary[1].gsub(/\)$/,
|
|
141
|
+
word2 = name_ary[1].gsub(/\)$/, "") + ")"
|
|
139
142
|
word2 = word2[0] + UnicodeUtils.upcase(word2[1]) +
|
|
140
143
|
UnicodeUtils.downcase(word2[2..-1])
|
|
141
144
|
else
|
|
142
145
|
word2 = UnicodeUtils.downcase(name_ary[1])
|
|
143
146
|
end
|
|
144
|
-
res = word1 +
|
|
145
|
-
word2 +
|
|
146
|
-
name_ary[2..-1].map { |w| UnicodeUtils.downcase(w) }.join(
|
|
147
|
+
res = word1 + " " +
|
|
148
|
+
word2 + " " +
|
|
149
|
+
name_ary[2..-1].map { |w| UnicodeUtils.downcase(w) }.join(" ")
|
|
147
150
|
res.strip!
|
|
148
151
|
end
|
|
149
152
|
res
|
|
@@ -152,7 +155,7 @@ class ScientificNameParser
|
|
|
152
155
|
|
|
153
156
|
def initialize(opts = {})
|
|
154
157
|
@canonical_with_rank = !!opts[:canonical_with_rank]
|
|
155
|
-
@verbatim =
|
|
158
|
+
@verbatim = ""
|
|
156
159
|
@clean = ScientificNameCleanParser.new
|
|
157
160
|
@dirty = ScientificNameDirtyParser.new
|
|
158
161
|
@canonical = ScientificNameCanonicalParser.new
|
|
@@ -180,23 +183,23 @@ class ScientificNameParser
|
|
|
180
183
|
end
|
|
181
184
|
|
|
182
185
|
def parse(a_string)
|
|
183
|
-
@verbatim = a_string
|
|
186
|
+
@verbatim = a_string
|
|
184
187
|
a_string = PreProcessor::clean(a_string)
|
|
185
188
|
|
|
186
189
|
if virus?(a_string)
|
|
187
|
-
@parsed = { verbatim:
|
|
190
|
+
@parsed = { verbatim: @verbatim, virus: true }
|
|
188
191
|
elsif noparse?(a_string)
|
|
189
|
-
@parsed = { verbatim:
|
|
192
|
+
@parsed = { verbatim: @verbatim }
|
|
190
193
|
else
|
|
191
194
|
begin
|
|
192
195
|
@parsed = @clean.parse(a_string) || @dirty.parse(a_string)
|
|
193
196
|
unless @parsed
|
|
194
197
|
index = @dirty.index || @clean.index
|
|
195
198
|
salvage_match = a_string[0..index].split(/\s+/)[0..-2]
|
|
196
|
-
salvage_string = salvage_match ? salvage_match.join(
|
|
199
|
+
salvage_string = salvage_match ? salvage_match.join(" ") : a_string
|
|
197
200
|
@parsed = @dirty.parse(salvage_string) ||
|
|
198
201
|
@canonical.parse(a_string) ||
|
|
199
|
-
{ verbatim:
|
|
202
|
+
{ verbatim: @verbatim }
|
|
200
203
|
end
|
|
201
204
|
rescue
|
|
202
205
|
@parsed = FAILED_RESULT.(@verbatim)
|
|
@@ -205,12 +208,14 @@ class ScientificNameParser
|
|
|
205
208
|
|
|
206
209
|
def @parsed.verbatim=(a_string)
|
|
207
210
|
@verbatim = a_string
|
|
211
|
+
@id = GnUUID.uuid(@verbatim)
|
|
208
212
|
end
|
|
209
213
|
|
|
210
214
|
def @parsed.all(opts = {})
|
|
211
215
|
canonical_with_rank = !!opts[:canonical_with_rank]
|
|
212
216
|
parsed = self.class != Hash
|
|
213
|
-
res = {
|
|
217
|
+
res = { id: @id, parsed: parsed,
|
|
218
|
+
parser_version: ScientificNameParser::version}
|
|
214
219
|
if parsed
|
|
215
220
|
hybrid = self.hybrid rescue false
|
|
216
221
|
res.merge!({
|
|
@@ -226,7 +231,7 @@ class ScientificNameParser
|
|
|
226
231
|
res.merge!(self)
|
|
227
232
|
end
|
|
228
233
|
if (canonical_with_rank &&
|
|
229
|
-
canonical.count(
|
|
234
|
+
canonical.count(" ") > 1 &&
|
|
230
235
|
res[:details][0][:infraspecies])
|
|
231
236
|
ScientificNameParser.add_rank_to_canonical(res)
|
|
232
237
|
end
|
|
@@ -235,11 +240,11 @@ class ScientificNameParser
|
|
|
235
240
|
end
|
|
236
241
|
|
|
237
242
|
def @parsed.pos_json
|
|
238
|
-
self.pos.to_json rescue
|
|
243
|
+
self.pos.to_json rescue ""
|
|
239
244
|
end
|
|
240
245
|
|
|
241
246
|
def @parsed.all_json
|
|
242
|
-
self.all.to_json rescue
|
|
247
|
+
self.all.to_json rescue ""
|
|
243
248
|
end
|
|
244
249
|
|
|
245
250
|
@parsed.verbatim = @verbatim
|
|
@@ -256,7 +261,7 @@ class ScientificNameParser
|
|
|
256
261
|
surrogate2 = /\b(spp|sp|nr|cf)[\.]?[\s]*$/i
|
|
257
262
|
is_surrogate = false
|
|
258
263
|
|
|
259
|
-
ai_index = pos.index(
|
|
264
|
+
ai_index = pos.index("annotation_identification")
|
|
260
265
|
if ai_index
|
|
261
266
|
ai = name[pos[ai_index - 1]..pos[ai_index + 1]]
|
|
262
267
|
is_surrogate = true if ai.match(/^(spp|cf|sp|nr)/)
|
|
@@ -267,15 +272,13 @@ class ScientificNameParser
|
|
|
267
272
|
end
|
|
268
273
|
|
|
269
274
|
def self.add_rank_to_canonical(parsed)
|
|
270
|
-
parts = parsed[:canonical].split(
|
|
275
|
+
parts = parsed[:canonical].split(" ")
|
|
271
276
|
name_ary = parts[0..1]
|
|
272
277
|
parsed[:details][0][:infraspecies].each do |data|
|
|
273
278
|
infrasp = data[:string]
|
|
274
279
|
rank = data[:rank]
|
|
275
|
-
name_ary << (rank && rank !=
|
|
280
|
+
name_ary << (rank && rank != "n/a" ? "#{rank} #{infrasp}" : infrasp)
|
|
276
281
|
end
|
|
277
|
-
parsed[:canonical] = name_ary.join(
|
|
282
|
+
parsed[:canonical] = name_ary.join(" ")
|
|
278
283
|
end
|
|
279
|
-
|
|
280
284
|
end
|
|
281
|
-
|