biodiversity 3.0.1 → 3.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +3 -0
- data/LICENSE +3 -1
- data/README.md +44 -10
- data/VERSION +1 -1
- data/lib/biodiversity/parser.rb +19 -0
- data/spec/parser/test_data.txt +29 -22
- metadata +2 -5
data/CHANGELOG
CHANGED
data/LICENSE
CHANGED
@@ -1,4 +1,6 @@
|
|
1
|
-
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2008-2013 Marine Biological Laboratory
|
2
4
|
|
3
5
|
Permission is hereby granted, free of charge, to any person obtaining
|
4
6
|
a copy of this software and associated documentation files (the
|
data/README.md
CHANGED
@@ -46,7 +46,8 @@ you can use a socket server
|
|
46
46
|
parserver -h
|
47
47
|
Usage: parserver [options]
|
48
48
|
|
49
|
-
-r, --canonical_with_rank Adds infraspecies rank
|
49
|
+
-r, --canonical_with_rank Adds infraspecies rank
|
50
|
+
to canonical forms
|
50
51
|
|
51
52
|
-o, --output=output Specifies the type of the output:
|
52
53
|
json - parsed results in json
|
@@ -116,15 +117,18 @@ You can use it as a library in Ruby, JRuby etc.
|
|
116
117
|
parser.parse(" Plantago major ")[:scientificName][:normalized]
|
117
118
|
|
118
119
|
# to get only cleaned up latin part of the name
|
119
|
-
parser.parse("Pseudocercospora dendrobii (H.C. Burnett) U.
|
120
|
+
parser.parse("Pseudocercospora dendrobii (H.C. Burnett) U. \
|
121
|
+
Braun & Crous 2003")[:scientificName][:canonical]
|
120
122
|
|
121
123
|
# to get detailed information about elements of the name
|
122
|
-
parser.parse("Pseudocercospora dendrobii (H.C. Burnett 1883) U.
|
124
|
+
parser.parse("Pseudocercospora dendrobii (H.C. Burnett 1883) U. \
|
125
|
+
Braun & Crous 2003")[:scientificName][:details]
|
123
126
|
|
124
127
|
Returned result is not always linear, if name is complex. To get simple linear
|
125
128
|
representation of the name you can use:
|
126
129
|
|
127
|
-
parser.parse("Pseudocercospora dendrobii (H.C. Burnett)
|
130
|
+
parser.parse("Pseudocercospora dendrobii (H.C. Burnett) \
|
131
|
+
U. Braun & Crous 2003")[:scientificName][:position]
|
128
132
|
# returns {0=>["genus", 16], 17=>["species", 26],
|
129
133
|
# 28=>["author_word", 32], 33=>["author_word", 40],
|
130
134
|
# 42=>["author_word", 44], 45=>["author_word", 50],
|
@@ -134,34 +138,64 @@ representation of the name you can use:
|
|
134
138
|
# of the word, second element of the value is the character index
|
135
139
|
# of end of the word
|
136
140
|
|
141
|
+
'Surrogate' is a broad group which includes 'Barcode of Life' names, and various
|
142
|
+
undetermined names with cf. sp. spp. nr. in them:
|
143
|
+
|
144
|
+
parser.parse("Coleoptera BOLD:1234567")[:scientificName][:surrogate]
|
145
|
+
|
137
146
|
To parse using several CPUs (4 seem to be optimal)
|
138
147
|
|
139
148
|
parser = ParallelParser.new
|
140
149
|
# ParallelParser.new(4) will try to run 4 processes if hardware allows
|
141
150
|
array_of_names = ["Betula alba", "Homo sapiens"....]
|
142
151
|
parser.parse(array_of_names)
|
143
|
-
# Output: {"Betula alba" => {:scientificName...},
|
152
|
+
# Output: {"Betula alba" => {:scientificName...},
|
153
|
+
# "Homo sapiens" => {:scientificName...}, ...}
|
144
154
|
|
145
|
-
parallel parser takes list of names and returns back a hash with names as
|
155
|
+
parallel parser takes list of names and returns back a hash with names as
|
156
|
+
keys and parsed data as values
|
146
157
|
|
147
158
|
To get canonicals with ranks for infraspecific epithets:
|
148
159
|
|
149
160
|
parser = ScientificNameParser.new(canonical_with_rank: true)
|
150
|
-
parser.parse('Cola cordifolia var. puberula
|
161
|
+
parser.parse('Cola cordifolia var. puberula \
|
162
|
+
A. Chev.')[:scientificName][:canonical]
|
151
163
|
# Output: Cola cordifolia var. puberula
|
152
164
|
|
153
165
|
To resolve lsid and get back RDF file
|
154
166
|
|
155
167
|
LsidResolver.resolve("urn:lsid:ubio.org:classificationbank:2232671")
|
156
168
|
|
169
|
+
Troubleshooting
|
170
|
+
---------------
|
171
|
+
|
172
|
+
If nnparse or parserver do not start -- try to run
|
173
|
+
|
174
|
+
gem uninstall biodiversity
|
175
|
+
gem uninstall biodiversity19
|
176
|
+
|
177
|
+
and make sure you remove all versions and all nnparse and parserver scripts.
|
178
|
+
Then install biodiversity again
|
179
|
+
|
180
|
+
gem install biodiversity
|
181
|
+
|
182
|
+
It should fix the problem.
|
183
|
+
|
184
|
+
Copyright
|
185
|
+
---------
|
186
|
+
|
187
|
+
Authors: [Dmitry Mozzherin][10],
|
157
188
|
|
189
|
+
Copyright (c) 2008-2013 Marine Biological Laboratory. See LICENSE for
|
190
|
+
further details.
|
158
191
|
|
159
|
-
[1]: https://badge.fury.io/rb/
|
160
|
-
[2]: http://badge.fury.io/rb/
|
192
|
+
[1]: https://badge.fury.io/rb/biodiversity.png
|
193
|
+
[2]: http://badge.fury.io/rb/biodiversity
|
161
194
|
[3]: https://secure.travis-ci.org/GlobalNamesArchitecture/biodiversity.png
|
162
195
|
[4]: http://travis-ci.org/GlobalNamesArchitecture/biodiversity
|
163
196
|
[5]: https://codeclimate.com/github/GlobalNamesArchitecture/biodiversity.png
|
164
197
|
[6]: https://codeclimate.com/github/GlobalNamesArchitecture/biodiversity
|
165
198
|
[7]: https://gemnasium.com/GlobalNamesArchitecture/biodiversity.png
|
166
199
|
[8]: https://gemnasium.com/GlobalNamesArchitecture/biodiversity
|
167
|
-
[9]:
|
200
|
+
[9]: http://bit.ly/149iLm5
|
201
|
+
[10]: https://github.com/dimus
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
3.0
|
1
|
+
3.1.0
|
data/lib/biodiversity/parser.rb
CHANGED
@@ -233,6 +233,7 @@ class ScientificNameParser
|
|
233
233
|
res[:details][0][:infraspecies])
|
234
234
|
ScientificNameParser.add_rank_to_canonical(res)
|
235
235
|
end
|
236
|
+
res[:surrogate] = true if ScientificNameParser.surrogate?(res)
|
236
237
|
res = {:scientificName => res}
|
237
238
|
end
|
238
239
|
|
@@ -250,6 +251,24 @@ class ScientificNameParser
|
|
250
251
|
|
251
252
|
private
|
252
253
|
|
254
|
+
# Decides whether an already-parsed name should be flagged as a "surrogate":
# a 'Barcode of Life' style identifier or an undetermined name carrying
# cf. / sp. / spp. / nr.
#
# parsed_data - Hash of parser output. Reads :parsed, :verbatim and
#               :positions (start offset => [element_name, end offset]).
#
# Returns true or false. Always false when the name was not parsed.
def self.surrogate?(parsed_data)
  return false unless parsed_data[:parsed]

  name = parsed_data[:verbatim].to_s
  # Flattening {start => [element, stop]} gives [start, element, stop, ...],
  # so the offsets of an element sit right next to its label.
  pos = parsed_data[:positions].to_a.flatten

  ai_index = pos.index('annotation_identification')
  if ai_index
    # Slice the annotation text out of the verbatim string. to_s guards
    # against a nil slice when the recorded offsets fall outside the string.
    annotation = name[pos[ai_index - 1]..pos[ai_index + 1]].to_s
    return true if annotation =~ /^(spp|cf|sp|nr)/
  end

  # A BOLD identifier or any run of five or more digits anywhere in the name.
  barcode_or_digits = /BOLD:|\d{5,}/i
  # A rank/uncertainty marker as the last word of the name. The lookbehind
  # stops words that merely end in these letters (e.g. the author "Crisp")
  # from being flagged; "ssp" is listed explicitly so that trailing
  # "ssp" / "ssp." keep being reported as surrogates.
  trailing_marker = /(?<![[:alpha:]])(ssp|spp|sp|nr|cf)\.?\s*$/i

  !!(name =~ barcode_or_digits || name =~ trailing_marker)
end
|
271
|
+
|
253
272
|
def self.add_rank_to_canonical(parsed)
|
254
273
|
parts = parsed[:canonical].split(' ')
|
255
274
|
name_ary = parts[0..1]
|
data/spec/parser/test_data.txt
CHANGED
@@ -171,27 +171,27 @@ Euastrum divergens var. rhodesiense f. coronulum A.M. Scott & Prescott|{"scienti
|
|
171
171
|
#species and infraspecies without epithets, comparisons
|
172
172
|
Acarinina aff. pentacamerata|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Acarinina aff. pentacamerata", "normalized":"Acarinina", "canonical":"Acarinina", "hybrid":false, "details":[{"genus":{"string":"Acarinina"}, "annotation_identification":"aff.", "ignored":{"species":{"string":"pentacamerata"}}}], "parser_run":1, "positions":{"0":["genus", 9], "10":["annotation_identification", 14]}}}
|
173
173
|
Acarinina aff pentacamerata|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Acarinina aff pentacamerata", "normalized":"Acarinina", "canonical":"Acarinina", "hybrid":false, "details":[{"genus":{"string":"Acarinina"}, "annotation_identification":"aff", "ignored":{"species":{"string":"pentacamerata"}}}], "parser_run":1, "positions":{"0":["genus", 9], "10":["annotation_identification", 14]}}}
|
174
|
-
Abturia cf. alabamensis (Morton )|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Abturia cf. alabamensis (Morton )", "normalized":"Abturia cf. alabamensis (Morton)", "canonical":"Abturia alabamensis", "hybrid":false, "details":[{"genus":{"string":"Abturia"}, "annotation_identification":"cf.", "species":{"species":{"string":"alabamensis", "authorship":"(Morton )", "basionymAuthorTeam":{"authorTeam":"Morton", "author":["Morton"]}}}}], "parser_run":1, "positions":{"0":["genus", 7], "8":["annotation_identification", 11], "12":["species", 23], "25":["author_word", 31]}}}
|
175
|
-
Abturia cf alabamensis (Morton )|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Abturia cf alabamensis (Morton )", "normalized":"Abturia cf alabamensis (Morton)", "canonical":"Abturia alabamensis", "hybrid":false, "details":[{"genus":{"string":"Abturia"}, "annotation_identification":"cf", "species":{"species":{"string":"alabamensis", "authorship":"(Morton )", "basionymAuthorTeam":{"authorTeam":"Morton", "author":["Morton"]}}}}], "parser_run":1, "positions":{"0":["genus", 7], "8":["annotation_identification", 11], "11":["species", 22], "24":["author_word", 30]}}}
|
174
|
+
Abturia cf. alabamensis (Morton )|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Abturia cf. alabamensis (Morton )", "normalized":"Abturia cf. alabamensis (Morton)", "canonical":"Abturia alabamensis", "hybrid":false, "details":[{"genus":{"string":"Abturia"}, "annotation_identification":"cf.", "species":{"species":{"string":"alabamensis", "authorship":"(Morton )", "basionymAuthorTeam":{"authorTeam":"Morton", "author":["Morton"]}}}}], "parser_run":1, "positions":{"0":["genus", 7], "8":["annotation_identification", 11], "12":["species", 23], "25":["author_word", 31]}, "surrogate": true}}
|
175
|
+
Abturia cf alabamensis (Morton )|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Abturia cf alabamensis (Morton )", "normalized":"Abturia cf alabamensis (Morton)", "canonical":"Abturia alabamensis", "hybrid":false, "details":[{"genus":{"string":"Abturia"}, "annotation_identification":"cf", "species":{"species":{"string":"alabamensis", "authorship":"(Morton )", "basionymAuthorTeam":{"authorTeam":"Morton", "author":["Morton"]}}}}], "parser_run":1, "positions":{"0":["genus", 7], "8":["annotation_identification", 11], "11":["species", 22], "24":["author_word", 30]}, "surrogate": true}}
|
176
176
|
Alyxia reinwardti var|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Alyxia reinwardti var", "normalized":"Alyxia reinwardti", "canonical":"Alyxia reinwardti", "hybrid":false, "details":[{"genus":{"string":"Alyxia"}, "species":{"string":"reinwardti"}}], "parser_run":1, "positions":{"0":["genus", 6], "7":["species", 17]}}}
|
177
177
|
Alyxia reinwardti var.|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Alyxia reinwardti var.", "normalized":"Alyxia reinwardti", "canonical":"Alyxia reinwardti", "hybrid":false, "details":[{"genus":{"string":"Alyxia"}, "species":{"string":"reinwardti"}}], "parser_run":1, "positions":{"0":["genus", 6], "7":["species", 17]}}}
|
178
|
-
Alyxia reinwardti ssp|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Alyxia reinwardti ssp", "normalized":"Alyxia reinwardti", "canonical":"Alyxia reinwardti", "hybrid":false, "details":[{"genus":{"string":"Alyxia"}, "species":{"string":"reinwardti"}}], "parser_run":1, "positions":{"0":["genus", 6], "7":["species", 17]}}}
|
179
|
-
Alyxia reinwardti ssp.|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Alyxia reinwardti ssp.", "normalized":"Alyxia reinwardti", "canonical":"Alyxia reinwardti", "hybrid":false, "details":[{"genus":{"string":"Alyxia"}, "species":{"string":"reinwardti"}}], "parser_run":1, "positions":{"0":["genus", 6], "7":["species", 17]}}}
|
180
|
-
Alaria spp|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Alaria spp", "normalized":"Alaria", "canonical":"Alaria", "hybrid":false, "details":[{"uninomial":{"string":"Alaria"}}], "parser_run":1, "positions":{"0":["uninomial", 6]}}}
|
181
|
-
Alaria spp.|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Alaria spp.", "normalized":"Alaria", "canonical":"Alaria", "hybrid":false, "details":[{"uninomial":{"string":"Alaria"}}], "parser_run":1, "positions":{"0":["uninomial", 6]}}}
|
182
|
-
Xenodon sp|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Xenodon sp", "normalized":"Xenodon", "canonical":"Xenodon", "hybrid":false, "details":[{"uninomial":{"string":"Xenodon"}}], "parser_run":1, "positions":{"0":["uninomial", 7]}}}
|
183
|
-
Xenodon sp.|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Xenodon sp.", "normalized":"Xenodon", "canonical":"Xenodon", "hybrid":false, "details":[{"uninomial":{"string":"Xenodon"}}], "parser_run":1, "positions":{"0":["uninomial", 7]}}}
|
184
|
-
Formicidae cf.|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Formicidae cf.", "normalized":"Formicidae", "canonical":"Formicidae", "hybrid":false, "details":[{"uninomial":{"string":"Formicidae"}}], "parser_run":1, "positions":{"0":["uninomial", 10]}}}
|
185
|
-
Formicidae cf|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Formicidae cf", "normalized":"Formicidae", "canonical":"Formicidae", "hybrid":false, "details":[{"uninomial":{"string":"Formicidae"}}], "parser_run":1, "positions":{"0":["uninomial", 10]}}}
|
186
|
-
Acastoides spp.|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Acastoides spp.", "normalized":"Acastoides", "canonical":"Acastoides", "hybrid":false, "details":[{"uninomial":{"string":"Acastoides"}}], "parser_run":1, "positions":{"0":["uninomial", 10]}}}
|
187
|
-
Sphingomonas sp. 37|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Sphingomonas sp. 37", "normalized":"Sphingomonas", "canonical":"Sphingomonas", "hybrid":false, "details":[{"genus":{"string":"Sphingomonas"}, "annotation_identification":"sp.", "ignored":{"unparsed":"37"}}], "parser_run":1, "positions":{"0":["genus", 12], "13":["annotation_identification", 16]}}}
|
188
|
-
Thryothorus leucotis spp. bogotensis|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Thryothorus leucotis spp. bogotensis", "normalized":"Thryothorus leucotis", "canonical":"Thryothorus leucotis", "hybrid":false, "details":[{"genus":{"string":"Thryothorus"}, "species":{"string":"leucotis"}, "infraspecies":[{"annotation_identification":"spp.", "ignored":{"infraspecies":{"string":"bogotensis", "rank":"n/a"}}}]}], "parser_run":1, "positions":{"0":["genus", 11], "12":["species", 20], "21":["annotation_identification", 25]}}}
|
189
|
-
Endoxyla sp. GM-, 2003|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Endoxyla sp. GM-, 2003", "normalized":"Endoxyla", "canonical":"Endoxyla", "hybrid":false, "details":[{"genus":{"string":"Endoxyla"}, "annotation_identification":"sp.", "ignored":{"unparsed":"GM-, 2003"}}], "parser_run":1, "positions":{"0":["genus", 8], "9":["annotation_identification", 12]}}}
|
190
|
-
Liopropoma sp.2 Not applicable|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Liopropoma sp.2 Not applicable", "normalized":"Liopropoma", "canonical":"Liopropoma", "hybrid":false, "details":[{"genus":{"string":"Liopropoma"}, "annotation_identification":"sp.", "ignored":{"unparsed":"2 Not applicable"}}], "parser_run":1, "positions":{"0":["genus", 10], "11":["annotation_identification", 14]}}}
|
191
|
-
Lacanobia nr. subjuncta Bold:Aab, 0925|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Lacanobia nr. subjuncta Bold:Aab, 0925", "normalized":"Lacanobia", "canonical":"Lacanobia", "hybrid":false, "details":[{"genus":{"string":"Lacanobia"}, "annotation_identification":"nr.", "ignored":{"species":{"string":"subjuncta", "authorship":"Bold:Aab", "basionymAuthorTeam":{"authorTeam":"Bold:Aab", "author":["Bold:Aab"]}}}}], "parser_run":2, "positions":{"0":["genus", 9], "10":["annotation_identification", 13]}}}
|
192
|
-
Lacanobia sp. nr. subjuncta Bold:Aab, 0925|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Lacanobia sp. nr. subjuncta Bold:Aab, 0925", "normalized":"Lacanobia", "canonical":"Lacanobia", "hybrid":false, "details":[{"genus":{"string":"Lacanobia"}, "annotation_identification":"sp. nr.", "ignored":{"species":{"string":"subjuncta", "authorship":"Bold:Aab", "basionymAuthorTeam":{"authorTeam":"Bold:Aab", "author":["Bold:Aab"]}}}}], "parser_run":2, "positions":{"0":["genus", 9], "10":["annotation_identification", 17]}}}
|
178
|
+
Alyxia reinwardti ssp|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Alyxia reinwardti ssp", "normalized":"Alyxia reinwardti", "canonical":"Alyxia reinwardti", "hybrid":false, "details":[{"genus":{"string":"Alyxia"}, "species":{"string":"reinwardti"}}], "parser_run":1, "positions":{"0":["genus", 6], "7":["species", 17]}, "surrogate": true}}
|
179
|
+
Alyxia reinwardti ssp.|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Alyxia reinwardti ssp.", "normalized":"Alyxia reinwardti", "canonical":"Alyxia reinwardti", "hybrid":false, "details":[{"genus":{"string":"Alyxia"}, "species":{"string":"reinwardti"}}], "parser_run":1, "positions":{"0":["genus", 6], "7":["species", 17]}, "surrogate": true}}
|
180
|
+
Alaria spp|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Alaria spp", "normalized":"Alaria", "canonical":"Alaria", "hybrid":false, "details":[{"uninomial":{"string":"Alaria"}}], "parser_run":1, "positions":{"0":["uninomial", 6]}, "surrogate": true}}
|
181
|
+
Alaria spp.|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Alaria spp.", "normalized":"Alaria", "canonical":"Alaria", "hybrid":false, "details":[{"uninomial":{"string":"Alaria"}}], "parser_run":1, "positions":{"0":["uninomial", 6]}, "surrogate": true}}
|
182
|
+
Xenodon sp|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Xenodon sp", "normalized":"Xenodon", "canonical":"Xenodon", "hybrid":false, "details":[{"uninomial":{"string":"Xenodon"}}], "parser_run":1, "positions":{"0":["uninomial", 7]}, "surrogate": true}}
|
183
|
+
Xenodon sp.|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Xenodon sp.", "normalized":"Xenodon", "canonical":"Xenodon", "hybrid":false, "details":[{"uninomial":{"string":"Xenodon"}}], "parser_run":1, "positions":{"0":["uninomial", 7]}, "surrogate": true}}
|
184
|
+
Formicidae cf.|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Formicidae cf.", "normalized":"Formicidae", "canonical":"Formicidae", "hybrid":false, "details":[{"uninomial":{"string":"Formicidae"}}], "parser_run":1, "positions":{"0":["uninomial", 10]}, "surrogate": true}}
|
185
|
+
Formicidae cf|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Formicidae cf", "normalized":"Formicidae", "canonical":"Formicidae", "hybrid":false, "details":[{"uninomial":{"string":"Formicidae"}}], "parser_run":1, "positions":{"0":["uninomial", 10]}, "surrogate": true}}
|
186
|
+
Acastoides spp.|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Acastoides spp.", "normalized":"Acastoides", "canonical":"Acastoides", "hybrid":false, "details":[{"uninomial":{"string":"Acastoides"}}], "parser_run":1, "positions":{"0":["uninomial", 10]}, "surrogate": true}}
|
187
|
+
Sphingomonas sp. 37|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Sphingomonas sp. 37", "normalized":"Sphingomonas", "canonical":"Sphingomonas", "hybrid":false, "details":[{"genus":{"string":"Sphingomonas"}, "annotation_identification":"sp.", "ignored":{"unparsed":"37"}}], "parser_run":1, "positions":{"0":["genus", 12], "13":["annotation_identification", 16]}, "surrogate": true}}
|
188
|
+
Thryothorus leucotis spp. bogotensis|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Thryothorus leucotis spp. bogotensis", "normalized":"Thryothorus leucotis", "canonical":"Thryothorus leucotis", "hybrid":false, "details":[{"genus":{"string":"Thryothorus"}, "species":{"string":"leucotis"}, "infraspecies":[{"annotation_identification":"spp.", "ignored":{"infraspecies":{"string":"bogotensis", "rank":"n/a"}}}]}], "parser_run":1, "positions":{"0":["genus", 11], "12":["species", 20], "21":["annotation_identification", 25]}, "surrogate": true}}
|
189
|
+
Endoxyla sp. GM-, 2003|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Endoxyla sp. GM-, 2003", "normalized":"Endoxyla", "canonical":"Endoxyla", "hybrid":false, "details":[{"genus":{"string":"Endoxyla"}, "annotation_identification":"sp.", "ignored":{"unparsed":"GM-, 2003"}}], "parser_run":1, "positions":{"0":["genus", 8], "9":["annotation_identification", 12]}, "surrogate": true}}
|
190
|
+
Liopropoma sp.2 Not applicable|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Liopropoma sp.2 Not applicable", "normalized":"Liopropoma", "canonical":"Liopropoma", "hybrid":false, "details":[{"genus":{"string":"Liopropoma"}, "annotation_identification":"sp.", "ignored":{"unparsed":"2 Not applicable"}}], "parser_run":1, "positions":{"0":["genus", 10], "11":["annotation_identification", 14]}, "surrogate": true}}
|
191
|
+
Lacanobia nr. subjuncta Bold:Aab, 0925|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Lacanobia nr. subjuncta Bold:Aab, 0925", "normalized":"Lacanobia", "canonical":"Lacanobia", "hybrid":false, "details":[{"genus":{"string":"Lacanobia"}, "annotation_identification":"nr.", "ignored":{"species":{"string":"subjuncta", "authorship":"Bold:Aab", "basionymAuthorTeam":{"authorTeam":"Bold:Aab", "author":["Bold:Aab"]}}}}], "parser_run":2, "positions":{"0":["genus", 9], "10":["annotation_identification", 13]}, "surrogate": true}}
|
192
|
+
Lacanobia sp. nr. subjuncta Bold:Aab, 0925|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Lacanobia sp. nr. subjuncta Bold:Aab, 0925", "normalized":"Lacanobia", "canonical":"Lacanobia", "hybrid":false, "details":[{"genus":{"string":"Lacanobia"}, "annotation_identification":"sp. nr.", "ignored":{"species":{"string":"subjuncta", "authorship":"Bold:Aab", "basionymAuthorTeam":{"authorTeam":"Bold:Aab", "author":["Bold:Aab"]}}}}], "parser_run":2, "positions":{"0":["genus", 9], "10":["annotation_identification", 17]}, "surrogate": true}}
|
193
193
|
#Larus occidentalis cf. wymani|{}
|
194
|
-
Calidris cf. cooperi|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Calidris cf. cooperi", "normalized":"Calidris cf. cooperi", "canonical":"Calidris cooperi", "hybrid":false, "details":[{"genus":{"string":"Calidris"}, "annotation_identification":"cf.", "species":{"species":{"string":"cooperi"}}}], "parser_run":1, "positions":{"0":["genus", 8], "9":["annotation_identification", 12], "13":["species", 20]}}}
|
194
|
+
Calidris cf. cooperi|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Calidris cf. cooperi", "normalized":"Calidris cf. cooperi", "canonical":"Calidris cooperi", "hybrid":false, "details":[{"genus":{"string":"Calidris"}, "annotation_identification":"cf.", "species":{"species":{"string":"cooperi"}}}], "parser_run":1, "positions":{"0":["genus", 8], "9":["annotation_identification", 12], "13":["species", 20]}, "surrogate": true}}
|
195
195
|
#TODO:Gemmula cf. cosmoi NP-2008 -- generates wrong authorship
|
196
196
|
|
197
197
|
#unknown authorship
|
@@ -393,12 +393,12 @@ Zygaena witti Wiegel [1973]|{"scientificName":{"parsed":true, "parser_version":"
|
|
393
393
|
Deyeuxia coarctata Kunth, 1815 [1816]|{"scientificName":{"parsed":true, "parser_version":"test_version", "parser_run":2,"verbatim":"Deyeuxia coarctata Kunth, 1815 [1816]","normalized":"Deyeuxia coarctata Kunth 1815 (1816)","canonical":"Deyeuxia coarctata","hybrid":false,"details":[{"genus":{"string":"Deyeuxia"},"species":{"string":"coarctata","authorship":"Kunth, 1815 [1816]","basionymAuthorTeam":{"authorTeam":"Kunth","author":["Kunth"],"year":"1815","approximate_year":"(1816)"}}}],"positions":{"0":["genus",8],"9":["species",18],"19":["author_word",24],"26":["year",30],"32":["year",36]}}}
|
394
394
|
|
395
395
|
#names with garbage at the end
|
396
|
-
Morea (Morea) Burt 2342343242 23424322342 23424234|{"scientificName":{"parsed":true, "verbatim":"Morea (Morea) Burt 2342343242 23424322342 23424234", "normalized":"Morea (Morea) Burt", "canonical":"Morea", "hybrid":false, "details":[{"uninomial":{"string":"Morea", "infragenus":{"string":"Morea"}, "authorship":"Burt", "basionymAuthorTeam":{"authorTeam":"Burt", "author":["Burt"]}}}],
|
396
|
+
Morea (Morea) Burt 2342343242 23424322342 23424234|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Morea (Morea) Burt 2342343242 23424322342 23424234", "normalized":"Morea (Morea) Burt", "canonical":"Morea", "hybrid":false, "details":[{"uninomial":{"string":"Morea", "infragenus":{"string":"Morea"}, "authorship":"Burt", "basionymAuthorTeam":{"authorTeam":"Burt", "author":["Burt"]}}}], "parser_run":2, "positions":{"0":["uninomial", 5], "7":["infragenus", 12], "14":["author_word", 18]}, "surrogate":true}}
|
397
397
|
Nautilus asterizans von|{"scientificName":{"parsed":true, "verbatim":"Nautilus asterizans von", "normalized":"Nautilus asterizans", "canonical":"Nautilus asterizans", "hybrid":false, "details":[{"genus":{"string":"Nautilus"}, "species":{"string":"asterizans"}}], "parser_version":"test_version", "parser_run":1, "positions":{"0":["genus", 8], "9":["species", 19]}}}
|
398
398
|
|
399
399
|
#salvage canonical
|
400
|
-
Morea ssjjlajajaj324$33 234243242|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Morea ssjjlajajaj324$33 234243242", "normalized":"Morea", "canonical":"Morea", "hybrid":false, "details":[{"uninomial":{"string":"Morea"}}], "parser_run":2, "positions":{"0":["uninomial", 5]}}}
|
401
|
-
Morea (Morea) burtius 2342343242 23424322342 23424234|{"scientificName":{"parsed":true, "parser_version":"test_version", "
|
400
|
+
Morea ssjjlajajaj324$33 234243242|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Morea ssjjlajajaj324$33 234243242", "normalized":"Morea", "canonical":"Morea", "hybrid":false, "details":[{"uninomial":{"string":"Morea"}}], "parser_run":2, "positions":{"0":["uninomial", 5]}, "surrogate":true}}
|
401
|
+
Morea (Morea) burtius 2342343242 23424322342 23424234|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Morea (Morea) burtius 2342343242 23424322342 23424234", "normalized":"Morea (Morea) burtius", "canonical":"Morea burtius", "hybrid":false, "details":[{"genus":{"string":"Morea"}, "infragenus":{"string":"Morea"}, "species":{"string":"burtius"}}], "parser_run":2, "positions":{"0":["genus", 5], "7":["infragenus", 12], "14":["species", 21]}, "surrogate":true}}
|
402
402
|
Verpericola megasoma ""Dall" Pils.|{"scientificName":{"parsed":true, "parser_version":"test_version", "parser_run":2,"verbatim":"Verpericola megasoma \"\"Dall\" Pils.","normalized":"Verpericola megasoma","canonical":"Verpericola megasoma","hybrid":false,"details":[{"genus":{"string":"Verpericola"},"species":{"string":"megasoma"}}],"positions":{"0":["genus",11],"12":["species",20]}}}
|
403
403
|
Verpericola megasoma "Dall" Pils.|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Verpericola megasoma \"Dall\" Pils.", "normalized":"Verpericola megasoma", "canonical":"Verpericola megasoma", "hybrid":false, "details":[{"genus":{"string":"Verpericola"}, "species":{"string":"megasoma"}}], "parser_run":2, "positions":{"0":["genus", 11], "12":["species", 20]}}}
|
404
404
|
Moraea spathulata ( (L. f. Klatt|{"scientificName":{"parsed":true, "parser_version":"test_version", "parser_run":2,"verbatim":"Moraea spathulata ( (L. f. Klatt","normalized":"Moraea spathulata","canonical":"Moraea spathulata","hybrid":false,"details":[{"genus":{"string":"Moraea"},"species":{"string":"spathulata"}}],"positions":{"0":["genus",6],"7":["species",17]}}}
|
@@ -412,5 +412,12 @@ Kerana var. cameroni|{"scientificName":{"parsed":true, "parser_version":"test_ve
|
|
412
412
|
#should parse names which previously caused problems:
|
413
413
|
Buteo borealis ? ventralis|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Buteo borealis ? ventralis", "normalized":"Buteo borealis", "canonical":"Buteo borealis", "hybrid":false, "details":[{"genus":{"string":"Buteo"}, "species":{"string":"borealis"}, "infraspecies":[{"annotation_identification":"?", "ignored":{"infraspecies":{"string":"ventralis", "rank":"n/a"}}}]}], "parser_run":1, "positions":{"0":["genus", 5], "6":["species", 14], "15":["annotation_identification", 17]}}}
|
414
414
|
Cetraria islandica ? islandica|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Cetraria islandica ? islandica", "normalized":"Cetraria islandica", "canonical":"Cetraria islandica", "hybrid":false, "details":[{"genus":{"string":"Cetraria"}, "species":{"string":"islandica"}, "infraspecies":[{"annotation_identification":"?", "ignored":{"infraspecies":{"string":"islandica", "rank":"n/a"}}}]}], "parser_run":1, "positions":{"0":["genus", 8], "9":["species", 18], "19":["annotation_identification", 21]}}}
|
415
|
-
Euxoa nr. idahoensis sp. 1clay|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Euxoa nr. idahoensis sp. 1clay", "normalized":"Euxoa idahoensis", "canonical":"Euxoa idahoensis", "hybrid":false, "details":[{"genus":{"string":"Euxoa"}, "species":{"string":"idahoensis"}, "infraspecies":[{"annotation_identification":"sp.", "ignored":{"infraspecies":{"string":"uniclay", "rank":"n/a"}}}]}], "parser_run":1, "positions":{"0":["genus", 5], "10":["species", 20], "21":["annotation_identification", 24]}}}
|
415
|
+
Euxoa nr. idahoensis sp. 1clay|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Euxoa nr. idahoensis sp. 1clay", "normalized":"Euxoa idahoensis", "canonical":"Euxoa idahoensis", "hybrid":false, "details":[{"genus":{"string":"Euxoa"}, "species":{"string":"idahoensis"}, "infraspecies":[{"annotation_identification":"sp.", "ignored":{"infraspecies":{"string":"uniclay", "rank":"n/a"}}}]}], "parser_run":1, "positions":{"0":["genus", 5], "10":["species", 20], "21":["annotation_identification", 24]}, "surrogate": true}}
|
416
416
|
Parus caeruleus species complex|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Parus caeruleus species complex", "normalized":"Parus caeruleus", "canonical":"Parus caeruleus", "hybrid":false, "details":[{"genus":{"string":"Parus"}, "species":{"string":"caeruleus"}}], "parser_run":1, "positions":{"0":["genus", 5], "6":["species", 15]}}}
|
417
|
+
|
418
|
+
#should flag surrogate names
|
419
|
+
Coleoptera sp. BOLD:AAV0432|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Coleoptera sp. BOLD:AAV0432", "normalized":"Coleoptera", "canonical":"Coleoptera", "hybrid":false, "details":[{"genus":{"string":"Coleoptera"}, "annotation_identification":"sp.", "ignored":{"unparsed":"BOLD:AAV0432"}}], "parser_run":1, "positions":{"0":["genus", 10], "11":["annotation_identification", 14]}, "surrogate":true}}
|
420
|
+
Coleoptera Bold:AAV0432|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Coleoptera Bold:AAV0432", "normalized":"Coleoptera", "canonical":"Coleoptera", "hybrid":false, "details":[{"uninomial":{"string":"Coleoptera"}}], "parser_run":2, "positions":{"0":["uninomial", 10]}, "surrogate":true}}
|
421
|
+
|
422
|
+
#should not flag as surrogates names that are merely similar to surrogate names
|
423
|
+
Dryopteris X separabilis Small (pro sp.)|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Dryopteris X separabilis Small (pro sp.)", "normalized":"Dryopteris × separabilis Small", "canonical":"Dryopteris separabilis", "hybrid":false, "details":[{"genus":{"string":"Dryopteris"}, "species":{"string":"separabilis", "authorship":"Small", "basionymAuthorTeam":{"authorTeam":"Small", "author":["Small"]}}}], "parser_run":2, "positions":{"0":["genus", 10], "13":["species", 24], "25":["author_word", 30]}}}
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: biodiversity
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.0
|
4
|
+
version: 3.1.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-
|
12
|
+
date: 2013-06-17 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake
|
@@ -209,9 +209,6 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
209
209
|
- - ! '>='
|
210
210
|
- !ruby/object:Gem::Version
|
211
211
|
version: '0'
|
212
|
-
segments:
|
213
|
-
- 0
|
214
|
-
hash: -3305280802339019228
|
215
212
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
216
213
|
none: false
|
217
214
|
requirements:
|