biodiversity19 0.7.0 → 0.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION +1 -1
- data/lib/biodiversity/parser.rb +6 -0
- data/spec/parser/test_data.txt +9 -0
- metadata +3 -3
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.7.0
|
1
|
+
0.7.1
|
data/lib/biodiversity/parser.rb
CHANGED
@@ -50,6 +50,10 @@ class ScientificNameParser
|
|
50
50
|
!!(a_string.match(/\sICTV\s*$/) || a_string.match(/\s(virus|phage|viroid|satellite|prion)\b/i))
|
51
51
|
end
|
52
52
|
|
53
|
+
def unknown_placement?(a_string)
|
54
|
+
!!(a_string.match(/incertae\s+sedis/i) || a_string.match(/inc\.\s*sed\./i))
|
55
|
+
end
|
56
|
+
|
53
57
|
def parsed
|
54
58
|
@parsed
|
55
59
|
end
|
@@ -60,6 +64,8 @@ class ScientificNameParser
|
|
60
64
|
|
61
65
|
if virus?(a_string)
|
62
66
|
@parsed = { :verbatim => a_string, :virus => true }
|
67
|
+
elsif unknown_placement?(a_string)
|
68
|
+
@parsed = { :verbatim => a_string }
|
63
69
|
else
|
64
70
|
@parsed = @clean.parse(a_string) || @dirty.parse(a_string) || @canonical.parse(a_string) || { :verbatim => a_string }
|
65
71
|
end
|
data/spec/parser/test_data.txt
CHANGED
@@ -245,6 +245,15 @@ Achillea bonarota nom. in herb.|{"scientificName":{"parsed":true, "verbatim":"Ac
|
|
245
245
|
Aconitum napellus var. formosum (Rchb.) W. D. J. Koch (nom. ambig.)|{"scientificName":{"parsed":true, "verbatim":"Aconitum napellus var. formosum (Rchb.) W. D. J. Koch (nom. ambig.)", "normalized":"Aconitum napellus var. formosum (Rchb.) W. D. J. Koch", "canonical":"Aconitum napellus formosum", "hybrid":false, "details":[{"genus":{"string":"Aconitum"}, "species":{"string":"napellus"}, "infraspecies":[{"string":"formosum", "rank":"var.", "authorship":"(Rchb.) W. D. J. Koch", "combinationAuthorTeam":{"authorTeam":"W. D. J. Koch", "author":["W. D. J. Koch"]}, "basionymAuthorTeam":{"authorTeam":"Rchb.", "author":["Rchb."]}}]}], "parser_version":"test_version", "parser_run":1, "positions":{"0":["genus", 8], "9":["species", 17], "23":["infraspecies", 31], "33":["author_word", 38], "40":["author_word", 42], "43":["author_word", 45], "46":["author_word", 48], "49":["author_word", 53]}}}
|
246
246
|
× Dialaeliopsis Hort.|{"scientificName":{"parsed":true, "verbatim":"× Dialaeliopsis Hort.", "normalized":"× Dialaeliopsis", "canonical":"Dialaeliopsis", "hybrid":true, "details":[{"uninomial":{"string":"Dialaeliopsis"}}], "parser_version":"test_version", "parser_run":1, "positions":{"2":["uninomial", 15]}}}
|
247
247
|
|
248
|
+
#should not parse incertae sedis
|
249
|
+
|
250
|
+
Incertae sedis|{"scientificName":{"parsed":false, "parser_version":"test_version", "verbatim":"Incertae sedis"}}
|
251
|
+
incertae sedis|{"scientificName":{"parsed":false, "parser_version":"test_version", "verbatim":"incertae sedis"}}
|
252
|
+
Inc. sed.|{"scientificName":{"parsed":false, "parser_version":"test_version", "verbatim":"Inc. sed."}}
|
253
|
+
inc.sed.|{"scientificName":{"parsed":false, "parser_version":"test_version", "verbatim":"inc.sed."}}
|
254
|
+
inc. sed.|{"scientificName":{"parsed":false, "parser_version":"test_version", "verbatim":"inc. sed."}}
|
255
|
+
|
256
|
+
|
248
257
|
#should not parse viruses
|
249
258
|
Abutilon mosaic virus [X15983] [X15984] Abutilon mosaic virus ICTV|{"scientificName":{"parsed":false, "parser_version":"test_version", "verbatim":"Abutilon mosaic virus [X15983] [X15984] Abutilon mosaic virus ICTV", "virus":true}}
|
250
259
|
Acute bee paralysis virus [AF150629] Acute bee paralysis virus|{"scientificName":{"parsed":false, "parser_version":"test_version", "verbatim":"Acute bee paralysis virus [AF150629] Acute bee paralysis virus", "virus":true}}
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 7
|
8
|
-
- 0
|
9
|
-
version: 0.7.0
|
8
|
+
- 1
|
9
|
+
version: 0.7.1
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Dmitry Mozzherin
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2010-09-
|
17
|
+
date: 2010-09-17 00:00:00 -04:00
|
18
18
|
default_executable:
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|