biodiversity 0.7.0 → 0.7.1
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION +1 -1
- data/lib/biodiversity/parser.rb +6 -0
- data/spec/parser/test_data.txt +9 -0
- metadata +4 -4
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.7.0
|
1
|
+
0.7.1
|
data/lib/biodiversity/parser.rb
CHANGED
@@ -50,6 +50,10 @@ class ScientificNameParser
|
|
50
50
|
!!(a_string.match(/\sICTV\s*$/) || a_string.match(/\s(virus|phage|viroid|satellite|prion)\b/i))
|
51
51
|
end
|
52
52
|
|
53
|
+
def unknown_placement?(a_string)
|
54
|
+
!!(a_string.match(/incertae\s+sedis/i) || a_string.match(/inc\.\s*sed\./i))
|
55
|
+
end
|
56
|
+
|
53
57
|
def parsed
|
54
58
|
@parsed
|
55
59
|
end
|
@@ -60,6 +64,8 @@ class ScientificNameParser
|
|
60
64
|
|
61
65
|
if virus?(a_string)
|
62
66
|
@parsed = { :verbatim => a_string, :virus => true }
|
67
|
+
elsif unknown_placement?(a_string)
|
68
|
+
@parsed = { :verbatim => a_string }
|
63
69
|
else
|
64
70
|
@parsed = @clean.parse(a_string) || @dirty.parse(a_string) || @canonical.parse(a_string) || { :verbatim => a_string }
|
65
71
|
end
|
data/spec/parser/test_data.txt
CHANGED
@@ -245,6 +245,15 @@ Achillea bonarota nom. in herb.|{"scientificName":{"parsed":true, "verbatim":"Ac
|
|
245
245
|
Aconitum napellus var. formosum (Rchb.) W. D. J. Koch (nom. ambig.)|{"scientificName":{"parsed":true, "verbatim":"Aconitum napellus var. formosum (Rchb.) W. D. J. Koch (nom. ambig.)", "normalized":"Aconitum napellus var. formosum (Rchb.) W. D. J. Koch", "canonical":"Aconitum napellus formosum", "hybrid":false, "details":[{"genus":{"string":"Aconitum"}, "species":{"string":"napellus"}, "infraspecies":[{"string":"formosum", "rank":"var.", "authorship":"(Rchb.) W. D. J. Koch", "combinationAuthorTeam":{"authorTeam":"W. D. J. Koch", "author":["W. D. J. Koch"]}, "basionymAuthorTeam":{"authorTeam":"Rchb.", "author":["Rchb."]}}]}], "parser_version":"test_version", "parser_run":1, "positions":{"0":["genus", 8], "9":["species", 17], "23":["infraspecies", 31], "33":["author_word", 38], "40":["author_word", 42], "43":["author_word", 45], "46":["author_word", 48], "49":["author_word", 53]}}}
|
246
246
|
× Dialaeliopsis Hort.|{"scientificName":{"parsed":true, "verbatim":"× Dialaeliopsis Hort.", "normalized":"× Dialaeliopsis", "canonical":"Dialaeliopsis", "hybrid":true, "details":[{"uninomial":{"string":"Dialaeliopsis"}}], "parser_version":"test_version", "parser_run":1, "positions":{"2":["uninomial", 15]}}}
|
247
247
|
|
248
|
+
#should not parse incertae sedis
|
249
|
+
|
250
|
+
Incertae sedis|{"scientificName":{"parsed":false, "parser_version":"test_version", "verbatim":"Incertae sedis"}}
|
251
|
+
incertae sedis|{"scientificName":{"parsed":false, "parser_version":"test_version", "verbatim":"incertae sedis"}}
|
252
|
+
Inc. sed.|{"scientificName":{"parsed":false, "parser_version":"test_version", "verbatim":"Inc. sed."}}
|
253
|
+
inc.sed.|{"scientificName":{"parsed":false, "parser_version":"test_version", "verbatim":"inc.sed."}}
|
254
|
+
inc. sed.|{"scientificName":{"parsed":false, "parser_version":"test_version", "verbatim":"inc. sed."}}
|
255
|
+
|
256
|
+
|
248
257
|
#should not parse viruses
|
249
258
|
Abutilon mosaic virus [X15983] [X15984] Abutilon mosaic virus ICTV|{"scientificName":{"parsed":false, "parser_version":"test_version", "verbatim":"Abutilon mosaic virus [X15983] [X15984] Abutilon mosaic virus ICTV", "virus":true}}
|
250
259
|
Acute bee paralysis virus [AF150629] Acute bee paralysis virus|{"scientificName":{"parsed":false, "parser_version":"test_version", "verbatim":"Acute bee paralysis virus [AF150629] Acute bee paralysis virus", "virus":true}}
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: biodiversity
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 1
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 7
|
9
|
-
- 0
|
10
|
-
version: 0.7.0
|
9
|
+
- 1
|
10
|
+
version: 0.7.1
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Dmitry Mozzherin
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2010-09-
|
18
|
+
date: 2010-09-17 00:00:00 -04:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|