biodiversity 3.1.8 → 3.1.9
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG +3 -0
- data/lib/biodiversity/parser.rb +11 -7
- data/lib/biodiversity/parser/scientific_name_clean.treetop +3 -9
- data/lib/biodiversity/version.rb +1 -1
- data/spec/files/test_data.txt +20 -4
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: fcbab08fed86591b64c378a48328bf6163cbe778
|
4
|
+
data.tar.gz: 318aec7ddf0deaf30d9b849f06b499a22f148100
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 37a5ec870231ed2d80b3a1648d66bd3327b48abfd259354900c0f0114233bf591c6e9ca9b5b09f530c2c5c66c319d39bc5452ca57f853fb9c5efb417b54eb18b
|
7
|
+
data.tar.gz: f907d03307a85fc72eea15f1a423cc6622b81acabe08b07da19da97588021d65cd90e514cd519126166e3d4baba97ce31341f7f0da040e91dc79b50a3345deff
|
data/CHANGELOG
CHANGED
data/lib/biodiversity/parser.rb
CHANGED
@@ -161,14 +161,18 @@ class ScientificNameParser
|
|
161
161
|
|
162
162
|
def virus?(a_string)
|
163
163
|
!!(a_string.match(/\sICTV\s*$/) ||
|
164
|
-
a_string.match(/\b(virus|viruses|
|
165
|
-
phage|phages|viroid|viroids|
|
166
|
-
|
167
|
-
a_string.match(/[A-Z]?[a-z]+virus\b/)
|
164
|
+
a_string.match(/\b(virus|viruses|particle|particles|
|
165
|
+
phage|phages|viroid|viroids|virophage|
|
166
|
+
prion|prions|NPV)\b/ix) ||
|
167
|
+
a_string.match(/[A-Z]?[a-z]+virus\b/) ||
|
168
|
+
a_string.match(/\b[A-Za-z]*satellite[s]?\b/))
|
168
169
|
end
|
169
170
|
|
170
|
-
def
|
171
|
-
|
171
|
+
def noparse?(a_string)
|
172
|
+
incertae_sedis = a_string.match(/incertae\s+sedis/i) ||
|
173
|
+
a_string.match(/inc\.\s*sed\./i)
|
174
|
+
rna = a_string.match(/[^A-Z]RNA[^A-Z]*/)
|
175
|
+
incertae_sedis || rna
|
172
176
|
end
|
173
177
|
|
174
178
|
def parsed
|
@@ -181,7 +185,7 @@ class ScientificNameParser
|
|
181
185
|
|
182
186
|
if virus?(a_string)
|
183
187
|
@parsed = { verbatim: a_string, virus: true }
|
184
|
-
elsif
|
188
|
+
elsif noparse?(a_string)
|
185
189
|
@parsed = { verbatim: a_string }
|
186
190
|
else
|
187
191
|
begin
|
@@ -1372,15 +1372,9 @@ grammar ScientificNameClean
|
|
1372
1372
|
end
|
1373
1373
|
}
|
1374
1374
|
/
|
1375
|
-
"
|
1375
|
+
a:valid_name_letter "'" b:latin_word {
|
1376
1376
|
def value
|
1377
|
-
"
|
1378
|
-
end
|
1379
|
-
}
|
1380
|
-
/
|
1381
|
-
"o'neili" {
|
1382
|
-
def value
|
1383
|
-
"oneili"
|
1377
|
+
a.value + "'" + b.value
|
1384
1378
|
end
|
1385
1379
|
}
|
1386
1380
|
/
|
@@ -1388,7 +1382,7 @@ grammar ScientificNameClean
|
|
1388
1382
|
def value
|
1389
1383
|
a.value + b.value
|
1390
1384
|
end
|
1391
|
-
|
1385
|
+
}
|
1392
1386
|
end
|
1393
1387
|
|
1394
1388
|
rule valid_name_letters
|
data/lib/biodiversity/version.rb
CHANGED
data/spec/files/test_data.txt
CHANGED
@@ -63,7 +63,13 @@ Mo. alpium (Osbeck, 1778)|{"scientificName":{"parsed":true, "parser_version":"te
|
|
63
63
|
Mom.alpium (Osbeck, 1778)|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Mom.alpium (Osbeck, 1778)", "normalized":"Mom. alpium (Osbeck 1778)", "canonical":"Mom. alpium", "hybrid":false, "details":[{"genus":{"string":"Mom."}, "species":{"string":"alpium", "authorship":"(Osbeck, 1778)", "basionymAuthorTeam":{"authorTeam":"Osbeck", "author":["Osbeck"], "year":"1778"}}}], "parser_run":1, "positions":{"0":["genus", 4], "4":["species", 10], "12":["author_word", 18], "20":["year", 24]}}}
|
64
64
|
|
65
65
|
#binomial with apostrophe in species epithet
|
66
|
-
Junellia o'donelli Moldenke, 1946|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Junellia o'donelli Moldenke, 1946", "normalized":"Junellia
|
66
|
+
Junellia o'donelli Moldenke, 1946|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Junellia o'donelli Moldenke, 1946", "normalized":"Junellia o'donelli Moldenke 1946", "canonical":"Junellia o'donelli", "hybrid":false, "details":[{"genus":{"string":"Junellia"}, "species":{"string":"o'donelli", "authorship":"Moldenke, 1946", "basionymAuthorTeam":{"authorTeam":"Moldenke", "author":["Moldenke"], "year":"1946"}}}], "parser_run":1, "positions":{"0":["genus", 8], "9":["species", 18], "19":["author_word", 27], "29":["year", 33]}}}
|
67
|
+
Trophon d'orbignyi Carcelles, 1946|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Trophon d'orbignyi Carcelles, 1946", "normalized":"Trophon d'orbignyi Carcelles 1946", "canonical":"Trophon d'orbignyi", "hybrid":false, "details":[{"genus":{"string":"Trophon"}, "species":{"string":"d'orbignyi", "authorship":"Carcelles, 1946", "basionymAuthorTeam":{"authorTeam":"Carcelles", "author":["Carcelles"], "year":"1946"}}}], "parser_run":1, "positions":{"0":["genus", 7], "8":["species", 18], "19":["author_word", 28], "30":["year", 34]}}}
|
68
|
+
Arca m'coyi Tenison-Woods, 1878|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Arca m'coyi Tenison-Woods, 1878", "normalized":"Arca m'coyi Tenison-Woods 1878", "canonical":"Arca m'coyi", "hybrid":false, "details":[{"genus":{"string":"Arca"}, "species":{"string":"m'coyi", "authorship":"Tenison-Woods, 1878", "basionymAuthorTeam":{"authorTeam":"Tenison-Woods", "author":["Tenison-Woods"], "year":"1878"}}}], "parser_run":1, "positions":{"0":["genus", 4], "5":["species", 11], "12":["author_word", 25], "27":["year", 31]}}}
|
69
|
+
Nucula m'andrewii Hanley, 1860|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Nucula m'andrewii Hanley, 1860", "normalized":"Nucula m'andrewii Hanley 1860", "canonical":"Nucula m'andrewii", "hybrid":false, "details":[{"genus":{"string":"Nucula"}, "species":{"string":"m'andrewii", "authorship":"Hanley, 1860", "basionymAuthorTeam":{"authorTeam":"Hanley", "author":["Hanley"], "year":"1860"}}}], "parser_run":1, "positions":{"0":["genus", 6], "7":["species", 17], "18":["author_word", 24], "26":["year", 30]}}}
|
70
|
+
Eristalis l'herminierii Macquart|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Eristalis l'herminierii Macquart", "normalized":"Eristalis l'herminierii Macquart", "canonical":"Eristalis l'herminierii", "hybrid":false, "details":[{"genus":{"string":"Eristalis"}, "species":{"string":"l'herminierii", "authorship":"Macquart", "basionymAuthorTeam":{"authorTeam":"Macquart", "author":["Macquart"]}}}], "parser_run":1, "positions":{"0":["genus", 9], "10":["species", 23], "24":["author_word", 32]}}}
|
71
|
+
Odynerus o'neili Cameron|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Odynerus o'neili Cameron", "normalized":"Odynerus o'neili Cameron", "canonical":"Odynerus o'neili", "hybrid":false, "details":[{"genus":{"string":"Odynerus"}, "species":{"string":"o'neili", "authorship":"Cameron", "basionymAuthorTeam":{"authorTeam":"Cameron", "author":["Cameron"]}}}], "parser_run":1, "positions":{"0":["genus", 8], "9":["species", 16], "17":["author_word", 24]}}}
|
72
|
+
Serjania meridionalis Cambess. var. o'donelli F.A. Barkley|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Serjania meridionalis Cambess. var. o'donelli F.A. Barkley", "normalized":"Serjania meridionalis Cambess. var. o'donelli F.A. Barkley", "canonical":"Serjania meridionalis o'donelli", "hybrid":false, "details":[{"genus":{"string":"Serjania"}, "species":{"string":"meridionalis", "authorship":"Cambess.", "basionymAuthorTeam":{"authorTeam":"Cambess.", "author":["Cambess."]}}, "infraspecies":[{"string":"o'donelli", "rank":"var.", "authorship":"F.A. Barkley", "basionymAuthorTeam":{"authorTeam":"F.A. Barkley", "author":["F.A. Barkley"]}}]}], "parser_run":1, "positions":{"0":["genus", 8], "9":["species", 21], "22":["author_word", 30], "31":["infraspecific_type", 35], "36":["infraspecies", 45], "46":["author_word", 50], "51":["author_word", 58]}}}
|
67
73
|
|
68
74
|
# whitespace names (rare, only ~50 cases)
|
69
75
|
#TODO Donatia novae zelandiae Hook.f.
|
@@ -132,9 +138,6 @@ Pseudocercospora dendrobii(H.C. Burnett, 1873)U. Braun & Crous 2003|{"sc
|
|
132
138
|
Pseudocercospora dendrobii(H.C. Burnett 1873)U. Braun & Crous , 2003|{"scientificName":{"parsed":true, "parser_version":"test_version", "parser_run":1,"verbatim":"Pseudocercospora dendrobii(H.C. Burnett 1873)U. Braun & Crous , 2003","normalized":"Pseudocercospora dendrobii (H.C. Burnett 1873) U. Braun & Crous 2003","canonical":"Pseudocercospora dendrobii","hybrid":false,"details":[{"genus":{"string":"Pseudocercospora"},"species":{"string":"dendrobii","authorship":"(H.C. Burnett 1873)U. Braun & Crous , 2003","combinationAuthorTeam":{"authorTeam":"U. Braun & Crous","author":["U. Braun","Crous"],"year":"2003"},"basionymAuthorTeam":{"authorTeam":"H.C. Burnett","author":["H.C. Burnett"],"year":"1873"}}}],"positions":{"0":["genus",16],"17":["species",26],"27":["author_word",31],"36":["author_word",43],"44":["year",48],"49":["author_word",51],"52":["author_word",57],"60":["author_word",65],"71":["year",75]}}}
|
133
139
|
Sedella pumila (Benth.) Britton & Rose|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Sedella pumila (Benth.) Britton & Rose", "normalized":"Sedella pumila (Benth.) Britton & Rose", "canonical":"Sedella pumila", "hybrid":false, "details":[{"genus":{"string":"Sedella"}, "species":{"string":"pumila", "authorship":"(Benth.) Britton & Rose", "combinationAuthorTeam":{"authorTeam":"Britton & Rose", "author":["Britton", "Rose"]}, "basionymAuthorTeam":{"authorTeam":"Benth.", "author":["Benth."]}}}], "parser_run":1, "positions":{"0":["genus", 7], "8":["species", 14], "16":["author_word", 22], "24":["author_word", 31], "34":["author_word", 38]}}}
|
134
140
|
|
135
|
-
#binomials with apostrophe in species epithet
|
136
|
-
Odynerus o'neili Cameron|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Odynerus o'neili Cameron", "normalized":"Odynerus oneili Cameron", "canonical":"Odynerus oneili", "hybrid":false, "details":[{"genus":{"string":"Odynerus"}, "species":{"string":"oneili", "authorship":"Cameron", "basionymAuthorTeam":{"authorTeam":"Cameron", "author":["Cameron"]}}}], "parser_run":1, "positions":{"0":["genus", 8], "9":["species", 16], "17":["author_word", 24]}}}
|
137
|
-
Serjania meridionalis Cambess. var. o'donelli F.A. Barkley|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Serjania meridionalis Cambess. var. o'donelli F.A. Barkley", "normalized":"Serjania meridionalis Cambess. var. odonelli F.A. Barkley", "canonical":"Serjania meridionalis odonelli", "hybrid":false, "details":[{"genus":{"string":"Serjania"}, "species":{"string":"meridionalis", "authorship":"Cambess.", "basionymAuthorTeam":{"authorTeam":"Cambess.", "author":["Cambess."]}}, "infraspecies":[{"string":"odonelli", "rank":"var.", "authorship":"F.A. Barkley", "basionymAuthorTeam":{"authorTeam":"F.A. Barkley", "author":["F.A. Barkley"]}}]}], "parser_run":1, "positions":{"0":["genus", 8], "9":["species", 21], "22":["author_word", 30], "31":["infraspecific_type", 35], "36":["infraspecies", 45], "46":["author_word", 50], "51":["author_word", 58]}}}
|
138
141
|
|
139
142
|
#infraspecies without rank
|
140
143
|
Hydnellum scrobiculatum zonatum (Batsch) K. A. Harrison 1961|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Hydnellum scrobiculatum zonatum (Batsch) K. A. Harrison 1961", "normalized":"Hydnellum scrobiculatum zonatum (Batsch) K. A. Harrison 1961", "canonical":"Hydnellum scrobiculatum zonatum", "hybrid":false, "details":[{"genus":{"string":"Hydnellum"}, "species":{"string":"scrobiculatum"}, "infraspecies":[{"string":"zonatum", "rank":"n/a", "authorship":"(Batsch) K. A. Harrison 1961", "combinationAuthorTeam":{"authorTeam":"K. A. Harrison", "author":["K. A. Harrison"], "year":"1961"}, "basionymAuthorTeam":{"authorTeam":"Batsch", "author":["Batsch"]}}]}], "parser_run":1, "positions":{"0":["genus", 9], "10":["species", 23], "24":["infraspecies", 31], "33":["author_word", 39], "41":["author_word", 43], "44":["author_word", 46], "47":["author_word", 55], "56":["year", 60]}}}
|
@@ -357,6 +360,19 @@ Fungal prions|{"scientificName":{"parsed":false, "parser_version":"test_version"
|
|
357
360
|
Human rhinovirus A11|{"scientificName":{"parsed":false, "parser_version":"test_version", "verbatim":"Human rhinovirus A11", "virus":true}}
|
358
361
|
Kobuvirus korean black goat/South Korea/2010|{"scientificName":{"parsed":false, "parser_version":"test_version", "verbatim":"Kobuvirus korean black goat/South Korea/2010", "virus":true}}
|
359
362
|
Australian bat lyssavirus human/AUS/1998|{"scientificName":{"parsed":false, "parser_version":"test_version", "verbatim":"Australian bat lyssavirus human/AUS/1998", "virus":true}}
|
363
|
+
Gossypium mustilinum symptomless alphasatellite|{"scientificName":{"parsed":false, "parser_version":"test_version", "verbatim":"Gossypium mustilinum symptomless alphasatellite", "virus":true}}
|
364
|
+
Okra leaf curl Mali alphasatellites-Cameroon|{"scientificName":{"parsed":false, "parser_version":"test_version", "verbatim":"Okra leaf curl Mali alphasatellites-Cameroon", "virus":true}}
|
365
|
+
Bemisia betasatellite LW-2014|{"scientificName":{"parsed":false, "parser_version":"test_version", "verbatim":"Bemisia betasatellite LW-2014", "virus":true}}
|
366
|
+
Tomato leaf curl Bangladesh betasatellites [India/Patna/Chilli/2008]|{"scientificName":{"parsed":false, "parser_version":"test_version", "verbatim":"Tomato leaf curl Bangladesh betasatellites [India/Patna/Chilli/2008]", "virus":true}}
|
367
|
+
Intracisternal A-particles|{"scientificName":{"parsed":false, "parser_version":"test_version", "verbatim":"Intracisternal A-particles", "virus":true}}
|
368
|
+
Saccharomyces cerevisiae killer particle M1|{"scientificName":{"parsed":false, "parser_version":"test_version", "verbatim":"Saccharomyces cerevisiae killer particle M1", "virus":true}}
|
369
|
+
|
370
|
+
#should not parse non-virus names containing RNA
|
371
|
+
ssRNA|{"scientificName":{"parsed":false, "parser_version":"test_version", "verbatim":"ssRNA"}}
|
372
|
+
Alpha proteobacterium RNA12|{"scientificName":{"parsed":false, "parser_version":"test_version", "verbatim":"Alpha proteobacterium RNA12"}}
|
373
|
+
Ustilaginoidea virens RNA virus|{"scientificName":{"parsed":false, "parser_version":"test_version", "verbatim":"Ustilaginoidea virens RNA virus", "virus":true}}
|
374
|
+
Candida albicans RNA_CTR0-3|{"scientificName":{"parsed":false, "parser_version":"test_version", "verbatim":"Candida albicans RNA_CTR0-3"}}
|
375
|
+
Calathus (Lindrothius) KURNAKOV 1961|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Calathus (Lindrothius) KURNAKOV 1961", "normalized":"Calathus (Lindrothius) Kurnakov 1961", "canonical":"Calathus", "hybrid":false, "details":[{"uninomial":{"string":"Calathus", "infragenus":{"string":"Lindrothius"}, "authorship":"KURNAKOV 1961", "basionymAuthorTeam":{"authorTeam":"KURNAKOV", "author":["Kurnakov"], "year":"1961"}}}], "parser_run":1, "positions":{"0":["uninomial", 8], "10":["infragenus", 21], "23":["author_word", 31], "32":["year", 36]}}}
|
360
376
|
|
361
377
|
#double parenthesis
|
362
378
|
Eichornia crassipes ( (Martius) ) Solms-Laub.|{"scientificName":{"parsed":true, "parser_version":"test_version", "parser_run":2,"verbatim":"Eichornia crassipes ( (Martius) ) Solms-Laub.","normalized":"Eichornia crassipes (Martius) Solms-Laub.","canonical":"Eichornia crassipes","hybrid":false,"details":[{"genus":{"string":"Eichornia"},"species":{"string":"crassipes","authorship":"( (Martius) ) Solms-Laub.","combinationAuthorTeam":{"authorTeam":"Solms-Laub.","author":["Solms-Laub."]},"basionymAuthorTeam":{"authorTeam":"Martius","author":["Martius"]}}}],"positions":{"0":["genus",9],"10":["species",19],"23":["author_word",30],"34":["author_word",45]}}}
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: biodiversity
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.1.
|
4
|
+
version: 3.1.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dmitry Mozzherin
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-05-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: treetop
|