biodiversity 3.1.8 → 3.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG +3 -0
- data/lib/biodiversity/parser.rb +11 -7
- data/lib/biodiversity/parser/scientific_name_clean.treetop +3 -9
- data/lib/biodiversity/version.rb +1 -1
- data/spec/files/test_data.txt +20 -4
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: fcbab08fed86591b64c378a48328bf6163cbe778
|
|
4
|
+
data.tar.gz: 318aec7ddf0deaf30d9b849f06b499a22f148100
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 37a5ec870231ed2d80b3a1648d66bd3327b48abfd259354900c0f0114233bf591c6e9ca9b5b09f530c2c5c66c319d39bc5452ca57f853fb9c5efb417b54eb18b
|
|
7
|
+
data.tar.gz: f907d03307a85fc72eea15f1a423cc6622b81acabe08b07da19da97588021d65cd90e514cd519126166e3d4baba97ce31341f7f0da040e91dc79b50a3345deff
|
data/CHANGELOG
CHANGED
data/lib/biodiversity/parser.rb
CHANGED
|
@@ -161,14 +161,18 @@ class ScientificNameParser
|
|
|
161
161
|
|
|
162
162
|
def virus?(a_string)
|
|
163
163
|
!!(a_string.match(/\sICTV\s*$/) ||
|
|
164
|
-
a_string.match(/\b(virus|viruses|
|
|
165
|
-
phage|phages|viroid|viroids|
|
|
166
|
-
|
|
167
|
-
a_string.match(/[A-Z]?[a-z]+virus\b/)
|
|
164
|
+
a_string.match(/\b(virus|viruses|particle|particles|
|
|
165
|
+
phage|phages|viroid|viroids|virophage|
|
|
166
|
+
prion|prions|NPV)\b/ix) ||
|
|
167
|
+
a_string.match(/[A-Z]?[a-z]+virus\b/) ||
|
|
168
|
+
a_string.match(/\b[A-Za-z]*satellite[s]?\b/))
|
|
168
169
|
end
|
|
169
170
|
|
|
170
|
-
def
|
|
171
|
-
|
|
171
|
+
def noparse?(a_string)
|
|
172
|
+
incertae_sedis = a_string.match(/incertae\s+sedis/i) ||
|
|
173
|
+
a_string.match(/inc\.\s*sed\./i)
|
|
174
|
+
rna = a_string.match(/[^A-Z]RNA[^A-Z]*/)
|
|
175
|
+
incertae_sedis || rna
|
|
172
176
|
end
|
|
173
177
|
|
|
174
178
|
def parsed
|
|
@@ -181,7 +185,7 @@ class ScientificNameParser
|
|
|
181
185
|
|
|
182
186
|
if virus?(a_string)
|
|
183
187
|
@parsed = { verbatim: a_string, virus: true }
|
|
184
|
-
elsif
|
|
188
|
+
elsif noparse?(a_string)
|
|
185
189
|
@parsed = { verbatim: a_string }
|
|
186
190
|
else
|
|
187
191
|
begin
|
|
@@ -1372,15 +1372,9 @@ grammar ScientificNameClean
|
|
|
1372
1372
|
end
|
|
1373
1373
|
}
|
|
1374
1374
|
/
|
|
1375
|
-
"
|
|
1375
|
+
a:valid_name_letter "'" b:latin_word {
|
|
1376
1376
|
def value
|
|
1377
|
-
"
|
|
1378
|
-
end
|
|
1379
|
-
}
|
|
1380
|
-
/
|
|
1381
|
-
"o'neili" {
|
|
1382
|
-
def value
|
|
1383
|
-
"oneili"
|
|
1377
|
+
a.value + "'" + b.value
|
|
1384
1378
|
end
|
|
1385
1379
|
}
|
|
1386
1380
|
/
|
|
@@ -1388,7 +1382,7 @@ grammar ScientificNameClean
|
|
|
1388
1382
|
def value
|
|
1389
1383
|
a.value + b.value
|
|
1390
1384
|
end
|
|
1391
|
-
|
|
1385
|
+
}
|
|
1392
1386
|
end
|
|
1393
1387
|
|
|
1394
1388
|
rule valid_name_letters
|
data/lib/biodiversity/version.rb
CHANGED
data/spec/files/test_data.txt
CHANGED
|
@@ -63,7 +63,13 @@ Mo. alpium (Osbeck, 1778)|{"scientificName":{"parsed":true, "parser_version":"te
|
|
|
63
63
|
Mom.alpium (Osbeck, 1778)|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Mom.alpium (Osbeck, 1778)", "normalized":"Mom. alpium (Osbeck 1778)", "canonical":"Mom. alpium", "hybrid":false, "details":[{"genus":{"string":"Mom."}, "species":{"string":"alpium", "authorship":"(Osbeck, 1778)", "basionymAuthorTeam":{"authorTeam":"Osbeck", "author":["Osbeck"], "year":"1778"}}}], "parser_run":1, "positions":{"0":["genus", 4], "4":["species", 10], "12":["author_word", 18], "20":["year", 24]}}}
|
|
64
64
|
|
|
65
65
|
#binomial with apostrophe in species epithet
|
|
66
|
-
Junellia o'donelli Moldenke, 1946|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Junellia o'donelli Moldenke, 1946", "normalized":"Junellia
|
|
66
|
+
Junellia o'donelli Moldenke, 1946|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Junellia o'donelli Moldenke, 1946", "normalized":"Junellia o'donelli Moldenke 1946", "canonical":"Junellia o'donelli", "hybrid":false, "details":[{"genus":{"string":"Junellia"}, "species":{"string":"o'donelli", "authorship":"Moldenke, 1946", "basionymAuthorTeam":{"authorTeam":"Moldenke", "author":["Moldenke"], "year":"1946"}}}], "parser_run":1, "positions":{"0":["genus", 8], "9":["species", 18], "19":["author_word", 27], "29":["year", 33]}}}
|
|
67
|
+
Trophon d'orbignyi Carcelles, 1946|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Trophon d'orbignyi Carcelles, 1946", "normalized":"Trophon d'orbignyi Carcelles 1946", "canonical":"Trophon d'orbignyi", "hybrid":false, "details":[{"genus":{"string":"Trophon"}, "species":{"string":"d'orbignyi", "authorship":"Carcelles, 1946", "basionymAuthorTeam":{"authorTeam":"Carcelles", "author":["Carcelles"], "year":"1946"}}}], "parser_run":1, "positions":{"0":["genus", 7], "8":["species", 18], "19":["author_word", 28], "30":["year", 34]}}}
|
|
68
|
+
Arca m'coyi Tenison-Woods, 1878|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Arca m'coyi Tenison-Woods, 1878", "normalized":"Arca m'coyi Tenison-Woods 1878", "canonical":"Arca m'coyi", "hybrid":false, "details":[{"genus":{"string":"Arca"}, "species":{"string":"m'coyi", "authorship":"Tenison-Woods, 1878", "basionymAuthorTeam":{"authorTeam":"Tenison-Woods", "author":["Tenison-Woods"], "year":"1878"}}}], "parser_run":1, "positions":{"0":["genus", 4], "5":["species", 11], "12":["author_word", 25], "27":["year", 31]}}}
|
|
69
|
+
Nucula m'andrewii Hanley, 1860|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Nucula m'andrewii Hanley, 1860", "normalized":"Nucula m'andrewii Hanley 1860", "canonical":"Nucula m'andrewii", "hybrid":false, "details":[{"genus":{"string":"Nucula"}, "species":{"string":"m'andrewii", "authorship":"Hanley, 1860", "basionymAuthorTeam":{"authorTeam":"Hanley", "author":["Hanley"], "year":"1860"}}}], "parser_run":1, "positions":{"0":["genus", 6], "7":["species", 17], "18":["author_word", 24], "26":["year", 30]}}}
|
|
70
|
+
Eristalis l'herminierii Macquart|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Eristalis l'herminierii Macquart", "normalized":"Eristalis l'herminierii Macquart", "canonical":"Eristalis l'herminierii", "hybrid":false, "details":[{"genus":{"string":"Eristalis"}, "species":{"string":"l'herminierii", "authorship":"Macquart", "basionymAuthorTeam":{"authorTeam":"Macquart", "author":["Macquart"]}}}], "parser_run":1, "positions":{"0":["genus", 9], "10":["species", 23], "24":["author_word", 32]}}}
|
|
71
|
+
Odynerus o'neili Cameron|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Odynerus o'neili Cameron", "normalized":"Odynerus o'neili Cameron", "canonical":"Odynerus o'neili", "hybrid":false, "details":[{"genus":{"string":"Odynerus"}, "species":{"string":"o'neili", "authorship":"Cameron", "basionymAuthorTeam":{"authorTeam":"Cameron", "author":["Cameron"]}}}], "parser_run":1, "positions":{"0":["genus", 8], "9":["species", 16], "17":["author_word", 24]}}}
|
|
72
|
+
Serjania meridionalis Cambess. var. o'donelli F.A. Barkley|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Serjania meridionalis Cambess. var. o'donelli F.A. Barkley", "normalized":"Serjania meridionalis Cambess. var. o'donelli F.A. Barkley", "canonical":"Serjania meridionalis o'donelli", "hybrid":false, "details":[{"genus":{"string":"Serjania"}, "species":{"string":"meridionalis", "authorship":"Cambess.", "basionymAuthorTeam":{"authorTeam":"Cambess.", "author":["Cambess."]}}, "infraspecies":[{"string":"o'donelli", "rank":"var.", "authorship":"F.A. Barkley", "basionymAuthorTeam":{"authorTeam":"F.A. Barkley", "author":["F.A. Barkley"]}}]}], "parser_run":1, "positions":{"0":["genus", 8], "9":["species", 21], "22":["author_word", 30], "31":["infraspecific_type", 35], "36":["infraspecies", 45], "46":["author_word", 50], "51":["author_word", 58]}}}
|
|
67
73
|
|
|
68
74
|
# whitespace names (rare, only ~50 cases)
|
|
69
75
|
#TODO Donatia novae zelandiae Hook.f.
|
|
@@ -132,9 +138,6 @@ Pseudocercospora dendrobii(H.C. Burnett, 1873)U. Braun & Crous 2003|{"sc
|
|
|
132
138
|
Pseudocercospora dendrobii(H.C. Burnett 1873)U. Braun & Crous , 2003|{"scientificName":{"parsed":true, "parser_version":"test_version", "parser_run":1,"verbatim":"Pseudocercospora dendrobii(H.C. Burnett 1873)U. Braun & Crous , 2003","normalized":"Pseudocercospora dendrobii (H.C. Burnett 1873) U. Braun & Crous 2003","canonical":"Pseudocercospora dendrobii","hybrid":false,"details":[{"genus":{"string":"Pseudocercospora"},"species":{"string":"dendrobii","authorship":"(H.C. Burnett 1873)U. Braun & Crous , 2003","combinationAuthorTeam":{"authorTeam":"U. Braun & Crous","author":["U. Braun","Crous"],"year":"2003"},"basionymAuthorTeam":{"authorTeam":"H.C. Burnett","author":["H.C. Burnett"],"year":"1873"}}}],"positions":{"0":["genus",16],"17":["species",26],"27":["author_word",31],"36":["author_word",43],"44":["year",48],"49":["author_word",51],"52":["author_word",57],"60":["author_word",65],"71":["year",75]}}}
|
|
133
139
|
Sedella pumila (Benth.) Britton & Rose|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Sedella pumila (Benth.) Britton & Rose", "normalized":"Sedella pumila (Benth.) Britton & Rose", "canonical":"Sedella pumila", "hybrid":false, "details":[{"genus":{"string":"Sedella"}, "species":{"string":"pumila", "authorship":"(Benth.) Britton & Rose", "combinationAuthorTeam":{"authorTeam":"Britton & Rose", "author":["Britton", "Rose"]}, "basionymAuthorTeam":{"authorTeam":"Benth.", "author":["Benth."]}}}], "parser_run":1, "positions":{"0":["genus", 7], "8":["species", 14], "16":["author_word", 22], "24":["author_word", 31], "34":["author_word", 38]}}}
|
|
134
140
|
|
|
135
|
-
#binomials with apostrophe in species epithet
|
|
136
|
-
Odynerus o'neili Cameron|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Odynerus o'neili Cameron", "normalized":"Odynerus oneili Cameron", "canonical":"Odynerus oneili", "hybrid":false, "details":[{"genus":{"string":"Odynerus"}, "species":{"string":"oneili", "authorship":"Cameron", "basionymAuthorTeam":{"authorTeam":"Cameron", "author":["Cameron"]}}}], "parser_run":1, "positions":{"0":["genus", 8], "9":["species", 16], "17":["author_word", 24]}}}
|
|
137
|
-
Serjania meridionalis Cambess. var. o'donelli F.A. Barkley|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Serjania meridionalis Cambess. var. o'donelli F.A. Barkley", "normalized":"Serjania meridionalis Cambess. var. odonelli F.A. Barkley", "canonical":"Serjania meridionalis odonelli", "hybrid":false, "details":[{"genus":{"string":"Serjania"}, "species":{"string":"meridionalis", "authorship":"Cambess.", "basionymAuthorTeam":{"authorTeam":"Cambess.", "author":["Cambess."]}}, "infraspecies":[{"string":"odonelli", "rank":"var.", "authorship":"F.A. Barkley", "basionymAuthorTeam":{"authorTeam":"F.A. Barkley", "author":["F.A. Barkley"]}}]}], "parser_run":1, "positions":{"0":["genus", 8], "9":["species", 21], "22":["author_word", 30], "31":["infraspecific_type", 35], "36":["infraspecies", 45], "46":["author_word", 50], "51":["author_word", 58]}}}
|
|
138
141
|
|
|
139
142
|
#infraspecies without rank
|
|
140
143
|
Hydnellum scrobiculatum zonatum (Batsch) K. A. Harrison 1961|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Hydnellum scrobiculatum zonatum (Batsch) K. A. Harrison 1961", "normalized":"Hydnellum scrobiculatum zonatum (Batsch) K. A. Harrison 1961", "canonical":"Hydnellum scrobiculatum zonatum", "hybrid":false, "details":[{"genus":{"string":"Hydnellum"}, "species":{"string":"scrobiculatum"}, "infraspecies":[{"string":"zonatum", "rank":"n/a", "authorship":"(Batsch) K. A. Harrison 1961", "combinationAuthorTeam":{"authorTeam":"K. A. Harrison", "author":["K. A. Harrison"], "year":"1961"}, "basionymAuthorTeam":{"authorTeam":"Batsch", "author":["Batsch"]}}]}], "parser_run":1, "positions":{"0":["genus", 9], "10":["species", 23], "24":["infraspecies", 31], "33":["author_word", 39], "41":["author_word", 43], "44":["author_word", 46], "47":["author_word", 55], "56":["year", 60]}}}
|
|
@@ -357,6 +360,19 @@ Fungal prions|{"scientificName":{"parsed":false, "parser_version":"test_version"
|
|
|
357
360
|
Human rhinovirus A11|{"scientificName":{"parsed":false, "parser_version":"test_version", "verbatim":"Human rhinovirus A11", "virus":true}}
|
|
358
361
|
Kobuvirus korean black goat/South Korea/2010|{"scientificName":{"parsed":false, "parser_version":"test_version", "verbatim":"Kobuvirus korean black goat/South Korea/2010", "virus":true}}
|
|
359
362
|
Australian bat lyssavirus human/AUS/1998|{"scientificName":{"parsed":false, "parser_version":"test_version", "verbatim":"Australian bat lyssavirus human/AUS/1998", "virus":true}}
|
|
363
|
+
Gossypium mustilinum symptomless alphasatellite|{"scientificName":{"parsed":false, "parser_version":"test_version", "verbatim":"Gossypium mustilinum symptomless alphasatellite", "virus":true}}
|
|
364
|
+
Okra leaf curl Mali alphasatellites-Cameroon|{"scientificName":{"parsed":false, "parser_version":"test_version", "verbatim":"Okra leaf curl Mali alphasatellites-Cameroon", "virus":true}}
|
|
365
|
+
Bemisia betasatellite LW-2014|{"scientificName":{"parsed":false, "parser_version":"test_version", "verbatim":"Bemisia betasatellite LW-2014", "virus":true}}
|
|
366
|
+
Tomato leaf curl Bangladesh betasatellites [India/Patna/Chilli/2008]|{"scientificName":{"parsed":false, "parser_version":"test_version", "verbatim":"Tomato leaf curl Bangladesh betasatellites [India/Patna/Chilli/2008]", "virus":true}}
|
|
367
|
+
Intracisternal A-particles|{"scientificName":{"parsed":false, "parser_version":"test_version", "verbatim":"Intracisternal A-particles", "virus":true}}
|
|
368
|
+
Saccharomyces cerevisiae killer particle M1|{"scientificName":{"parsed":false, "parser_version":"test_version", "verbatim":"Saccharomyces cerevisiae killer particle M1", "virus":true}}
|
|
369
|
+
|
|
370
|
+
#should not parse non-virus names containing RNA
|
|
371
|
+
ssRNA|{"scientificName":{"parsed":false, "parser_version":"test_version", "verbatim":"ssRNA"}}
|
|
372
|
+
Alpha proteobacterium RNA12|{"scientificName":{"parsed":false, "parser_version":"test_version", "verbatim":"Alpha proteobacterium RNA12"}}
|
|
373
|
+
Ustilaginoidea virens RNA virus|{"scientificName":{"parsed":false, "parser_version":"test_version", "verbatim":"Ustilaginoidea virens RNA virus", "virus":true}}
|
|
374
|
+
Candida albicans RNA_CTR0-3|{"scientificName":{"parsed":false, "parser_version":"test_version", "verbatim":"Candida albicans RNA_CTR0-3"}}
|
|
375
|
+
Calathus (Lindrothius) KURNAKOV 1961|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Calathus (Lindrothius) KURNAKOV 1961", "normalized":"Calathus (Lindrothius) Kurnakov 1961", "canonical":"Calathus", "hybrid":false, "details":[{"uninomial":{"string":"Calathus", "infragenus":{"string":"Lindrothius"}, "authorship":"KURNAKOV 1961", "basionymAuthorTeam":{"authorTeam":"KURNAKOV", "author":["Kurnakov"], "year":"1961"}}}], "parser_run":1, "positions":{"0":["uninomial", 8], "10":["infragenus", 21], "23":["author_word", 31], "32":["year", 36]}}}
|
|
360
376
|
|
|
361
377
|
#double parenthesis
|
|
362
378
|
Eichornia crassipes ( (Martius) ) Solms-Laub.|{"scientificName":{"parsed":true, "parser_version":"test_version", "parser_run":2,"verbatim":"Eichornia crassipes ( (Martius) ) Solms-Laub.","normalized":"Eichornia crassipes (Martius) Solms-Laub.","canonical":"Eichornia crassipes","hybrid":false,"details":[{"genus":{"string":"Eichornia"},"species":{"string":"crassipes","authorship":"( (Martius) ) Solms-Laub.","combinationAuthorTeam":{"authorTeam":"Solms-Laub.","author":["Solms-Laub."]},"basionymAuthorTeam":{"authorTeam":"Martius","author":["Martius"]}}}],"positions":{"0":["genus",9],"10":["species",19],"23":["author_word",30],"34":["author_word",45]}}}
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: biodiversity
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 3.1.8
|
|
4
|
+
version: 3.1.9
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Dmitry Mozzherin
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2015-
|
|
11
|
+
date: 2015-05-14 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: treetop
|