biodiversity 3.1.8 → 3.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: c1655d5f7cba11e28f66fec5bf3e0b82e118e637
4
- data.tar.gz: f499f992128929e495f79632bd43aef993297fcc
3
+ metadata.gz: fcbab08fed86591b64c378a48328bf6163cbe778
4
+ data.tar.gz: 318aec7ddf0deaf30d9b849f06b499a22f148100
5
5
  SHA512:
6
- metadata.gz: 221cfe8f65d0eec4c1d3c01b82156000c8ea9f01dabe2771571a1f0fbda76e7bf4cd16147be572cd958f67c2dc0add2665e8ead43fc9ad1b6677766326f1ae25
7
- data.tar.gz: 670e24f41c5ef54a830f9e3299e6999b8ba162fcb3ca16430f56f501464be5ec6f6ff00068a87e8296600565558af14315b2b01d9ca3884a7af05b6f9c75569c
6
+ metadata.gz: 37a5ec870231ed2d80b3a1648d66bd3327b48abfd259354900c0f0114233bf591c6e9ca9b5b09f530c2c5c66c319d39bc5452ca57f853fb9c5efb417b54eb18b
7
+ data.tar.gz: f907d03307a85fc72eea15f1a423cc6622b81acabe08b07da19da97588021d65cd90e514cd519126166e3d4baba97ce31341f7f0da040e91dc79b50a3345deff
data/CHANGELOG CHANGED
@@ -1,3 +1,6 @@
1
+ 3.1.9 -- more virus keywords, better handling of apostrophes in
2
+ species epithets
3
+
1
4
  3.1.8 -- downgrading dependency on treetop to 1.4.1
2
5
 
3
6
  3.1.7 -- fixed travis and dependencies
@@ -161,14 +161,18 @@ class ScientificNameParser
161
161
 
162
162
  def virus?(a_string)
163
163
  !!(a_string.match(/\sICTV\s*$/) ||
164
- a_string.match(/\b(virus|viruses|
165
- phage|phages|viroid|viroids|
166
- satellite|satellites|prion|prions)\b/ix) ||
167
- a_string.match(/[A-Z]?[a-z]+virus\b/))
164
+ a_string.match(/\b(virus|viruses|particle|particles|
165
+ phage|phages|viroid|viroids|virophage|
166
+ prion|prions|NPV)\b/ix) ||
167
+ a_string.match(/[A-Z]?[a-z]+virus\b/) ||
168
+ a_string.match(/\b[A-Za-z]*satellite[s]?\b/))
168
169
  end
169
170
 
170
- def unknown_placement?(a_string)
171
- !!(a_string.match(/incertae\s+sedis/i) || a_string.match(/inc\.\s*sed\./i))
171
+ def noparse?(a_string)
172
+ incertae_sedis = a_string.match(/incertae\s+sedis/i) ||
173
+ a_string.match(/inc\.\s*sed\./i)
174
+ rna = a_string.match(/[^A-Z]RNA[^A-Z]*/)
175
+ incertae_sedis || rna
172
176
  end
173
177
 
174
178
  def parsed
@@ -181,7 +185,7 @@ class ScientificNameParser
181
185
 
182
186
  if virus?(a_string)
183
187
  @parsed = { verbatim: a_string, virus: true }
184
- elsif unknown_placement?(a_string)
188
+ elsif noparse?(a_string)
185
189
  @parsed = { verbatim: a_string }
186
190
  else
187
191
  begin
@@ -1372,15 +1372,9 @@ grammar ScientificNameClean
1372
1372
  end
1373
1373
  }
1374
1374
  /
1375
- "o'donelli" {
1375
+ a:valid_name_letter "'" b:latin_word {
1376
1376
  def value
1377
- "odonelli"
1378
- end
1379
- }
1380
- /
1381
- "o'neili" {
1382
- def value
1383
- "oneili"
1377
+ a.value + "'" + b.value
1384
1378
  end
1385
1379
  }
1386
1380
  /
@@ -1388,7 +1382,7 @@ grammar ScientificNameClean
1388
1382
  def value
1389
1383
  a.value + b.value
1390
1384
  end
1391
- }
1385
+ }
1392
1386
  end
1393
1387
 
1394
1388
  rule valid_name_letters
@@ -1,3 +1,3 @@
1
1
  module Biodiversity
2
- VERSION = "3.1.8"
2
+ VERSION = "3.1.9"
3
3
  end
@@ -63,7 +63,13 @@ Mo. alpium (Osbeck, 1778)|{"scientificName":{"parsed":true, "parser_version":"te
63
63
  Mom.alpium (Osbeck, 1778)|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Mom.alpium (Osbeck, 1778)", "normalized":"Mom. alpium (Osbeck 1778)", "canonical":"Mom. alpium", "hybrid":false, "details":[{"genus":{"string":"Mom."}, "species":{"string":"alpium", "authorship":"(Osbeck, 1778)", "basionymAuthorTeam":{"authorTeam":"Osbeck", "author":["Osbeck"], "year":"1778"}}}], "parser_run":1, "positions":{"0":["genus", 4], "4":["species", 10], "12":["author_word", 18], "20":["year", 24]}}}
64
64
 
65
65
  #binomial with apostrophe in species epithet
66
- Junellia o'donelli Moldenke, 1946|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Junellia o'donelli Moldenke, 1946", "normalized":"Junellia odonelli Moldenke 1946", "canonical":"Junellia odonelli", "hybrid":false, "details":[{"genus":{"string":"Junellia"}, "species":{"string":"odonelli", "authorship":"Moldenke, 1946", "basionymAuthorTeam":{"authorTeam":"Moldenke", "author":["Moldenke"], "year":"1946"}}}], "parser_run":1, "positions":{"0":["genus", 8], "9":["species", 18], "19":["author_word", 27], "29":["year", 33]}}}
66
+ Junellia o'donelli Moldenke, 1946|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Junellia o'donelli Moldenke, 1946", "normalized":"Junellia o'donelli Moldenke 1946", "canonical":"Junellia o'donelli", "hybrid":false, "details":[{"genus":{"string":"Junellia"}, "species":{"string":"o'donelli", "authorship":"Moldenke, 1946", "basionymAuthorTeam":{"authorTeam":"Moldenke", "author":["Moldenke"], "year":"1946"}}}], "parser_run":1, "positions":{"0":["genus", 8], "9":["species", 18], "19":["author_word", 27], "29":["year", 33]}}}
67
+ Trophon d'orbignyi Carcelles, 1946|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Trophon d'orbignyi Carcelles, 1946", "normalized":"Trophon d'orbignyi Carcelles 1946", "canonical":"Trophon d'orbignyi", "hybrid":false, "details":[{"genus":{"string":"Trophon"}, "species":{"string":"d'orbignyi", "authorship":"Carcelles, 1946", "basionymAuthorTeam":{"authorTeam":"Carcelles", "author":["Carcelles"], "year":"1946"}}}], "parser_run":1, "positions":{"0":["genus", 7], "8":["species", 18], "19":["author_word", 28], "30":["year", 34]}}}
68
+ Arca m'coyi Tenison-Woods, 1878|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Arca m'coyi Tenison-Woods, 1878", "normalized":"Arca m'coyi Tenison-Woods 1878", "canonical":"Arca m'coyi", "hybrid":false, "details":[{"genus":{"string":"Arca"}, "species":{"string":"m'coyi", "authorship":"Tenison-Woods, 1878", "basionymAuthorTeam":{"authorTeam":"Tenison-Woods", "author":["Tenison-Woods"], "year":"1878"}}}], "parser_run":1, "positions":{"0":["genus", 4], "5":["species", 11], "12":["author_word", 25], "27":["year", 31]}}}
69
+ Nucula m'andrewii Hanley, 1860|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Nucula m'andrewii Hanley, 1860", "normalized":"Nucula m'andrewii Hanley 1860", "canonical":"Nucula m'andrewii", "hybrid":false, "details":[{"genus":{"string":"Nucula"}, "species":{"string":"m'andrewii", "authorship":"Hanley, 1860", "basionymAuthorTeam":{"authorTeam":"Hanley", "author":["Hanley"], "year":"1860"}}}], "parser_run":1, "positions":{"0":["genus", 6], "7":["species", 17], "18":["author_word", 24], "26":["year", 30]}}}
70
+ Eristalis l'herminierii Macquart|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Eristalis l'herminierii Macquart", "normalized":"Eristalis l'herminierii Macquart", "canonical":"Eristalis l'herminierii", "hybrid":false, "details":[{"genus":{"string":"Eristalis"}, "species":{"string":"l'herminierii", "authorship":"Macquart", "basionymAuthorTeam":{"authorTeam":"Macquart", "author":["Macquart"]}}}], "parser_run":1, "positions":{"0":["genus", 9], "10":["species", 23], "24":["author_word", 32]}}}
71
+ Odynerus o'neili Cameron|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Odynerus o'neili Cameron", "normalized":"Odynerus o'neili Cameron", "canonical":"Odynerus o'neili", "hybrid":false, "details":[{"genus":{"string":"Odynerus"}, "species":{"string":"o'neili", "authorship":"Cameron", "basionymAuthorTeam":{"authorTeam":"Cameron", "author":["Cameron"]}}}], "parser_run":1, "positions":{"0":["genus", 8], "9":["species", 16], "17":["author_word", 24]}}}
72
+ Serjania meridionalis Cambess. var. o'donelli F.A. Barkley|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Serjania meridionalis Cambess. var. o'donelli F.A. Barkley", "normalized":"Serjania meridionalis Cambess. var. o'donelli F.A. Barkley", "canonical":"Serjania meridionalis o'donelli", "hybrid":false, "details":[{"genus":{"string":"Serjania"}, "species":{"string":"meridionalis", "authorship":"Cambess.", "basionymAuthorTeam":{"authorTeam":"Cambess.", "author":["Cambess."]}}, "infraspecies":[{"string":"o'donelli", "rank":"var.", "authorship":"F.A. Barkley", "basionymAuthorTeam":{"authorTeam":"F.A. Barkley", "author":["F.A. Barkley"]}}]}], "parser_run":1, "positions":{"0":["genus", 8], "9":["species", 21], "22":["author_word", 30], "31":["infraspecific_type", 35], "36":["infraspecies", 45], "46":["author_word", 50], "51":["author_word", 58]}}}
67
73
 
68
74
  # whitespace names (rare, only ~50 cases)
69
75
  #TODO Donatia novae zelandiae Hook.f.
@@ -132,9 +138,6 @@ Pseudocercospora dendrobii(H.C. Burnett, 1873)U. Braun & Crous 2003|{"sc
132
138
  Pseudocercospora dendrobii(H.C. Burnett 1873)U. Braun & Crous , 2003|{"scientificName":{"parsed":true, "parser_version":"test_version", "parser_run":1,"verbatim":"Pseudocercospora dendrobii(H.C. Burnett 1873)U. Braun & Crous , 2003","normalized":"Pseudocercospora dendrobii (H.C. Burnett 1873) U. Braun & Crous 2003","canonical":"Pseudocercospora dendrobii","hybrid":false,"details":[{"genus":{"string":"Pseudocercospora"},"species":{"string":"dendrobii","authorship":"(H.C. Burnett 1873)U. Braun & Crous , 2003","combinationAuthorTeam":{"authorTeam":"U. Braun & Crous","author":["U. Braun","Crous"],"year":"2003"},"basionymAuthorTeam":{"authorTeam":"H.C. Burnett","author":["H.C. Burnett"],"year":"1873"}}}],"positions":{"0":["genus",16],"17":["species",26],"27":["author_word",31],"36":["author_word",43],"44":["year",48],"49":["author_word",51],"52":["author_word",57],"60":["author_word",65],"71":["year",75]}}}
133
139
  Sedella pumila (Benth.) Britton & Rose|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Sedella pumila (Benth.) Britton & Rose", "normalized":"Sedella pumila (Benth.) Britton & Rose", "canonical":"Sedella pumila", "hybrid":false, "details":[{"genus":{"string":"Sedella"}, "species":{"string":"pumila", "authorship":"(Benth.) Britton & Rose", "combinationAuthorTeam":{"authorTeam":"Britton & Rose", "author":["Britton", "Rose"]}, "basionymAuthorTeam":{"authorTeam":"Benth.", "author":["Benth."]}}}], "parser_run":1, "positions":{"0":["genus", 7], "8":["species", 14], "16":["author_word", 22], "24":["author_word", 31], "34":["author_word", 38]}}}
134
140
 
135
- #binomials with apostrophe in species epithet
136
- Odynerus o'neili Cameron|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Odynerus o'neili Cameron", "normalized":"Odynerus oneili Cameron", "canonical":"Odynerus oneili", "hybrid":false, "details":[{"genus":{"string":"Odynerus"}, "species":{"string":"oneili", "authorship":"Cameron", "basionymAuthorTeam":{"authorTeam":"Cameron", "author":["Cameron"]}}}], "parser_run":1, "positions":{"0":["genus", 8], "9":["species", 16], "17":["author_word", 24]}}}
137
- Serjania meridionalis Cambess. var. o'donelli F.A. Barkley|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Serjania meridionalis Cambess. var. o'donelli F.A. Barkley", "normalized":"Serjania meridionalis Cambess. var. odonelli F.A. Barkley", "canonical":"Serjania meridionalis odonelli", "hybrid":false, "details":[{"genus":{"string":"Serjania"}, "species":{"string":"meridionalis", "authorship":"Cambess.", "basionymAuthorTeam":{"authorTeam":"Cambess.", "author":["Cambess."]}}, "infraspecies":[{"string":"odonelli", "rank":"var.", "authorship":"F.A. Barkley", "basionymAuthorTeam":{"authorTeam":"F.A. Barkley", "author":["F.A. Barkley"]}}]}], "parser_run":1, "positions":{"0":["genus", 8], "9":["species", 21], "22":["author_word", 30], "31":["infraspecific_type", 35], "36":["infraspecies", 45], "46":["author_word", 50], "51":["author_word", 58]}}}
138
141
 
139
142
  #infraspecies without rank
140
143
  Hydnellum scrobiculatum zonatum (Batsch) K. A. Harrison 1961|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Hydnellum scrobiculatum zonatum (Batsch) K. A. Harrison 1961", "normalized":"Hydnellum scrobiculatum zonatum (Batsch) K. A. Harrison 1961", "canonical":"Hydnellum scrobiculatum zonatum", "hybrid":false, "details":[{"genus":{"string":"Hydnellum"}, "species":{"string":"scrobiculatum"}, "infraspecies":[{"string":"zonatum", "rank":"n/a", "authorship":"(Batsch) K. A. Harrison 1961", "combinationAuthorTeam":{"authorTeam":"K. A. Harrison", "author":["K. A. Harrison"], "year":"1961"}, "basionymAuthorTeam":{"authorTeam":"Batsch", "author":["Batsch"]}}]}], "parser_run":1, "positions":{"0":["genus", 9], "10":["species", 23], "24":["infraspecies", 31], "33":["author_word", 39], "41":["author_word", 43], "44":["author_word", 46], "47":["author_word", 55], "56":["year", 60]}}}
@@ -357,6 +360,19 @@ Fungal prions|{"scientificName":{"parsed":false, "parser_version":"test_version"
357
360
  Human rhinovirus A11|{"scientificName":{"parsed":false, "parser_version":"test_version", "verbatim":"Human rhinovirus A11", "virus":true}}
358
361
  Kobuvirus korean black goat/South Korea/2010|{"scientificName":{"parsed":false, "parser_version":"test_version", "verbatim":"Kobuvirus korean black goat/South Korea/2010", "virus":true}}
359
362
  Australian bat lyssavirus human/AUS/1998|{"scientificName":{"parsed":false, "parser_version":"test_version", "verbatim":"Australian bat lyssavirus human/AUS/1998", "virus":true}}
363
+ Gossypium mustilinum symptomless alphasatellite|{"scientificName":{"parsed":false, "parser_version":"test_version", "verbatim":"Gossypium mustilinum symptomless alphasatellite", "virus":true}}
364
+ Okra leaf curl Mali alphasatellites-Cameroon|{"scientificName":{"parsed":false, "parser_version":"test_version", "verbatim":"Okra leaf curl Mali alphasatellites-Cameroon", "virus":true}}
365
+ Bemisia betasatellite LW-2014|{"scientificName":{"parsed":false, "parser_version":"test_version", "verbatim":"Bemisia betasatellite LW-2014", "virus":true}}
366
+ Tomato leaf curl Bangladesh betasatellites [India/Patna/Chilli/2008]|{"scientificName":{"parsed":false, "parser_version":"test_version", "verbatim":"Tomato leaf curl Bangladesh betasatellites [India/Patna/Chilli/2008]", "virus":true}}
367
+ Intracisternal A-particles|{"scientificName":{"parsed":false, "parser_version":"test_version", "verbatim":"Intracisternal A-particles", "virus":true}}
368
+ Saccharomyces cerevisiae killer particle M1|{"scientificName":{"parsed":false, "parser_version":"test_version", "verbatim":"Saccharomyces cerevisiae killer particle M1", "virus":true}}
369
+
370
+ #should not parse non-virus names containing RNA
371
+ ssRNA|{"scientificName":{"parsed":false, "parser_version":"test_version", "verbatim":"ssRNA"}}
372
+ Alpha proteobacterium RNA12|{"scientificName":{"parsed":false, "parser_version":"test_version", "verbatim":"Alpha proteobacterium RNA12"}}
373
+ Ustilaginoidea virens RNA virus|{"scientificName":{"parsed":false, "parser_version":"test_version", "verbatim":"Ustilaginoidea virens RNA virus", "virus":true}}
374
+ Candida albicans RNA_CTR0-3|{"scientificName":{"parsed":false, "parser_version":"test_version", "verbatim":"Candida albicans RNA_CTR0-3"}}
375
+ Calathus (Lindrothius) KURNAKOV 1961|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Calathus (Lindrothius) KURNAKOV 1961", "normalized":"Calathus (Lindrothius) Kurnakov 1961", "canonical":"Calathus", "hybrid":false, "details":[{"uninomial":{"string":"Calathus", "infragenus":{"string":"Lindrothius"}, "authorship":"KURNAKOV 1961", "basionymAuthorTeam":{"authorTeam":"KURNAKOV", "author":["Kurnakov"], "year":"1961"}}}], "parser_run":1, "positions":{"0":["uninomial", 8], "10":["infragenus", 21], "23":["author_word", 31], "32":["year", 36]}}}
360
376
 
361
377
  #double parenthesis
362
378
  Eichornia crassipes ( (Martius) ) Solms-Laub.|{"scientificName":{"parsed":true, "parser_version":"test_version", "parser_run":2,"verbatim":"Eichornia crassipes ( (Martius) ) Solms-Laub.","normalized":"Eichornia crassipes (Martius) Solms-Laub.","canonical":"Eichornia crassipes","hybrid":false,"details":[{"genus":{"string":"Eichornia"},"species":{"string":"crassipes","authorship":"( (Martius) ) Solms-Laub.","combinationAuthorTeam":{"authorTeam":"Solms-Laub.","author":["Solms-Laub."]},"basionymAuthorTeam":{"authorTeam":"Martius","author":["Martius"]}}}],"positions":{"0":["genus",9],"10":["species",19],"23":["author_word",30],"34":["author_word",45]}}}
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: biodiversity
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.1.8
4
+ version: 3.1.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dmitry Mozzherin
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-04-22 00:00:00.000000000 Z
11
+ date: 2015-05-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: treetop