biodiversity 3.1.8 → 3.1.9

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: c1655d5f7cba11e28f66fec5bf3e0b82e118e637
4
- data.tar.gz: f499f992128929e495f79632bd43aef993297fcc
3
+ metadata.gz: fcbab08fed86591b64c378a48328bf6163cbe778
4
+ data.tar.gz: 318aec7ddf0deaf30d9b849f06b499a22f148100
5
5
  SHA512:
6
- metadata.gz: 221cfe8f65d0eec4c1d3c01b82156000c8ea9f01dabe2771571a1f0fbda76e7bf4cd16147be572cd958f67c2dc0add2665e8ead43fc9ad1b6677766326f1ae25
7
- data.tar.gz: 670e24f41c5ef54a830f9e3299e6999b8ba162fcb3ca16430f56f501464be5ec6f6ff00068a87e8296600565558af14315b2b01d9ca3884a7af05b6f9c75569c
6
+ metadata.gz: 37a5ec870231ed2d80b3a1648d66bd3327b48abfd259354900c0f0114233bf591c6e9ca9b5b09f530c2c5c66c319d39bc5452ca57f853fb9c5efb417b54eb18b
7
+ data.tar.gz: f907d03307a85fc72eea15f1a423cc6622b81acabe08b07da19da97588021d65cd90e514cd519126166e3d4baba97ce31341f7f0da040e91dc79b50a3345deff
data/CHANGELOG CHANGED
@@ -1,3 +1,6 @@
1
+ 3.1.9 -- more virus keywords, better handling of apostrophes in
2
+ species epithets
3
+
1
4
  3.1.8 -- downgrading dependency on treetop to 1.4.1
2
5
 
3
6
  3.1.7 -- fixed travis and dependencies
@@ -161,14 +161,18 @@ class ScientificNameParser
161
161
 
162
162
  def virus?(a_string)
163
163
  !!(a_string.match(/\sICTV\s*$/) ||
164
- a_string.match(/\b(virus|viruses|
165
- phage|phages|viroid|viroids|
166
- satellite|satellites|prion|prions)\b/ix) ||
167
- a_string.match(/[A-Z]?[a-z]+virus\b/))
164
+ a_string.match(/\b(virus|viruses|particle|particles|
165
+ phage|phages|viroid|viroids|virophage|
166
+ prion|prions|NPV)\b/ix) ||
167
+ a_string.match(/[A-Z]?[a-z]+virus\b/) ||
168
+ a_string.match(/\b[A-Za-z]*satellite[s]?\b/))
168
169
  end
169
170
 
170
- def unknown_placement?(a_string)
171
- !!(a_string.match(/incertae\s+sedis/i) || a_string.match(/inc\.\s*sed\./i))
171
+ def noparse?(a_string)
172
+ incertae_sedis = a_string.match(/incertae\s+sedis/i) ||
173
+ a_string.match(/inc\.\s*sed\./i)
174
+ rna = a_string.match(/[^A-Z]RNA[^A-Z]*/)
175
+ incertae_sedis || rna
172
176
  end
173
177
 
174
178
  def parsed
@@ -181,7 +185,7 @@ class ScientificNameParser
181
185
 
182
186
  if virus?(a_string)
183
187
  @parsed = { verbatim: a_string, virus: true }
184
- elsif unknown_placement?(a_string)
188
+ elsif noparse?(a_string)
185
189
  @parsed = { verbatim: a_string }
186
190
  else
187
191
  begin
@@ -1372,15 +1372,9 @@ grammar ScientificNameClean
1372
1372
  end
1373
1373
  }
1374
1374
  /
1375
- "o'donelli" {
1375
+ a:valid_name_letter "'" b:latin_word {
1376
1376
  def value
1377
- "odonelli"
1378
- end
1379
- }
1380
- /
1381
- "o'neili" {
1382
- def value
1383
- "oneili"
1377
+ a.value + "'" + b.value
1384
1378
  end
1385
1379
  }
1386
1380
  /
@@ -1388,7 +1382,7 @@ grammar ScientificNameClean
1388
1382
  def value
1389
1383
  a.value + b.value
1390
1384
  end
1391
- }
1385
+ }
1392
1386
  end
1393
1387
 
1394
1388
  rule valid_name_letters
@@ -1,3 +1,3 @@
1
1
  module Biodiversity
2
- VERSION = "3.1.8"
2
+ VERSION = "3.1.9"
3
3
  end
@@ -63,7 +63,13 @@ Mo. alpium (Osbeck, 1778)|{"scientificName":{"parsed":true, "parser_version":"te
63
63
  Mom.alpium (Osbeck, 1778)|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Mom.alpium (Osbeck, 1778)", "normalized":"Mom. alpium (Osbeck 1778)", "canonical":"Mom. alpium", "hybrid":false, "details":[{"genus":{"string":"Mom."}, "species":{"string":"alpium", "authorship":"(Osbeck, 1778)", "basionymAuthorTeam":{"authorTeam":"Osbeck", "author":["Osbeck"], "year":"1778"}}}], "parser_run":1, "positions":{"0":["genus", 4], "4":["species", 10], "12":["author_word", 18], "20":["year", 24]}}}
64
64
 
65
65
  #binomial with apostrophe in species epithet
66
- Junellia o'donelli Moldenke, 1946|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Junellia o'donelli Moldenke, 1946", "normalized":"Junellia odonelli Moldenke 1946", "canonical":"Junellia odonelli", "hybrid":false, "details":[{"genus":{"string":"Junellia"}, "species":{"string":"odonelli", "authorship":"Moldenke, 1946", "basionymAuthorTeam":{"authorTeam":"Moldenke", "author":["Moldenke"], "year":"1946"}}}], "parser_run":1, "positions":{"0":["genus", 8], "9":["species", 18], "19":["author_word", 27], "29":["year", 33]}}}
66
+ Junellia o'donelli Moldenke, 1946|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Junellia o'donelli Moldenke, 1946", "normalized":"Junellia o'donelli Moldenke 1946", "canonical":"Junellia o'donelli", "hybrid":false, "details":[{"genus":{"string":"Junellia"}, "species":{"string":"o'donelli", "authorship":"Moldenke, 1946", "basionymAuthorTeam":{"authorTeam":"Moldenke", "author":["Moldenke"], "year":"1946"}}}], "parser_run":1, "positions":{"0":["genus", 8], "9":["species", 18], "19":["author_word", 27], "29":["year", 33]}}}
67
+ Trophon d'orbignyi Carcelles, 1946|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Trophon d'orbignyi Carcelles, 1946", "normalized":"Trophon d'orbignyi Carcelles 1946", "canonical":"Trophon d'orbignyi", "hybrid":false, "details":[{"genus":{"string":"Trophon"}, "species":{"string":"d'orbignyi", "authorship":"Carcelles, 1946", "basionymAuthorTeam":{"authorTeam":"Carcelles", "author":["Carcelles"], "year":"1946"}}}], "parser_run":1, "positions":{"0":["genus", 7], "8":["species", 18], "19":["author_word", 28], "30":["year", 34]}}}
68
+ Arca m'coyi Tenison-Woods, 1878|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Arca m'coyi Tenison-Woods, 1878", "normalized":"Arca m'coyi Tenison-Woods 1878", "canonical":"Arca m'coyi", "hybrid":false, "details":[{"genus":{"string":"Arca"}, "species":{"string":"m'coyi", "authorship":"Tenison-Woods, 1878", "basionymAuthorTeam":{"authorTeam":"Tenison-Woods", "author":["Tenison-Woods"], "year":"1878"}}}], "parser_run":1, "positions":{"0":["genus", 4], "5":["species", 11], "12":["author_word", 25], "27":["year", 31]}}}
69
+ Nucula m'andrewii Hanley, 1860|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Nucula m'andrewii Hanley, 1860", "normalized":"Nucula m'andrewii Hanley 1860", "canonical":"Nucula m'andrewii", "hybrid":false, "details":[{"genus":{"string":"Nucula"}, "species":{"string":"m'andrewii", "authorship":"Hanley, 1860", "basionymAuthorTeam":{"authorTeam":"Hanley", "author":["Hanley"], "year":"1860"}}}], "parser_run":1, "positions":{"0":["genus", 6], "7":["species", 17], "18":["author_word", 24], "26":["year", 30]}}}
70
+ Eristalis l'herminierii Macquart|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Eristalis l'herminierii Macquart", "normalized":"Eristalis l'herminierii Macquart", "canonical":"Eristalis l'herminierii", "hybrid":false, "details":[{"genus":{"string":"Eristalis"}, "species":{"string":"l'herminierii", "authorship":"Macquart", "basionymAuthorTeam":{"authorTeam":"Macquart", "author":["Macquart"]}}}], "parser_run":1, "positions":{"0":["genus", 9], "10":["species", 23], "24":["author_word", 32]}}}
71
+ Odynerus o'neili Cameron|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Odynerus o'neili Cameron", "normalized":"Odynerus o'neili Cameron", "canonical":"Odynerus o'neili", "hybrid":false, "details":[{"genus":{"string":"Odynerus"}, "species":{"string":"o'neili", "authorship":"Cameron", "basionymAuthorTeam":{"authorTeam":"Cameron", "author":["Cameron"]}}}], "parser_run":1, "positions":{"0":["genus", 8], "9":["species", 16], "17":["author_word", 24]}}}
72
+ Serjania meridionalis Cambess. var. o'donelli F.A. Barkley|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Serjania meridionalis Cambess. var. o'donelli F.A. Barkley", "normalized":"Serjania meridionalis Cambess. var. o'donelli F.A. Barkley", "canonical":"Serjania meridionalis o'donelli", "hybrid":false, "details":[{"genus":{"string":"Serjania"}, "species":{"string":"meridionalis", "authorship":"Cambess.", "basionymAuthorTeam":{"authorTeam":"Cambess.", "author":["Cambess."]}}, "infraspecies":[{"string":"o'donelli", "rank":"var.", "authorship":"F.A. Barkley", "basionymAuthorTeam":{"authorTeam":"F.A. Barkley", "author":["F.A. Barkley"]}}]}], "parser_run":1, "positions":{"0":["genus", 8], "9":["species", 21], "22":["author_word", 30], "31":["infraspecific_type", 35], "36":["infraspecies", 45], "46":["author_word", 50], "51":["author_word", 58]}}}
67
73
 
68
74
  # whitespace names (rare, only ~50 cases)
69
75
  #TODO Donatia novae zelandiae Hook.f.
@@ -132,9 +138,6 @@ Pseudocercospora dendrobii(H.C. Burnett, 1873)U. Braun & Crous 2003|{"sc
132
138
  Pseudocercospora dendrobii(H.C. Burnett 1873)U. Braun & Crous , 2003|{"scientificName":{"parsed":true, "parser_version":"test_version", "parser_run":1,"verbatim":"Pseudocercospora dendrobii(H.C. Burnett 1873)U. Braun & Crous , 2003","normalized":"Pseudocercospora dendrobii (H.C. Burnett 1873) U. Braun & Crous 2003","canonical":"Pseudocercospora dendrobii","hybrid":false,"details":[{"genus":{"string":"Pseudocercospora"},"species":{"string":"dendrobii","authorship":"(H.C. Burnett 1873)U. Braun & Crous , 2003","combinationAuthorTeam":{"authorTeam":"U. Braun & Crous","author":["U. Braun","Crous"],"year":"2003"},"basionymAuthorTeam":{"authorTeam":"H.C. Burnett","author":["H.C. Burnett"],"year":"1873"}}}],"positions":{"0":["genus",16],"17":["species",26],"27":["author_word",31],"36":["author_word",43],"44":["year",48],"49":["author_word",51],"52":["author_word",57],"60":["author_word",65],"71":["year",75]}}}
133
139
  Sedella pumila (Benth.) Britton & Rose|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Sedella pumila (Benth.) Britton & Rose", "normalized":"Sedella pumila (Benth.) Britton & Rose", "canonical":"Sedella pumila", "hybrid":false, "details":[{"genus":{"string":"Sedella"}, "species":{"string":"pumila", "authorship":"(Benth.) Britton & Rose", "combinationAuthorTeam":{"authorTeam":"Britton & Rose", "author":["Britton", "Rose"]}, "basionymAuthorTeam":{"authorTeam":"Benth.", "author":["Benth."]}}}], "parser_run":1, "positions":{"0":["genus", 7], "8":["species", 14], "16":["author_word", 22], "24":["author_word", 31], "34":["author_word", 38]}}}
134
140
 
135
- #binomials with apostrophe in species epithet
136
- Odynerus o'neili Cameron|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Odynerus o'neili Cameron", "normalized":"Odynerus oneili Cameron", "canonical":"Odynerus oneili", "hybrid":false, "details":[{"genus":{"string":"Odynerus"}, "species":{"string":"oneili", "authorship":"Cameron", "basionymAuthorTeam":{"authorTeam":"Cameron", "author":["Cameron"]}}}], "parser_run":1, "positions":{"0":["genus", 8], "9":["species", 16], "17":["author_word", 24]}}}
137
- Serjania meridionalis Cambess. var. o'donelli F.A. Barkley|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Serjania meridionalis Cambess. var. o'donelli F.A. Barkley", "normalized":"Serjania meridionalis Cambess. var. odonelli F.A. Barkley", "canonical":"Serjania meridionalis odonelli", "hybrid":false, "details":[{"genus":{"string":"Serjania"}, "species":{"string":"meridionalis", "authorship":"Cambess.", "basionymAuthorTeam":{"authorTeam":"Cambess.", "author":["Cambess."]}}, "infraspecies":[{"string":"odonelli", "rank":"var.", "authorship":"F.A. Barkley", "basionymAuthorTeam":{"authorTeam":"F.A. Barkley", "author":["F.A. Barkley"]}}]}], "parser_run":1, "positions":{"0":["genus", 8], "9":["species", 21], "22":["author_word", 30], "31":["infraspecific_type", 35], "36":["infraspecies", 45], "46":["author_word", 50], "51":["author_word", 58]}}}
138
141
 
139
142
  #infraspecies without rank
140
143
  Hydnellum scrobiculatum zonatum (Batsch) K. A. Harrison 1961|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Hydnellum scrobiculatum zonatum (Batsch) K. A. Harrison 1961", "normalized":"Hydnellum scrobiculatum zonatum (Batsch) K. A. Harrison 1961", "canonical":"Hydnellum scrobiculatum zonatum", "hybrid":false, "details":[{"genus":{"string":"Hydnellum"}, "species":{"string":"scrobiculatum"}, "infraspecies":[{"string":"zonatum", "rank":"n/a", "authorship":"(Batsch) K. A. Harrison 1961", "combinationAuthorTeam":{"authorTeam":"K. A. Harrison", "author":["K. A. Harrison"], "year":"1961"}, "basionymAuthorTeam":{"authorTeam":"Batsch", "author":["Batsch"]}}]}], "parser_run":1, "positions":{"0":["genus", 9], "10":["species", 23], "24":["infraspecies", 31], "33":["author_word", 39], "41":["author_word", 43], "44":["author_word", 46], "47":["author_word", 55], "56":["year", 60]}}}
@@ -357,6 +360,19 @@ Fungal prions|{"scientificName":{"parsed":false, "parser_version":"test_version"
357
360
  Human rhinovirus A11|{"scientificName":{"parsed":false, "parser_version":"test_version", "verbatim":"Human rhinovirus A11", "virus":true}}
358
361
  Kobuvirus korean black goat/South Korea/2010|{"scientificName":{"parsed":false, "parser_version":"test_version", "verbatim":"Kobuvirus korean black goat/South Korea/2010", "virus":true}}
359
362
  Australian bat lyssavirus human/AUS/1998|{"scientificName":{"parsed":false, "parser_version":"test_version", "verbatim":"Australian bat lyssavirus human/AUS/1998", "virus":true}}
363
+ Gossypium mustilinum symptomless alphasatellite|{"scientificName":{"parsed":false, "parser_version":"test_version", "verbatim":"Gossypium mustilinum symptomless alphasatellite", "virus":true}}
364
+ Okra leaf curl Mali alphasatellites-Cameroon|{"scientificName":{"parsed":false, "parser_version":"test_version", "verbatim":"Okra leaf curl Mali alphasatellites-Cameroon", "virus":true}}
365
+ Bemisia betasatellite LW-2014|{"scientificName":{"parsed":false, "parser_version":"test_version", "verbatim":"Bemisia betasatellite LW-2014", "virus":true}}
366
+ Tomato leaf curl Bangladesh betasatellites [India/Patna/Chilli/2008]|{"scientificName":{"parsed":false, "parser_version":"test_version", "verbatim":"Tomato leaf curl Bangladesh betasatellites [India/Patna/Chilli/2008]", "virus":true}}
367
+ Intracisternal A-particles|{"scientificName":{"parsed":false, "parser_version":"test_version", "verbatim":"Intracisternal A-particles", "virus":true}}
368
+ Saccharomyces cerevisiae killer particle M1|{"scientificName":{"parsed":false, "parser_version":"test_version", "verbatim":"Saccharomyces cerevisiae killer particle M1", "virus":true}}
369
+
370
+ #should not parse non-virus names containing RNA
371
+ ssRNA|{"scientificName":{"parsed":false, "parser_version":"test_version", "verbatim":"ssRNA"}}
372
+ Alpha proteobacterium RNA12|{"scientificName":{"parsed":false, "parser_version":"test_version", "verbatim":"Alpha proteobacterium RNA12"}}
373
+ Ustilaginoidea virens RNA virus|{"scientificName":{"parsed":false, "parser_version":"test_version", "verbatim":"Ustilaginoidea virens RNA virus", "virus":true}}
374
+ Candida albicans RNA_CTR0-3|{"scientificName":{"parsed":false, "parser_version":"test_version", "verbatim":"Candida albicans RNA_CTR0-3"}}
375
+ Calathus (Lindrothius) KURNAKOV 1961|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Calathus (Lindrothius) KURNAKOV 1961", "normalized":"Calathus (Lindrothius) Kurnakov 1961", "canonical":"Calathus", "hybrid":false, "details":[{"uninomial":{"string":"Calathus", "infragenus":{"string":"Lindrothius"}, "authorship":"KURNAKOV 1961", "basionymAuthorTeam":{"authorTeam":"KURNAKOV", "author":["Kurnakov"], "year":"1961"}}}], "parser_run":1, "positions":{"0":["uninomial", 8], "10":["infragenus", 21], "23":["author_word", 31], "32":["year", 36]}}}
360
376
 
361
377
  #double parenthesis
362
378
  Eichornia crassipes ( (Martius) ) Solms-Laub.|{"scientificName":{"parsed":true, "parser_version":"test_version", "parser_run":2,"verbatim":"Eichornia crassipes ( (Martius) ) Solms-Laub.","normalized":"Eichornia crassipes (Martius) Solms-Laub.","canonical":"Eichornia crassipes","hybrid":false,"details":[{"genus":{"string":"Eichornia"},"species":{"string":"crassipes","authorship":"( (Martius) ) Solms-Laub.","combinationAuthorTeam":{"authorTeam":"Solms-Laub.","author":["Solms-Laub."]},"basionymAuthorTeam":{"authorTeam":"Martius","author":["Martius"]}}}],"positions":{"0":["genus",9],"10":["species",19],"23":["author_word",30],"34":["author_word",45]}}}
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: biodiversity
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.1.8
4
+ version: 3.1.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dmitry Mozzherin
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-04-22 00:00:00.000000000 Z
11
+ date: 2015-05-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: treetop