biodiversity 3.1.1 → 3.1.2
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +5 -2
- data/VERSION +1 -1
- data/lib/biodiversity/parser/scientific_name_clean.treetop +1 -1
- data/spec/parser/scientific_name_clean.spec.rb +47 -46
- data/spec/parser/test_data.txt +1 -0
- metadata +5 -2
data/CHANGELOG
CHANGED
@@ -1,6 +1,9 @@
|
|
1
|
-
3.1.1 -- fixed problem with names ending on sp
|
1
|
+
3.1.2 -- fixed problems with infraspecies epithets starting with fo,
|
2
|
+
for example Peristernia nassatula forskali Tapparone-Canefri 1875
|
2
3
|
|
3
|
-
3.1.0 -- added surrogates: Genbank, Barcode of life names.
|
4
|
+
3.1.1 -- fixed problem with names ending on sp
|
5
|
+
|
6
|
+
3.1.0 -- added surrogates: Genbank, Barcode of life names.
|
4
7
|
Also cf., sp. spp are flagged as surrogates
|
5
8
|
|
6
9
|
3.0.1 -- updated gems
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
3.1.1
|
1
|
+
3.1.2
|
@@ -560,7 +560,7 @@ grammar ScientificNameClean
|
|
560
560
|
end
|
561
561
|
|
562
562
|
rule rank
|
563
|
-
("morph."/"f.sp."/"B "/"ssp."/"ssp "/"mut."/"nat "/"nothosubsp."/"convar."/"pseudovar."/"sect."/"ser."/"var."/"subvar."/ "[var.]" /"var "/"subsp."/"subsp "/"subf."/"race "/"forma "/"fma."/"fma "/"form."/"form "/"fo."/"fo"/"f."/"α"/"ββ"/"β"/"γ"/"δ"/"ε"/"φ"/"θ"/"μ"/"a."/"b."/"c."/"d."/"e."/"g."/"k."/"****"/"**"/"*")
|
563
|
+
("morph."/"f.sp."/"B "/"ssp."/"ssp "/"mut."/"nat "/"nothosubsp."/"convar."/"pseudovar."/"sect."/"ser."/"var."/"subvar."/ "[var.]" /"var "/"subsp."/"subsp "/"subf."/"race "/"forma "/"fma."/"fma "/"form."/"form "/"fo."/"fo "/"f."/"α"/"ββ"/"β"/"γ"/"δ"/"ε"/"φ"/"θ"/"μ"/"a."/"b."/"c."/"d."/"e."/"g."/"k."/"****"/"**"/"*")
|
564
564
|
{
|
565
565
|
def value
|
566
566
|
text_value.strip
|
@@ -7,7 +7,7 @@ describe ScientificNameClean do
|
|
7
7
|
before(:all) do
|
8
8
|
set_parser(ScientificNameCleanParser.new)
|
9
9
|
end
|
10
|
-
|
10
|
+
|
11
11
|
it 'should parse uninomial' do
|
12
12
|
sn = 'Pseudocercospora'
|
13
13
|
parse(sn).should_not be_nil
|
@@ -16,12 +16,12 @@ describe ScientificNameClean do
|
|
16
16
|
details(sn).should == [{:uninomial=>{:string=>"Pseudocercospora"}}]
|
17
17
|
pos(sn).should == {0=>["uninomial", 16]}
|
18
18
|
end
|
19
|
-
|
19
|
+
|
20
20
|
it 'should parse uninomial with author and year' do
|
21
21
|
sn = 'Pseudocercospora Speg.'
|
22
22
|
parse(sn).should_not be_nil
|
23
23
|
details(sn).should == [{:uninomial=>{:string=>"Pseudocercospora", :authorship=>"Speg.", :basionymAuthorTeam=>{:authorTeam=>"Speg.", :author=>["Speg."]}}}]
|
24
|
-
pos(sn).should == {0=>["uninomial", 16], 17=>["author_word", 22]}
|
24
|
+
pos(sn).should == {0=>["uninomial", 16], 17=>["author_word", 22]}
|
25
25
|
sn = 'Pseudocercospora Spegazzini, 1910'
|
26
26
|
parse(sn).should_not be_nil
|
27
27
|
value(sn).should == 'Pseudocercospora Spegazzini 1910'
|
@@ -34,7 +34,7 @@ describe ScientificNameClean do
|
|
34
34
|
parse(sn).should_not be_nil
|
35
35
|
details(sn).should == [{:uninomial=>{:string=>"Epacridaceae"}, :rank_uninomials=>"trib.", :uninomial2=>{:string=>"Archerieae", :authorship=>"Crayn & Quinn", :basionymAuthorTeam=>{:authorTeam=>"Crayn & Quinn", :author=>["Crayn", "Quinn"]}}}]
|
36
36
|
end
|
37
|
-
|
37
|
+
|
38
38
|
it 'should parse names with a valid 2 letter genus' do
|
39
39
|
["Ca Dyar 1914",
|
40
40
|
"Ea Distant 1911",
|
@@ -59,7 +59,7 @@ describe ScientificNameClean do
|
|
59
59
|
end
|
60
60
|
canonical('Quoyula').should == 'Quoyula'
|
61
61
|
end
|
62
|
-
|
62
|
+
|
63
63
|
it 'should parse canonical' do
|
64
64
|
sn = 'Pseudocercospora dendrobii'
|
65
65
|
parse(sn).should_not be_nil
|
@@ -78,8 +78,8 @@ describe ScientificNameClean do
|
|
78
78
|
value(sn).should == 'Ps. dendrobii'
|
79
79
|
details(sn).should == [{:genus=>{:string=>"Ps."}, :species=>{:string=>"dendrobii"}}]
|
80
80
|
end
|
81
|
-
|
82
|
-
|
81
|
+
|
82
|
+
|
83
83
|
it 'should parse species name with author and year' do
|
84
84
|
sn = "Platypus bicaudatulus Schedl 1935"
|
85
85
|
parse(sn).should_not be_nil
|
@@ -95,7 +95,7 @@ describe ScientificNameClean do
|
|
95
95
|
details(sn).should == [{:genus=>{:string=>"Platypus"}, :species=>{:string=>"bicaudatulus", :authorship=>"Schedl (1935h)", :basionymAuthorTeam=>{:authorTeam=>"Schedl", :author=>["Schedl"], :year=>"1935"}}}]
|
96
96
|
parse("Platypus bicaudatulus Schedl 1935").should_not be_nil
|
97
97
|
end
|
98
|
-
|
98
|
+
|
99
99
|
it 'should parse species name with abbreviated genus, author and year' do
|
100
100
|
sn = "P. bicaudatulus Schedl 1935"
|
101
101
|
parse(sn).should_not be_nil
|
@@ -110,7 +110,7 @@ describe ScientificNameClean do
|
|
110
110
|
end
|
111
111
|
|
112
112
|
it "should parse species name with author's postfix f., filius (son of)" do
|
113
|
-
names = [
|
113
|
+
names = [
|
114
114
|
[ "Platypus bicaudatulus Schedl f. 1935", [{:genus=>{:string=>"Platypus"}, :species=>{:string=>"bicaudatulus", :authorship=>"Schedl f. 1935", :basionymAuthorTeam=>{:authorTeam=>"Schedl f.", :author=>["Schedl f."], :year=>"1935"}}}], 'Platypus bicaudatulus Schedl f. 1935'],
|
115
115
|
[ "Platypus bicaudatulus Schedl filius 1935", [{:genus=>{:string=>"Platypus"}, :species=>{:string=>"bicaudatulus", :authorship=>"Schedl filius 1935", :basionymAuthorTeam=>{:authorTeam=>"Schedl filius", :author=>["Schedl filius"], :year=>"1935"}}}], 'Platypus bicaudatulus Schedl filius 1935'],
|
116
116
|
[ "Fimbristylis ovata (Burm. f.) J. Kern", [{:genus=>{:string=>"Fimbristylis"}, :species=>{:string=>"ovata", :authorship=>"(Burm. f.) J. Kern", :combinationAuthorTeam=>{:authorTeam=>"J. Kern", :author=>["J. Kern"]}, :basionymAuthorTeam=>{:authorTeam=>"Burm. f.", :author=>["Burm. f."]}}}], 'Fimbristylis ovata (Burm. f.) J. Kern'],
|
@@ -123,14 +123,15 @@ describe ScientificNameClean do
|
|
123
123
|
["Betula pendula fo. dalecarlica (L. f.) C.K. Schneid.", [{:infraspecies=>[{:basionymAuthorTeam=>{:author=>["L. f."], :authorTeam=>"L. f."}, :string=>"dalecarlica", :rank=>"fo.", :combinationAuthorTeam=>{:author=>["C.K. Schneid."], :authorTeam=>"C.K. Schneid."}, :authorship=>"(L. f.) C.K. Schneid."}], :genus=>{:string=>"Betula"}, :species=>{:string=>"pendula"}}], "Betula pendula fo. dalecarlica (L. f.) C.K. Schneid."],
|
124
124
|
["Racomitrium canescens f. ericoides (F. Weber ex Brid.) Mönk.", [{:genus=>{:string=>"Racomitrium"}, :species=>{:string=>"canescens"}, :infraspecies=>[{:string=>"ericoides", :rank=>"f.", :authorship=>"(F. Weber ex Brid.) Mönk.", :combinationAuthorTeam=>{:authorTeam=>"Mönk.", :author=>["Mönk."]}, :basionymAuthorTeam=>{:authorTeam=>"F. Weber", :author=>["F. Weber"], :exAuthorTeam=>{:authorTeam=>"Brid.", :author=>["Brid."]}}}]}], "Racomitrium canescens f. ericoides (F. Weber ex Brid.) Mönk."],
|
125
125
|
["Racomitrium canescens forma ericoides (F. Weber ex Brid.) Mönk.", [{:genus=>{:string=>"Racomitrium"}, :species=>{:string=>"canescens"}, :infraspecies=>[{:string=>"ericoides", :rank=>"forma", :authorship=>"(F. Weber ex Brid.) Mönk.", :combinationAuthorTeam=>{:authorTeam=>"Mönk.", :author=>["Mönk."]}, :basionymAuthorTeam=>{:authorTeam=>"F. Weber", :author=>["F. Weber"], :exAuthorTeam=>{:authorTeam=>"Brid.", :author=>["Brid."]}}}]}], "Racomitrium canescens forma ericoides (F. Weber ex Brid.) Mönk."],
|
126
|
+
["Peristernia nassatula forskali Tapparone-Canefri 1875", [{:genus=>{:string=>"Peristernia"}, :species=>{:string=>"nassatula"}, :infraspecies=>[{:string=>"forskali", :rank=>"n/a", :authorship=>"Tapparone-Canefri 1875", :basionymAuthorTeam=>{:authorTeam=>"Tapparone-Canefri", :author=>["Tapparone-Canefri"], :year=>"1875"}}]}], "Peristernia nassatula forskali Tapparone-Canefri 1875"],
|
126
127
|
]
|
127
|
-
names.each do |sn, sn_details, sn_value|
|
128
|
+
names.each do |sn, sn_details, sn_value|
|
128
129
|
parse(sn).should_not be_nil
|
129
130
|
details(sn).should == sn_details
|
130
131
|
value(sn).should == sn_value
|
131
132
|
end
|
132
133
|
end
|
133
|
-
|
134
|
+
|
134
135
|
it 'should parse genus with "?"' do
|
135
136
|
sn = "Ferganoconcha? oblonga"
|
136
137
|
parse(sn).should_not be_nil
|
@@ -138,7 +139,7 @@ describe ScientificNameClean do
|
|
138
139
|
details(sn).should == [{:genus=>{:string=>"Ferganoconcha"}, :species=>{:string=>"oblonga"}}]
|
139
140
|
pos(sn).should == {0=>["genus", 14], 15=>["species", 22]}
|
140
141
|
end
|
141
|
-
|
142
|
+
|
142
143
|
it 'should parse æ in the name' do
|
143
144
|
names = [
|
144
145
|
["Læptura laetifica Dow, 1913", "Laeptura laetifica Dow 1913"],
|
@@ -157,7 +158,7 @@ describe ScientificNameClean do
|
|
157
158
|
value(name_pair[0]).should == name_pair[1]
|
158
159
|
end
|
159
160
|
end
|
160
|
-
|
161
|
+
|
161
162
|
it 'should parse names with e-umlaut' do
|
162
163
|
sn = 'Kalanchoë tuberosa'
|
163
164
|
canonical(sn).should == 'Kalanchoe tuberosa'
|
@@ -174,7 +175,7 @@ describe ScientificNameClean do
|
|
174
175
|
pos(sn).should == {0=>["genus", 7], 9=>["infragenus", 16], 18=>["species", 29], 30=>["author_word", 38], 39=>["author_word", 40], 41=>["year", 45]}
|
175
176
|
sn = "Ixodes (Ixodes) hexagonus hexagonus Neumann, 1911"
|
176
177
|
canonical(sn).should == "Ixodes hexagonus hexagonus"
|
177
|
-
sn = "Brachytrypus (B.) grandidieri"
|
178
|
+
sn = "Brachytrypus (B.) grandidieri"
|
178
179
|
canonical(sn).should == "Brachytrypus grandidieri"
|
179
180
|
details(sn).should == [{:genus=>{:string=>"Brachytrypus"}, :infragenus=>{:string=>"B."}, :species=>{:string=>"grandidieri"}}]
|
180
181
|
sn = "Empis (Argyrandrus) Bezzi 1909"
|
@@ -185,7 +186,7 @@ describe ScientificNameClean do
|
|
185
186
|
sn = "Platydoris (B.)"
|
186
187
|
details(sn).should == [{:uninomial=>{:string=>"Platydoris", :infragenus=>{:string=>"B."}}}]
|
187
188
|
end
|
188
|
-
|
189
|
+
|
189
190
|
it 'should parse several authors without a year' do
|
190
191
|
sn = "Pseudocercospora dendrobii U. Braun & Crous"
|
191
192
|
parse(sn).should_not be_nil
|
@@ -199,14 +200,14 @@ describe ScientificNameClean do
|
|
199
200
|
pos(sn).should == {0=>["genus", 16], 17=>["species", 26], 27=>["author_word", 29], 30=>["author_word", 35], 40=>["author_word", 45]}
|
200
201
|
sn = "Pseudocercospora dendrobii U. Braun et Crous"
|
201
202
|
parse(sn).should_not be_nil
|
202
|
-
value(sn).should == "Pseudocercospora dendrobii U. Braun & Crous"
|
203
|
+
value(sn).should == "Pseudocercospora dendrobii U. Braun & Crous"
|
203
204
|
sn = "Arthopyrenia hyalospora(Nyl.)R.C. Harris"
|
204
205
|
parse(sn).should_not be_nil
|
205
206
|
value(sn).should == "Arthopyrenia hyalospora (Nyl.) R.C. Harris"
|
206
207
|
canonical(sn).should == "Arthopyrenia hyalospora"
|
207
208
|
details(sn).should == [{:genus=>{:string=>"Arthopyrenia"}, :species=>{:string=>"hyalospora", :authorship=>"(Nyl.)R.C. Harris", :combinationAuthorTeam=>{:authorTeam=>"R.C. Harris", :author=>["R.C. Harris"]}, :basionymAuthorTeam=>{:authorTeam=>"Nyl.", :author=>["Nyl."]}}}]
|
208
209
|
end
|
209
|
-
|
210
|
+
|
210
211
|
it 'should parse several authors with a year' do
|
211
212
|
sn = "Pseudocercospora dendrobii U. Braun & Crous 2003"
|
212
213
|
parse(sn).should_not be_nil
|
@@ -217,7 +218,7 @@ describe ScientificNameClean do
|
|
217
218
|
sn = "Pseudocercospora dendrobii Crous, 2003"
|
218
219
|
parse(sn).should_not be_nil
|
219
220
|
end
|
220
|
-
|
221
|
+
|
221
222
|
it 'should parse basionym authors in parenthesis' do
|
222
223
|
sn = "Zophosis persis (Chatanay, 1914)"
|
223
224
|
parse(sn).should_not be_nil
|
@@ -232,9 +233,9 @@ describe ScientificNameClean do
|
|
232
233
|
pos(sn).should == {0=>["genus", 8], 9=>["species", 15], 17=>["author_word", 25], 28=>["year", 32]}
|
233
234
|
parse("Zophosis persis (Chatanay) 1914").should_not be_nil
|
234
235
|
#parse("Zophosis persis Chatanay (1914)").should_not be_nil
|
235
|
-
end
|
236
|
+
end
|
236
237
|
|
237
|
-
it "should be able to parse name with identificaation annotation -- aff cf sp spp" do
|
238
|
+
it "should be able to parse name with identificaation annotation -- aff cf sp spp" do
|
238
239
|
sn = 'Diplocephalus aff. procerus Thaler, 1972'
|
239
240
|
details(sn).should == [{:genus=>{:string=>"Diplocephalus"}, :annotation_identification=>"aff.", :ignored=>{:species=>{:string=>"procerus", :authorship=>"Thaler, 1972", :basionymAuthorTeam=>{:authorTeam=>"Thaler", :author=>["Thaler"], :year=>"1972"}}}}]
|
240
241
|
sn = 'Diplocephalus aff procerus Thaler, 1972'
|
@@ -250,7 +251,7 @@ describe ScientificNameClean do
|
|
250
251
|
sn = "Thryothorus leucotis spp. bogotensis"
|
251
252
|
details(sn).should == [{:genus=>{:string=>"Thryothorus"}, :species=>{:string=>"leucotis"}, :infraspecies=>[{:annotation_identification=>"spp.", :ignored=>{:infraspecies=>{:string=>"bogotensis", :rank=>"n/a"}}}]}]
|
252
253
|
end
|
253
|
-
|
254
|
+
|
254
255
|
it 'should parse scientific name' do
|
255
256
|
sn = "Pseudocercospora dendrobii(H.C. Burnett)U. Braun & Crous 2003"
|
256
257
|
parse(sn).should_not be_nil
|
@@ -262,7 +263,7 @@ describe ScientificNameClean do
|
|
262
263
|
value(sn).should == "Pseudocercospora dendrobii (H.C. Burnett 1873) U. Braun & Crous 2003"
|
263
264
|
details(sn).should == [{:genus=>{:string=>"Pseudocercospora"}, :species=>{:string=>"dendrobii", :authorship=>"(H.C. Burnett,1873)U. Braun & Crous 2003", :combinationAuthorTeam=>{:authorTeam=>"U. Braun & Crous", :author=>["U. Braun", "Crous"], :year=>"2003"}, :basionymAuthorTeam=>{:authorTeam=>"H.C. Burnett", :author=>["H.C. Burnett"], :year=>"1873"}}}]
|
264
265
|
end
|
265
|
-
|
266
|
+
|
266
267
|
it 'should parse several authors with several years' do
|
267
268
|
sn = "Pseudocercospora dendrobii (H.C. Burnett 1883) U. Braun & Crous 2003"
|
268
269
|
parse(sn).should_not be_nil
|
@@ -301,7 +302,7 @@ describe ScientificNameClean do
|
|
301
302
|
sn = "Cassytha peninsularis J. Z. Weber var. flindersii"
|
302
303
|
canonical(sn).should == "Cassytha peninsularis flindersii"
|
303
304
|
sn = "Prunus armeniaca convar. budae (Pénzes) Soó"
|
304
|
-
|
305
|
+
|
305
306
|
canonical(sn).should == "Prunus armeniaca budae"
|
306
307
|
sn = "Polypodium pectinatum L. f. typica Rosenst."
|
307
308
|
canonical(sn).should == "Polypodium pectinatum typica"
|
@@ -330,8 +331,8 @@ describe ScientificNameClean do
|
|
330
331
|
value(sn).should == "Lachenalia tricolor var. nelsonii (auct.) Baker"
|
331
332
|
details(sn).should == [{:genus=>{:string=>"Lachenalia"}, :species=>{:string=>"tricolor"}, :infraspecies=>[{:string=>"nelsonii", :rank=>"var.", :authorship=>"(auct.) Baker", :combinationAuthorTeam=>{:authorTeam=>"Baker", :author=>["Baker"]}, :basionymAuthorTeam=>{:authorTeam=>"auct.", :author=>["unknown"]}}]}]
|
332
333
|
pos(sn).should == {0=>["genus", 10], 11=>["species", 19], 20=>["infraspecific_type", 24], 25=>["infraspecies", 33], 35=>["unknown_author", 40], 42=>["author_word", 47]}
|
333
|
-
end
|
334
|
-
|
334
|
+
end
|
335
|
+
|
335
336
|
it 'should parse unknown authors auct./anon./hort./ht.' do
|
336
337
|
sn = "Puya acris ht."
|
337
338
|
parse(sn).should_not be_nil
|
@@ -343,7 +344,7 @@ describe ScientificNameClean do
|
|
343
344
|
pr = parse(sn).should_not be_nil
|
344
345
|
pos(sn).should == {0=>["genus", 3], 4=>["species", 12], 13=>["infraspecies", 23]}
|
345
346
|
end
|
346
|
-
|
347
|
+
|
347
348
|
it 'should parse real world examples' do
|
348
349
|
sn = "Stagonospora polyspora M.T. Lucas & Sousa da Câmara 1934"
|
349
350
|
parse(sn).should_not be_nil
|
@@ -358,7 +359,7 @@ describe ScientificNameClean do
|
|
358
359
|
parse("Physalospora rubiginosa (Fr.) anon.").should_not be_nil
|
359
360
|
parse("Pleurotus ëous (Berk.) Sacc. 1887").should_not be_nil
|
360
361
|
parse("Lecanora wetmorei Śliwa 2004").should_not be_nil
|
361
|
-
# valid
|
362
|
+
# valid
|
362
363
|
# infraspecific
|
363
364
|
parse("Calicium furfuraceum * furfuraceum (L.) Pers. 1797").should_not be_nil
|
364
365
|
parse("Exobasidium vaccinii ** andromedae (P. Karst.) P. Karst. 1882").should_not be_nil
|
@@ -411,7 +412,7 @@ describe ScientificNameClean do
|
|
411
412
|
pos(sn).should == {0=>["genus", 13], 14=>["species", 25], 26=>["infraspecific_type", 32], 33=>["infraspecies", 41], 42=>["author_word", 47], 48=>["year", 52]}
|
412
413
|
end
|
413
414
|
|
414
|
-
|
415
|
+
|
415
416
|
it "should parse name with forma/fo./form./f." do
|
416
417
|
sn = "Caulerpa cupressoides forma nuda"
|
417
418
|
parse(sn).should_not be_nil
|
@@ -440,18 +441,18 @@ describe ScientificNameClean do
|
|
440
441
|
pos(sn).should == {0=>["genus", 12], 16=>["species", 25], 29=>["infraspecific_type", 31], 36=>["infraspecies", 43], 47=>["author_word", 53], 58=>["year", 62]}
|
441
442
|
parse('Polypodium vulgare nothosubsp. mantoniae (Rothm.) Schidlay').should_not be_nil
|
442
443
|
end
|
443
|
-
|
444
|
+
|
444
445
|
it "should parse name with several subspecies names NOT BOTANICAL CODE BUT NOT INFREQUENT" do
|
445
446
|
sn = "Hydnellum scrobiculatum var. zonatum f. parvum (Banker) D. Hall & D.E. Stuntz 1972"
|
446
447
|
parse(sn).should_not be_nil
|
447
448
|
value(sn).should == "Hydnellum scrobiculatum var. zonatum f. parvum (Banker) D. Hall & D.E. Stuntz 1972"
|
448
449
|
details(sn).should == [{:genus=>{:string=>"Hydnellum"}, :species=>{:string=>"scrobiculatum"}, :infraspecies=>[{:string=>"zonatum", :rank=>"var."}, {:string=>"parvum", :rank=>"f.", :authorship=>"(Banker) D. Hall & D.E. Stuntz 1972", :combinationAuthorTeam=>{:authorTeam=>"D. Hall & D.E. Stuntz", :author=>["D. Hall", "D.E. Stuntz"], :year=>"1972"}, :basionymAuthorTeam=>{:authorTeam=>"Banker", :author=>["Banker"]}}]}]
|
449
|
-
pos(sn).should == {0=>["genus", 9], 10=>["species", 23], 24=>["infraspecific_type", 28], 29=>["infraspecies", 36], 37=>["infraspecific_type", 39], 40=>["infraspecies", 46], 48=>["author_word", 54], 56=>["author_word", 58], 59=>["author_word", 63], 66=>["author_word", 70], 71=>["author_word", 77], 78=>["year", 82]}
|
450
|
+
pos(sn).should == {0=>["genus", 9], 10=>["species", 23], 24=>["infraspecific_type", 28], 29=>["infraspecies", 36], 37=>["infraspecific_type", 39], 40=>["infraspecies", 46], 48=>["author_word", 54], 56=>["author_word", 58], 59=>["author_word", 63], 66=>["author_word", 70], 71=>["author_word", 77], 78=>["year", 82]}
|
450
451
|
parse('Senecio fuchsii C.C.Gmel. subsp. fuchsii var. expansus (Boiss. & Heldr.) Hayek').should_not be_nil
|
451
452
|
parse('Senecio fuchsii C.C.Gmel. subsp. fuchsii var. fuchsii').should_not be_nil
|
452
453
|
end
|
453
|
-
|
454
|
-
|
454
|
+
|
455
|
+
|
455
456
|
it "should parse status BOTANICAL RARE" do
|
456
457
|
#it is always latin abbrev often 2 words
|
457
458
|
sn = "Arthopyrenia hyalospora (Nyl.) R.C. Harris comb. nov."
|
@@ -461,7 +462,7 @@ describe ScientificNameClean do
|
|
461
462
|
details(sn).should == [{:genus=>{:string=>"Arthopyrenia"}, :species=>{:string=>"hyalospora", :authorship=>"(Nyl.) R.C. Harris", :combinationAuthorTeam=>{:authorTeam=>"R.C. Harris", :author=>["R.C. Harris"]}, :basionymAuthorTeam=>{:authorTeam=>"Nyl.", :author=>["Nyl."]}}, :status=>"comb. nov."}]
|
462
463
|
pos(sn).should == {0=>["genus", 12], 13=>["species", 23], 25=>["author_word", 29], 31=>["author_word", 35], 36=>["author_word", 42]}
|
463
464
|
end
|
464
|
-
|
465
|
+
|
465
466
|
it "should parse revised (ex) names" do
|
466
467
|
#invalidly published
|
467
468
|
sn = "Arthopyrenia hyalospora (Nyl. ex Banker) R.C. Harris"
|
@@ -490,7 +491,7 @@ describe ScientificNameClean do
|
|
490
491
|
details(sn).should == [{:genus=>{:string=>"Salmonella"}, :species=>{:string=>"werahensis", :authorship=>"(Castellani) Hauduroy and Ehringer in Hauduroy 1937", :combinationAuthorTeam=>{:authorTeam=>"Hauduroy and Ehringer", :author=>["Hauduroy", "Ehringer"], :exAuthorTeam=>{:authorTeam=>"Hauduroy", :author=>["Hauduroy"], :year=>"1937"}}, :basionymAuthorTeam=>{:authorTeam=>"Castellani", :author=>["Castellani"]}}}]
|
491
492
|
pos(sn).should == {0=>["genus", 10], 11=>["species", 21], 23=>["author_word", 33], 35=>["author_word", 43], 48=>["author_word", 56], 60=>["author_word", 68], 69=>["year", 73]}
|
492
493
|
end
|
493
|
-
|
494
|
+
|
494
495
|
it 'should parse named hybrids' do
|
495
496
|
[
|
496
497
|
["×Agropogon P. Fourn. 1934", [{:uninomial=>{:string=>"Agropogon", :authorship=>"P. Fourn. 1934", :basionymAuthorTeam=>{:authorTeam=>"P. Fourn.", :author=>["P. Fourn."], :year=>"1934"}}}]],
|
@@ -504,12 +505,12 @@ describe ScientificNameClean do
|
|
504
505
|
["xHeucherella tiarelloides", [{:genus=>{:string=>"Heucherella"}, :species=>{:string=>"tiarelloides"}}]],
|
505
506
|
["x Heucherella tiarelloides", [{:genus=>{:string=>"Heucherella"}, :species=>{:string=>"tiarelloides"}}]],
|
506
507
|
["×Agropogon littoralis (Sm.) C. E. Hubb. 1946", [{:genus=>{:string=>"Agropogon"}, :species=>{:string=>"littoralis", :authorship=>"(Sm.) C. E. Hubb. 1946", :combinationAuthorTeam=>{:authorTeam=>"C. E. Hubb.", :author=>["C. E. Hubb."], :year=>"1946"}, :basionymAuthorTeam=>{:authorTeam=>"Sm.", :author=>["Sm."]}}}]]
|
507
|
-
].each do |res|
|
508
|
+
].each do |res|
|
508
509
|
parse(res[0]).should_not be_nil
|
509
510
|
parse(res[0]).hybrid.should be_true
|
510
511
|
details(res[0]).should == res[1]
|
511
512
|
end
|
512
|
-
[
|
513
|
+
[
|
513
514
|
['Asplenium X inexpectatum (E.L. Braun 1940) Morton (1956)',[{:genus=>{:string=>"Asplenium"}, :species=>{:string=>"inexpectatum", :authorship=>"(E.L. Braun 1940) Morton (1956)", :combinationAuthorTeam=>{:authorTeam=>"Morton", :author=>["Morton"], :year=>"1956"}, :basionymAuthorTeam=>{:authorTeam=>"E.L. Braun", :author=>["E.L. Braun"], :year=>"1940"}}}]],
|
514
515
|
['Mentha ×smithiana R. A. Graham 1949',[{:genus=>{:string=>"Mentha"}, :species=>{:string=>"smithiana", :authorship=>"R. A. Graham 1949", :basionymAuthorTeam=>{:authorTeam=>"R. A. Graham", :author=>["R. A. Graham"], :year=>"1949"}}}]],
|
515
516
|
['Salix ×capreola Andersson (1867)',[{:genus=>{:string=>"Salix"}, :species=>{:string=>"capreola", :authorship=>"Andersson (1867)", :basionymAuthorTeam=>{:authorTeam=>"Andersson", :author=>["Andersson"], :year=>"1867"}}}]],
|
@@ -523,7 +524,7 @@ describe ScientificNameClean do
|
|
523
524
|
canonical(sn).should == "Rosa alpina × pomifera"
|
524
525
|
parse(sn).details.should == [{:genus=>{:string=>"Rosa"}, :species=>{:string=>"alpina"}}, {:species=>{:string=>"pomifera"}, :genus=>{:string=>"Rosa"}}]
|
525
526
|
end
|
526
|
-
|
527
|
+
|
527
528
|
it "should parse hybrid combination" do
|
528
529
|
sn = "Arthopyrenia hyalospora X Hydnellum scrobiculatum"
|
529
530
|
parse(sn).should_not be_nil
|
@@ -538,7 +539,7 @@ describe ScientificNameClean do
|
|
538
539
|
value(sn).should == "Arthopyrenia hyalospora (Banker) D. Hall \303\227 Hydnellum scrobiculatum D.E. Stuntz"
|
539
540
|
canonical(sn).should == "Arthopyrenia hyalospora × Hydnellum scrobiculatum"
|
540
541
|
pos(sn).should == {0=>["genus", 12], 13=>["species", 23], 25=>["author_word", 31], 33=>["author_word", 35], 36=>["author_word", 40], 43=>["genus", 52], 53=>["species", 66], 67=>["author_word", 71], 72=>["author_word", 78]}
|
541
|
-
value("Arthopyrenia hyalospora X").should == "Arthopyrenia hyalospora \303\227 ?"
|
542
|
+
value("Arthopyrenia hyalospora X").should == "Arthopyrenia hyalospora \303\227 ?"
|
542
543
|
sn = "Arthopyrenia hyalospora x"
|
543
544
|
parse(sn).should_not be_nil
|
544
545
|
parse(sn).hybrid.should be_true
|
@@ -551,7 +552,7 @@ describe ScientificNameClean do
|
|
551
552
|
details(sn).should == [{:genus=>{:string=>"Arthopyrenia"}, :species=>{:string=>"hyalospora"}}, "?"]
|
552
553
|
pos(sn).should == {0=>["genus", 12], 13=>["species", 23]}
|
553
554
|
end
|
554
|
-
|
555
|
+
|
555
556
|
it 'should parse names with taxon concept' do
|
556
557
|
sn = "Stenometope laevissimus sec. Eschmeyer 2004"
|
557
558
|
details(sn).should == [{:genus=>{:string=>"Stenometope"}, :species=>{:string=>"laevissimus"}, :taxon_concept=>{:authorship=>"Eschmeyer 2004", :basionymAuthorTeam=>{:authorTeam=>"Eschmeyer", :author=>["Eschmeyer"], :year=>"2004"}}}]
|
@@ -561,12 +562,12 @@ describe ScientificNameClean do
|
|
561
562
|
details(sn).should == [{:genus=>{:string=>"Stenometope"}, :species=>{:string=>"laevissimus", :authorship=>"Bibron 1855", :basionymAuthorTeam=>{:authorTeam=>"Bibron", :author=>["Bibron"], :year=>"1855"}}, :taxon_concept=>{:authorship=>"Eschmeyer 2004", :basionymAuthorTeam=>{:authorTeam=>"Eschmeyer", :author=>["Eschmeyer"], :year=>"2004"}}}]
|
562
563
|
pos(sn).should == {0=>["genus", 11], 12=>["species", 23], 24=>["author_word", 30], 31=>["year", 35], 41=>["author_word", 50], 51=>["year", 55]}
|
563
564
|
end
|
564
|
-
|
565
|
+
|
565
566
|
it 'should parse names with spaces inconsistencies at the start and the end and in the middle' do
|
566
567
|
parse(" Asplenium X inexpectatum (E.L. Braun 1940) Morton (1956) ").should_not be_nil
|
567
568
|
end
|
568
|
-
|
569
|
-
it 'should parse names with any number of spaces' do
|
569
|
+
|
570
|
+
it 'should parse names with any number of spaces' do
|
570
571
|
sn = "Trematosphaeria phaeospora (E. Müll.) L. Holm 1957"
|
571
572
|
parse(sn).should_not be_nil
|
572
573
|
value(sn).should == "Trematosphaeria phaeospora (E. Müll.) L. Holm 1957"
|
@@ -574,7 +575,7 @@ describe ScientificNameClean do
|
|
574
575
|
details(sn).should == [{:genus=>{:string=>"Trematosphaeria"}, :species=>{:string=>"phaeospora", :authorship=>"(E. Müll.) L. Holm 1957", :combinationAuthorTeam=>{:authorTeam=>"L. Holm", :author=>["L. Holm"], :year=>"1957"}, :basionymAuthorTeam=>{:authorTeam=>"E. Müll.", :author=>["E. Müll."]}}}]
|
575
576
|
pos(sn).should == {0=>["genus", 15], 16=>["species", 26], 28=>["author_word", 30], 31=>["author_word", 36], 46=>["author_word", 48], 61=>["author_word", 65], 66=>["year", 70]}
|
576
577
|
end
|
577
|
-
|
578
|
+
|
578
579
|
it 'should not parse serveral authors groups with several years NOT CORRECT' do
|
579
580
|
parse("Pseudocercospora dendrobii (H.C. Burnett 1883) (Leight.) (Movss. 1967) U. Braun & Crous 2003").should be_nil
|
580
581
|
end
|
@@ -609,7 +610,7 @@ describe ScientificNameClean do
|
|
609
610
|
sn = 'Deschampsia cespitosa ssp pumila'
|
610
611
|
details(sn).should == [{:genus=>{:string=>"Deschampsia"}, :species=>{:string=>"cespitosa"}, :infraspecies=>[{:string=>"pumila", :rank=>"ssp"}]}]
|
611
612
|
end
|
612
|
-
|
613
|
+
|
613
614
|
# Combination genus names should be merged without dash or capital letter
|
614
615
|
it 'should parse hybrid names with capitalized second name in genus (botanical code error)' do
|
615
616
|
sn = 'Anacampti-Platanthera P. Fourn.'
|
@@ -627,13 +628,13 @@ describe ScientificNameClean do
|
|
627
628
|
canonical(sn).should == 'Oemona simplex'
|
628
629
|
end
|
629
630
|
#"Arthrosamanea eriorhachis (Harms & sine ref. ) Aubrév." -- ignore & sine ref. (means without reference)
|
630
|
-
|
631
|
+
|
631
632
|
=begin
|
632
633
|
new stuff
|
633
634
|
|
634
635
|
sn = "Orchidaceae × Asconopsis hort."
|
635
636
|
canonical(sn).should == "Orchidaceae x Asconopsis"
|
636
|
-
sn
|
637
|
+
sn
|
637
638
|
Tamiops swinhoei near hainanus|Tamiops swinhoei near hainanus
|
638
639
|
Conus textile form archiepiscopus|Conus textile form archiepiscopus|
|
639
640
|
Crypticus pseudosericeus ssp. olivieri Desbrochers des Loges,1881|Crypticus pseudosericeus olivieri des
|
data/spec/parser/test_data.txt
CHANGED
@@ -414,6 +414,7 @@ Buteo borealis ? ventralis|{"scientificName":{"parsed":true, "parser_version":"t
|
|
414
414
|
Cetraria islandica ? islandica|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Cetraria islandica ? islandica", "normalized":"Cetraria islandica", "canonical":"Cetraria islandica", "hybrid":false, "details":[{"genus":{"string":"Cetraria"}, "species":{"string":"islandica"}, "infraspecies":[{"annotation_identification":"?", "ignored":{"infraspecies":{"string":"islandica", "rank":"n/a"}}}]}], "parser_run":1, "positions":{"0":["genus", 8], "9":["species", 18], "19":["annotation_identification", 21]}}}
|
415
415
|
Euxoa nr. idahoensis sp. 1clay|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Euxoa nr. idahoensis sp. 1clay", "normalized":"Euxoa idahoensis", "canonical":"Euxoa idahoensis", "hybrid":false, "details":[{"genus":{"string":"Euxoa"}, "species":{"string":"idahoensis"}, "infraspecies":[{"annotation_identification":"sp.", "ignored":{"infraspecies":{"string":"uniclay", "rank":"n/a"}}}]}], "parser_run":1, "positions":{"0":["genus", 5], "10":["species", 20], "21":["annotation_identification", 24]}, "surrogate": true}}
|
416
416
|
Parus caeruleus species complex|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Parus caeruleus species complex", "normalized":"Parus caeruleus", "canonical":"Parus caeruleus", "hybrid":false, "details":[{"genus":{"string":"Parus"}, "species":{"string":"caeruleus"}}], "parser_run":1, "positions":{"0":["genus", 5], "6":["species", 15]}}}
|
417
|
+
Peristernia nassatula forskali Tapparone-Canefri 1875|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Peristernia nassatula forskali Tapparone-Canefri 1875", "normalized":"Peristernia nassatula forskali Tapparone-Canefri 1875", "canonical":"Peristernia nassatula forskali", "hybrid":false, "details":[{"genus":{"string":"Peristernia"}, "species":{"string":"nassatula"}, "infraspecies":[{"string":"forskali", "rank":"n/a", "authorship":"Tapparone-Canefri 1875", "basionymAuthorTeam":{"authorTeam":"Tapparone-Canefri", "author":["Tapparone-Canefri"], "year":"1875"}}]}], "parser_run":1, "positions":{"0":["genus", 11], "12":["species", 21], "22":["infraspecies", 30], "31":["author_word", 48], "49":["year", 53]}}}
|
417
418
|
|
418
419
|
#should flag surrogate names
|
419
420
|
Coleoptera sp. BOLD:AAV0432|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Coleoptera sp. BOLD:AAV0432", "normalized":"Coleoptera", "canonical":"Coleoptera", "hybrid":false, "details":[{"genus":{"string":"Coleoptera"}, "annotation_identification":"sp.", "ignored":{"unparsed":"BOLD:AAV0432"}}], "parser_run":1, "positions":{"0":["genus", 10], "11":["annotation_identification", 14]}, "surrogate":true}}
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: biodiversity
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.1.1
|
4
|
+
version: 3.1.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-06-
|
12
|
+
date: 2013-06-21 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake
|
@@ -209,6 +209,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
209
209
|
- - ! '>='
|
210
210
|
- !ruby/object:Gem::Version
|
211
211
|
version: '0'
|
212
|
+
segments:
|
213
|
+
- 0
|
214
|
+
hash: 3336294247914629914
|
212
215
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
213
216
|
none: false
|
214
217
|
requirements:
|