biodiversity 3.1.1 → 3.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +5 -2
- data/VERSION +1 -1
- data/lib/biodiversity/parser/scientific_name_clean.treetop +1 -1
- data/spec/parser/scientific_name_clean.spec.rb +47 -46
- data/spec/parser/test_data.txt +1 -0
- metadata +5 -2
data/CHANGELOG
CHANGED
|
@@ -1,6 +1,9 @@
|
|
|
1
|
-
3.1.1 -- fixed problem with names ending on sp
|
|
1
|
+
3.1.2 -- fixed problems with infraspecies epithets starting with fo,
|
|
2
|
+
for example Peristernia nassatula forskali Tapparone-Canefri 1875
|
|
2
3
|
|
|
3
|
-
3.1.0 -- added surrogates: Genbank, Barcode of life names.
|
|
4
|
+
3.1.1 -- fixed problem with names ending on sp
|
|
5
|
+
|
|
6
|
+
3.1.0 -- added surrogates: Genbank, Barcode of life names.
|
|
4
7
|
Also cf., sp. spp are flagged as surrogates
|
|
5
8
|
|
|
6
9
|
3.0.1 -- updated gems
|
data/VERSION
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
3.1.1
|
|
1
|
+
3.1.2
|
data/lib/biodiversity/parser/scientific_name_clean.treetop
CHANGED
|
@@ -560,7 +560,7 @@ grammar ScientificNameClean
|
|
|
560
560
|
end
|
|
561
561
|
|
|
562
562
|
rule rank
|
|
563
|
-
("morph."/"f.sp."/"B "/"ssp."/"ssp "/"mut."/"nat "/"nothosubsp."/"convar."/"pseudovar."/"sect."/"ser."/"var."/"subvar."/ "[var.]" /"var "/"subsp."/"subsp "/"subf."/"race "/"forma "/"fma."/"fma "/"form."/"form "/"fo."/"fo"/"f."/"α"/"ββ"/"β"/"γ"/"δ"/"ε"/"φ"/"θ"/"μ"/"a."/"b."/"c."/"d."/"e."/"g."/"k."/"****"/"**"/"*")
|
|
563
|
+
("morph."/"f.sp."/"B "/"ssp."/"ssp "/"mut."/"nat "/"nothosubsp."/"convar."/"pseudovar."/"sect."/"ser."/"var."/"subvar."/ "[var.]" /"var "/"subsp."/"subsp "/"subf."/"race "/"forma "/"fma."/"fma "/"form."/"form "/"fo."/"fo "/"f."/"α"/"ββ"/"β"/"γ"/"δ"/"ε"/"φ"/"θ"/"μ"/"a."/"b."/"c."/"d."/"e."/"g."/"k."/"****"/"**"/"*")
|
|
564
564
|
{
|
|
565
565
|
def value
|
|
566
566
|
text_value.strip
|
data/spec/parser/scientific_name_clean.spec.rb
CHANGED
|
@@ -7,7 +7,7 @@ describe ScientificNameClean do
|
|
|
7
7
|
before(:all) do
|
|
8
8
|
set_parser(ScientificNameCleanParser.new)
|
|
9
9
|
end
|
|
10
|
-
|
|
10
|
+
|
|
11
11
|
it 'should parse uninomial' do
|
|
12
12
|
sn = 'Pseudocercospora'
|
|
13
13
|
parse(sn).should_not be_nil
|
|
@@ -16,12 +16,12 @@ describe ScientificNameClean do
|
|
|
16
16
|
details(sn).should == [{:uninomial=>{:string=>"Pseudocercospora"}}]
|
|
17
17
|
pos(sn).should == {0=>["uninomial", 16]}
|
|
18
18
|
end
|
|
19
|
-
|
|
19
|
+
|
|
20
20
|
it 'should parse uninomial with author and year' do
|
|
21
21
|
sn = 'Pseudocercospora Speg.'
|
|
22
22
|
parse(sn).should_not be_nil
|
|
23
23
|
details(sn).should == [{:uninomial=>{:string=>"Pseudocercospora", :authorship=>"Speg.", :basionymAuthorTeam=>{:authorTeam=>"Speg.", :author=>["Speg."]}}}]
|
|
24
|
-
pos(sn).should == {0=>["uninomial", 16], 17=>["author_word", 22]}
|
|
24
|
+
pos(sn).should == {0=>["uninomial", 16], 17=>["author_word", 22]}
|
|
25
25
|
sn = 'Pseudocercospora Spegazzini, 1910'
|
|
26
26
|
parse(sn).should_not be_nil
|
|
27
27
|
value(sn).should == 'Pseudocercospora Spegazzini 1910'
|
|
@@ -34,7 +34,7 @@ describe ScientificNameClean do
|
|
|
34
34
|
parse(sn).should_not be_nil
|
|
35
35
|
details(sn).should == [{:uninomial=>{:string=>"Epacridaceae"}, :rank_uninomials=>"trib.", :uninomial2=>{:string=>"Archerieae", :authorship=>"Crayn & Quinn", :basionymAuthorTeam=>{:authorTeam=>"Crayn & Quinn", :author=>["Crayn", "Quinn"]}}}]
|
|
36
36
|
end
|
|
37
|
-
|
|
37
|
+
|
|
38
38
|
it 'should parse names with a valid 2 letter genus' do
|
|
39
39
|
["Ca Dyar 1914",
|
|
40
40
|
"Ea Distant 1911",
|
|
@@ -59,7 +59,7 @@ describe ScientificNameClean do
|
|
|
59
59
|
end
|
|
60
60
|
canonical('Quoyula').should == 'Quoyula'
|
|
61
61
|
end
|
|
62
|
-
|
|
62
|
+
|
|
63
63
|
it 'should parse canonical' do
|
|
64
64
|
sn = 'Pseudocercospora dendrobii'
|
|
65
65
|
parse(sn).should_not be_nil
|
|
@@ -78,8 +78,8 @@ describe ScientificNameClean do
|
|
|
78
78
|
value(sn).should == 'Ps. dendrobii'
|
|
79
79
|
details(sn).should == [{:genus=>{:string=>"Ps."}, :species=>{:string=>"dendrobii"}}]
|
|
80
80
|
end
|
|
81
|
-
|
|
82
|
-
|
|
81
|
+
|
|
82
|
+
|
|
83
83
|
it 'should parse species name with author and year' do
|
|
84
84
|
sn = "Platypus bicaudatulus Schedl 1935"
|
|
85
85
|
parse(sn).should_not be_nil
|
|
@@ -95,7 +95,7 @@ describe ScientificNameClean do
|
|
|
95
95
|
details(sn).should == [{:genus=>{:string=>"Platypus"}, :species=>{:string=>"bicaudatulus", :authorship=>"Schedl (1935h)", :basionymAuthorTeam=>{:authorTeam=>"Schedl", :author=>["Schedl"], :year=>"1935"}}}]
|
|
96
96
|
parse("Platypus bicaudatulus Schedl 1935").should_not be_nil
|
|
97
97
|
end
|
|
98
|
-
|
|
98
|
+
|
|
99
99
|
it 'should parse species name with abbreviated genus, author and year' do
|
|
100
100
|
sn = "P. bicaudatulus Schedl 1935"
|
|
101
101
|
parse(sn).should_not be_nil
|
|
@@ -110,7 +110,7 @@ describe ScientificNameClean do
|
|
|
110
110
|
end
|
|
111
111
|
|
|
112
112
|
it "should parse species name with author's postfix f., filius (son of)" do
|
|
113
|
-
names = [
|
|
113
|
+
names = [
|
|
114
114
|
[ "Platypus bicaudatulus Schedl f. 1935", [{:genus=>{:string=>"Platypus"}, :species=>{:string=>"bicaudatulus", :authorship=>"Schedl f. 1935", :basionymAuthorTeam=>{:authorTeam=>"Schedl f.", :author=>["Schedl f."], :year=>"1935"}}}], 'Platypus bicaudatulus Schedl f. 1935'],
|
|
115
115
|
[ "Platypus bicaudatulus Schedl filius 1935", [{:genus=>{:string=>"Platypus"}, :species=>{:string=>"bicaudatulus", :authorship=>"Schedl filius 1935", :basionymAuthorTeam=>{:authorTeam=>"Schedl filius", :author=>["Schedl filius"], :year=>"1935"}}}], 'Platypus bicaudatulus Schedl filius 1935'],
|
|
116
116
|
[ "Fimbristylis ovata (Burm. f.) J. Kern", [{:genus=>{:string=>"Fimbristylis"}, :species=>{:string=>"ovata", :authorship=>"(Burm. f.) J. Kern", :combinationAuthorTeam=>{:authorTeam=>"J. Kern", :author=>["J. Kern"]}, :basionymAuthorTeam=>{:authorTeam=>"Burm. f.", :author=>["Burm. f."]}}}], 'Fimbristylis ovata (Burm. f.) J. Kern'],
|
|
@@ -123,14 +123,15 @@ describe ScientificNameClean do
|
|
|
123
123
|
["Betula pendula fo. dalecarlica (L. f.) C.K. Schneid.", [{:infraspecies=>[{:basionymAuthorTeam=>{:author=>["L. f."], :authorTeam=>"L. f."}, :string=>"dalecarlica", :rank=>"fo.", :combinationAuthorTeam=>{:author=>["C.K. Schneid."], :authorTeam=>"C.K. Schneid."}, :authorship=>"(L. f.) C.K. Schneid."}], :genus=>{:string=>"Betula"}, :species=>{:string=>"pendula"}}], "Betula pendula fo. dalecarlica (L. f.) C.K. Schneid."],
|
|
124
124
|
["Racomitrium canescens f. ericoides (F. Weber ex Brid.) Mönk.", [{:genus=>{:string=>"Racomitrium"}, :species=>{:string=>"canescens"}, :infraspecies=>[{:string=>"ericoides", :rank=>"f.", :authorship=>"(F. Weber ex Brid.) Mönk.", :combinationAuthorTeam=>{:authorTeam=>"Mönk.", :author=>["Mönk."]}, :basionymAuthorTeam=>{:authorTeam=>"F. Weber", :author=>["F. Weber"], :exAuthorTeam=>{:authorTeam=>"Brid.", :author=>["Brid."]}}}]}], "Racomitrium canescens f. ericoides (F. Weber ex Brid.) Mönk."],
|
|
125
125
|
["Racomitrium canescens forma ericoides (F. Weber ex Brid.) Mönk.", [{:genus=>{:string=>"Racomitrium"}, :species=>{:string=>"canescens"}, :infraspecies=>[{:string=>"ericoides", :rank=>"forma", :authorship=>"(F. Weber ex Brid.) Mönk.", :combinationAuthorTeam=>{:authorTeam=>"Mönk.", :author=>["Mönk."]}, :basionymAuthorTeam=>{:authorTeam=>"F. Weber", :author=>["F. Weber"], :exAuthorTeam=>{:authorTeam=>"Brid.", :author=>["Brid."]}}}]}], "Racomitrium canescens forma ericoides (F. Weber ex Brid.) Mönk."],
|
|
126
|
+
["Peristernia nassatula forskali Tapparone-Canefri 1875", [{:genus=>{:string=>"Peristernia"}, :species=>{:string=>"nassatula"}, :infraspecies=>[{:string=>"forskali", :rank=>"n/a", :authorship=>"Tapparone-Canefri 1875", :basionymAuthorTeam=>{:authorTeam=>"Tapparone-Canefri", :author=>["Tapparone-Canefri"], :year=>"1875"}}]}], "Peristernia nassatula forskali Tapparone-Canefri 1875"],
|
|
126
127
|
]
|
|
127
|
-
names.each do |sn, sn_details, sn_value|
|
|
128
|
+
names.each do |sn, sn_details, sn_value|
|
|
128
129
|
parse(sn).should_not be_nil
|
|
129
130
|
details(sn).should == sn_details
|
|
130
131
|
value(sn).should == sn_value
|
|
131
132
|
end
|
|
132
133
|
end
|
|
133
|
-
|
|
134
|
+
|
|
134
135
|
it 'should parse genus with "?"' do
|
|
135
136
|
sn = "Ferganoconcha? oblonga"
|
|
136
137
|
parse(sn).should_not be_nil
|
|
@@ -138,7 +139,7 @@ describe ScientificNameClean do
|
|
|
138
139
|
details(sn).should == [{:genus=>{:string=>"Ferganoconcha"}, :species=>{:string=>"oblonga"}}]
|
|
139
140
|
pos(sn).should == {0=>["genus", 14], 15=>["species", 22]}
|
|
140
141
|
end
|
|
141
|
-
|
|
142
|
+
|
|
142
143
|
it 'should parse æ in the name' do
|
|
143
144
|
names = [
|
|
144
145
|
["Læptura laetifica Dow, 1913", "Laeptura laetifica Dow 1913"],
|
|
@@ -157,7 +158,7 @@ describe ScientificNameClean do
|
|
|
157
158
|
value(name_pair[0]).should == name_pair[1]
|
|
158
159
|
end
|
|
159
160
|
end
|
|
160
|
-
|
|
161
|
+
|
|
161
162
|
it 'should parse names with e-umlaut' do
|
|
162
163
|
sn = 'Kalanchoë tuberosa'
|
|
163
164
|
canonical(sn).should == 'Kalanchoe tuberosa'
|
|
@@ -174,7 +175,7 @@ describe ScientificNameClean do
|
|
|
174
175
|
pos(sn).should == {0=>["genus", 7], 9=>["infragenus", 16], 18=>["species", 29], 30=>["author_word", 38], 39=>["author_word", 40], 41=>["year", 45]}
|
|
175
176
|
sn = "Ixodes (Ixodes) hexagonus hexagonus Neumann, 1911"
|
|
176
177
|
canonical(sn).should == "Ixodes hexagonus hexagonus"
|
|
177
|
-
sn = "Brachytrypus (B.) grandidieri"
|
|
178
|
+
sn = "Brachytrypus (B.) grandidieri"
|
|
178
179
|
canonical(sn).should == "Brachytrypus grandidieri"
|
|
179
180
|
details(sn).should == [{:genus=>{:string=>"Brachytrypus"}, :infragenus=>{:string=>"B."}, :species=>{:string=>"grandidieri"}}]
|
|
180
181
|
sn = "Empis (Argyrandrus) Bezzi 1909"
|
|
@@ -185,7 +186,7 @@ describe ScientificNameClean do
|
|
|
185
186
|
sn = "Platydoris (B.)"
|
|
186
187
|
details(sn).should == [{:uninomial=>{:string=>"Platydoris", :infragenus=>{:string=>"B."}}}]
|
|
187
188
|
end
|
|
188
|
-
|
|
189
|
+
|
|
189
190
|
it 'should parse several authors without a year' do
|
|
190
191
|
sn = "Pseudocercospora dendrobii U. Braun & Crous"
|
|
191
192
|
parse(sn).should_not be_nil
|
|
@@ -199,14 +200,14 @@ describe ScientificNameClean do
|
|
|
199
200
|
pos(sn).should == {0=>["genus", 16], 17=>["species", 26], 27=>["author_word", 29], 30=>["author_word", 35], 40=>["author_word", 45]}
|
|
200
201
|
sn = "Pseudocercospora dendrobii U. Braun et Crous"
|
|
201
202
|
parse(sn).should_not be_nil
|
|
202
|
-
value(sn).should == "Pseudocercospora dendrobii U. Braun & Crous"
|
|
203
|
+
value(sn).should == "Pseudocercospora dendrobii U. Braun & Crous"
|
|
203
204
|
sn = "Arthopyrenia hyalospora(Nyl.)R.C. Harris"
|
|
204
205
|
parse(sn).should_not be_nil
|
|
205
206
|
value(sn).should == "Arthopyrenia hyalospora (Nyl.) R.C. Harris"
|
|
206
207
|
canonical(sn).should == "Arthopyrenia hyalospora"
|
|
207
208
|
details(sn).should == [{:genus=>{:string=>"Arthopyrenia"}, :species=>{:string=>"hyalospora", :authorship=>"(Nyl.)R.C. Harris", :combinationAuthorTeam=>{:authorTeam=>"R.C. Harris", :author=>["R.C. Harris"]}, :basionymAuthorTeam=>{:authorTeam=>"Nyl.", :author=>["Nyl."]}}}]
|
|
208
209
|
end
|
|
209
|
-
|
|
210
|
+
|
|
210
211
|
it 'should parse several authors with a year' do
|
|
211
212
|
sn = "Pseudocercospora dendrobii U. Braun & Crous 2003"
|
|
212
213
|
parse(sn).should_not be_nil
|
|
@@ -217,7 +218,7 @@ describe ScientificNameClean do
|
|
|
217
218
|
sn = "Pseudocercospora dendrobii Crous, 2003"
|
|
218
219
|
parse(sn).should_not be_nil
|
|
219
220
|
end
|
|
220
|
-
|
|
221
|
+
|
|
221
222
|
it 'should parse basionym authors in parenthesis' do
|
|
222
223
|
sn = "Zophosis persis (Chatanay, 1914)"
|
|
223
224
|
parse(sn).should_not be_nil
|
|
@@ -232,9 +233,9 @@ describe ScientificNameClean do
|
|
|
232
233
|
pos(sn).should == {0=>["genus", 8], 9=>["species", 15], 17=>["author_word", 25], 28=>["year", 32]}
|
|
233
234
|
parse("Zophosis persis (Chatanay) 1914").should_not be_nil
|
|
234
235
|
#parse("Zophosis persis Chatanay (1914)").should_not be_nil
|
|
235
|
-
end
|
|
236
|
+
end
|
|
236
237
|
|
|
237
|
-
it "should be able to parse name with identificaation annotation -- aff cf sp spp" do
|
|
238
|
+
it "should be able to parse name with identificaation annotation -- aff cf sp spp" do
|
|
238
239
|
sn = 'Diplocephalus aff. procerus Thaler, 1972'
|
|
239
240
|
details(sn).should == [{:genus=>{:string=>"Diplocephalus"}, :annotation_identification=>"aff.", :ignored=>{:species=>{:string=>"procerus", :authorship=>"Thaler, 1972", :basionymAuthorTeam=>{:authorTeam=>"Thaler", :author=>["Thaler"], :year=>"1972"}}}}]
|
|
240
241
|
sn = 'Diplocephalus aff procerus Thaler, 1972'
|
|
@@ -250,7 +251,7 @@ describe ScientificNameClean do
|
|
|
250
251
|
sn = "Thryothorus leucotis spp. bogotensis"
|
|
251
252
|
details(sn).should == [{:genus=>{:string=>"Thryothorus"}, :species=>{:string=>"leucotis"}, :infraspecies=>[{:annotation_identification=>"spp.", :ignored=>{:infraspecies=>{:string=>"bogotensis", :rank=>"n/a"}}}]}]
|
|
252
253
|
end
|
|
253
|
-
|
|
254
|
+
|
|
254
255
|
it 'should parse scientific name' do
|
|
255
256
|
sn = "Pseudocercospora dendrobii(H.C. Burnett)U. Braun & Crous 2003"
|
|
256
257
|
parse(sn).should_not be_nil
|
|
@@ -262,7 +263,7 @@ describe ScientificNameClean do
|
|
|
262
263
|
value(sn).should == "Pseudocercospora dendrobii (H.C. Burnett 1873) U. Braun & Crous 2003"
|
|
263
264
|
details(sn).should == [{:genus=>{:string=>"Pseudocercospora"}, :species=>{:string=>"dendrobii", :authorship=>"(H.C. Burnett,1873)U. Braun & Crous 2003", :combinationAuthorTeam=>{:authorTeam=>"U. Braun & Crous", :author=>["U. Braun", "Crous"], :year=>"2003"}, :basionymAuthorTeam=>{:authorTeam=>"H.C. Burnett", :author=>["H.C. Burnett"], :year=>"1873"}}}]
|
|
264
265
|
end
|
|
265
|
-
|
|
266
|
+
|
|
266
267
|
it 'should parse several authors with several years' do
|
|
267
268
|
sn = "Pseudocercospora dendrobii (H.C. Burnett 1883) U. Braun & Crous 2003"
|
|
268
269
|
parse(sn).should_not be_nil
|
|
@@ -301,7 +302,7 @@ describe ScientificNameClean do
|
|
|
301
302
|
sn = "Cassytha peninsularis J. Z. Weber var. flindersii"
|
|
302
303
|
canonical(sn).should == "Cassytha peninsularis flindersii"
|
|
303
304
|
sn = "Prunus armeniaca convar. budae (Pénzes) Soó"
|
|
304
|
-
|
|
305
|
+
|
|
305
306
|
canonical(sn).should == "Prunus armeniaca budae"
|
|
306
307
|
sn = "Polypodium pectinatum L. f. typica Rosenst."
|
|
307
308
|
canonical(sn).should == "Polypodium pectinatum typica"
|
|
@@ -330,8 +331,8 @@ describe ScientificNameClean do
|
|
|
330
331
|
value(sn).should == "Lachenalia tricolor var. nelsonii (auct.) Baker"
|
|
331
332
|
details(sn).should == [{:genus=>{:string=>"Lachenalia"}, :species=>{:string=>"tricolor"}, :infraspecies=>[{:string=>"nelsonii", :rank=>"var.", :authorship=>"(auct.) Baker", :combinationAuthorTeam=>{:authorTeam=>"Baker", :author=>["Baker"]}, :basionymAuthorTeam=>{:authorTeam=>"auct.", :author=>["unknown"]}}]}]
|
|
332
333
|
pos(sn).should == {0=>["genus", 10], 11=>["species", 19], 20=>["infraspecific_type", 24], 25=>["infraspecies", 33], 35=>["unknown_author", 40], 42=>["author_word", 47]}
|
|
333
|
-
end
|
|
334
|
-
|
|
334
|
+
end
|
|
335
|
+
|
|
335
336
|
it 'should parse unknown authors auct./anon./hort./ht.' do
|
|
336
337
|
sn = "Puya acris ht."
|
|
337
338
|
parse(sn).should_not be_nil
|
|
@@ -343,7 +344,7 @@ describe ScientificNameClean do
|
|
|
343
344
|
pr = parse(sn).should_not be_nil
|
|
344
345
|
pos(sn).should == {0=>["genus", 3], 4=>["species", 12], 13=>["infraspecies", 23]}
|
|
345
346
|
end
|
|
346
|
-
|
|
347
|
+
|
|
347
348
|
it 'should parse real world examples' do
|
|
348
349
|
sn = "Stagonospora polyspora M.T. Lucas & Sousa da Câmara 1934"
|
|
349
350
|
parse(sn).should_not be_nil
|
|
@@ -358,7 +359,7 @@ describe ScientificNameClean do
|
|
|
358
359
|
parse("Physalospora rubiginosa (Fr.) anon.").should_not be_nil
|
|
359
360
|
parse("Pleurotus ëous (Berk.) Sacc. 1887").should_not be_nil
|
|
360
361
|
parse("Lecanora wetmorei Śliwa 2004").should_not be_nil
|
|
361
|
-
# valid
|
|
362
|
+
# valid
|
|
362
363
|
# infraspecific
|
|
363
364
|
parse("Calicium furfuraceum * furfuraceum (L.) Pers. 1797").should_not be_nil
|
|
364
365
|
parse("Exobasidium vaccinii ** andromedae (P. Karst.) P. Karst. 1882").should_not be_nil
|
|
@@ -411,7 +412,7 @@ describe ScientificNameClean do
|
|
|
411
412
|
pos(sn).should == {0=>["genus", 13], 14=>["species", 25], 26=>["infraspecific_type", 32], 33=>["infraspecies", 41], 42=>["author_word", 47], 48=>["year", 52]}
|
|
412
413
|
end
|
|
413
414
|
|
|
414
|
-
|
|
415
|
+
|
|
415
416
|
it "should parse name with forma/fo./form./f." do
|
|
416
417
|
sn = "Caulerpa cupressoides forma nuda"
|
|
417
418
|
parse(sn).should_not be_nil
|
|
@@ -440,18 +441,18 @@ describe ScientificNameClean do
|
|
|
440
441
|
pos(sn).should == {0=>["genus", 12], 16=>["species", 25], 29=>["infraspecific_type", 31], 36=>["infraspecies", 43], 47=>["author_word", 53], 58=>["year", 62]}
|
|
441
442
|
parse('Polypodium vulgare nothosubsp. mantoniae (Rothm.) Schidlay').should_not be_nil
|
|
442
443
|
end
|
|
443
|
-
|
|
444
|
+
|
|
444
445
|
it "should parse name with several subspecies names NOT BOTANICAL CODE BUT NOT INFREQUENT" do
|
|
445
446
|
sn = "Hydnellum scrobiculatum var. zonatum f. parvum (Banker) D. Hall & D.E. Stuntz 1972"
|
|
446
447
|
parse(sn).should_not be_nil
|
|
447
448
|
value(sn).should == "Hydnellum scrobiculatum var. zonatum f. parvum (Banker) D. Hall & D.E. Stuntz 1972"
|
|
448
449
|
details(sn).should == [{:genus=>{:string=>"Hydnellum"}, :species=>{:string=>"scrobiculatum"}, :infraspecies=>[{:string=>"zonatum", :rank=>"var."}, {:string=>"parvum", :rank=>"f.", :authorship=>"(Banker) D. Hall & D.E. Stuntz 1972", :combinationAuthorTeam=>{:authorTeam=>"D. Hall & D.E. Stuntz", :author=>["D. Hall", "D.E. Stuntz"], :year=>"1972"}, :basionymAuthorTeam=>{:authorTeam=>"Banker", :author=>["Banker"]}}]}]
|
|
449
|
-
pos(sn).should == {0=>["genus", 9], 10=>["species", 23], 24=>["infraspecific_type", 28], 29=>["infraspecies", 36], 37=>["infraspecific_type", 39], 40=>["infraspecies", 46], 48=>["author_word", 54], 56=>["author_word", 58], 59=>["author_word", 63], 66=>["author_word", 70], 71=>["author_word", 77], 78=>["year", 82]}
|
|
450
|
+
pos(sn).should == {0=>["genus", 9], 10=>["species", 23], 24=>["infraspecific_type", 28], 29=>["infraspecies", 36], 37=>["infraspecific_type", 39], 40=>["infraspecies", 46], 48=>["author_word", 54], 56=>["author_word", 58], 59=>["author_word", 63], 66=>["author_word", 70], 71=>["author_word", 77], 78=>["year", 82]}
|
|
450
451
|
parse('Senecio fuchsii C.C.Gmel. subsp. fuchsii var. expansus (Boiss. & Heldr.) Hayek').should_not be_nil
|
|
451
452
|
parse('Senecio fuchsii C.C.Gmel. subsp. fuchsii var. fuchsii').should_not be_nil
|
|
452
453
|
end
|
|
453
|
-
|
|
454
|
-
|
|
454
|
+
|
|
455
|
+
|
|
455
456
|
it "should parse status BOTANICAL RARE" do
|
|
456
457
|
#it is always latin abbrev often 2 words
|
|
457
458
|
sn = "Arthopyrenia hyalospora (Nyl.) R.C. Harris comb. nov."
|
|
@@ -461,7 +462,7 @@ describe ScientificNameClean do
|
|
|
461
462
|
details(sn).should == [{:genus=>{:string=>"Arthopyrenia"}, :species=>{:string=>"hyalospora", :authorship=>"(Nyl.) R.C. Harris", :combinationAuthorTeam=>{:authorTeam=>"R.C. Harris", :author=>["R.C. Harris"]}, :basionymAuthorTeam=>{:authorTeam=>"Nyl.", :author=>["Nyl."]}}, :status=>"comb. nov."}]
|
|
462
463
|
pos(sn).should == {0=>["genus", 12], 13=>["species", 23], 25=>["author_word", 29], 31=>["author_word", 35], 36=>["author_word", 42]}
|
|
463
464
|
end
|
|
464
|
-
|
|
465
|
+
|
|
465
466
|
it "should parse revised (ex) names" do
|
|
466
467
|
#invalidly published
|
|
467
468
|
sn = "Arthopyrenia hyalospora (Nyl. ex Banker) R.C. Harris"
|
|
@@ -490,7 +491,7 @@ describe ScientificNameClean do
|
|
|
490
491
|
details(sn).should == [{:genus=>{:string=>"Salmonella"}, :species=>{:string=>"werahensis", :authorship=>"(Castellani) Hauduroy and Ehringer in Hauduroy 1937", :combinationAuthorTeam=>{:authorTeam=>"Hauduroy and Ehringer", :author=>["Hauduroy", "Ehringer"], :exAuthorTeam=>{:authorTeam=>"Hauduroy", :author=>["Hauduroy"], :year=>"1937"}}, :basionymAuthorTeam=>{:authorTeam=>"Castellani", :author=>["Castellani"]}}}]
|
|
491
492
|
pos(sn).should == {0=>["genus", 10], 11=>["species", 21], 23=>["author_word", 33], 35=>["author_word", 43], 48=>["author_word", 56], 60=>["author_word", 68], 69=>["year", 73]}
|
|
492
493
|
end
|
|
493
|
-
|
|
494
|
+
|
|
494
495
|
it 'should parse named hybrids' do
|
|
495
496
|
[
|
|
496
497
|
["×Agropogon P. Fourn. 1934", [{:uninomial=>{:string=>"Agropogon", :authorship=>"P. Fourn. 1934", :basionymAuthorTeam=>{:authorTeam=>"P. Fourn.", :author=>["P. Fourn."], :year=>"1934"}}}]],
|
|
@@ -504,12 +505,12 @@ describe ScientificNameClean do
|
|
|
504
505
|
["xHeucherella tiarelloides", [{:genus=>{:string=>"Heucherella"}, :species=>{:string=>"tiarelloides"}}]],
|
|
505
506
|
["x Heucherella tiarelloides", [{:genus=>{:string=>"Heucherella"}, :species=>{:string=>"tiarelloides"}}]],
|
|
506
507
|
["×Agropogon littoralis (Sm.) C. E. Hubb. 1946", [{:genus=>{:string=>"Agropogon"}, :species=>{:string=>"littoralis", :authorship=>"(Sm.) C. E. Hubb. 1946", :combinationAuthorTeam=>{:authorTeam=>"C. E. Hubb.", :author=>["C. E. Hubb."], :year=>"1946"}, :basionymAuthorTeam=>{:authorTeam=>"Sm.", :author=>["Sm."]}}}]]
|
|
507
|
-
].each do |res|
|
|
508
|
+
].each do |res|
|
|
508
509
|
parse(res[0]).should_not be_nil
|
|
509
510
|
parse(res[0]).hybrid.should be_true
|
|
510
511
|
details(res[0]).should == res[1]
|
|
511
512
|
end
|
|
512
|
-
[
|
|
513
|
+
[
|
|
513
514
|
['Asplenium X inexpectatum (E.L. Braun 1940) Morton (1956)',[{:genus=>{:string=>"Asplenium"}, :species=>{:string=>"inexpectatum", :authorship=>"(E.L. Braun 1940) Morton (1956)", :combinationAuthorTeam=>{:authorTeam=>"Morton", :author=>["Morton"], :year=>"1956"}, :basionymAuthorTeam=>{:authorTeam=>"E.L. Braun", :author=>["E.L. Braun"], :year=>"1940"}}}]],
|
|
514
515
|
['Mentha ×smithiana R. A. Graham 1949',[{:genus=>{:string=>"Mentha"}, :species=>{:string=>"smithiana", :authorship=>"R. A. Graham 1949", :basionymAuthorTeam=>{:authorTeam=>"R. A. Graham", :author=>["R. A. Graham"], :year=>"1949"}}}]],
|
|
515
516
|
['Salix ×capreola Andersson (1867)',[{:genus=>{:string=>"Salix"}, :species=>{:string=>"capreola", :authorship=>"Andersson (1867)", :basionymAuthorTeam=>{:authorTeam=>"Andersson", :author=>["Andersson"], :year=>"1867"}}}]],
|
|
@@ -523,7 +524,7 @@ describe ScientificNameClean do
|
|
|
523
524
|
canonical(sn).should == "Rosa alpina × pomifera"
|
|
524
525
|
parse(sn).details.should == [{:genus=>{:string=>"Rosa"}, :species=>{:string=>"alpina"}}, {:species=>{:string=>"pomifera"}, :genus=>{:string=>"Rosa"}}]
|
|
525
526
|
end
|
|
526
|
-
|
|
527
|
+
|
|
527
528
|
it "should parse hybrid combination" do
|
|
528
529
|
sn = "Arthopyrenia hyalospora X Hydnellum scrobiculatum"
|
|
529
530
|
parse(sn).should_not be_nil
|
|
@@ -538,7 +539,7 @@ describe ScientificNameClean do
|
|
|
538
539
|
value(sn).should == "Arthopyrenia hyalospora (Banker) D. Hall \303\227 Hydnellum scrobiculatum D.E. Stuntz"
|
|
539
540
|
canonical(sn).should == "Arthopyrenia hyalospora × Hydnellum scrobiculatum"
|
|
540
541
|
pos(sn).should == {0=>["genus", 12], 13=>["species", 23], 25=>["author_word", 31], 33=>["author_word", 35], 36=>["author_word", 40], 43=>["genus", 52], 53=>["species", 66], 67=>["author_word", 71], 72=>["author_word", 78]}
|
|
541
|
-
value("Arthopyrenia hyalospora X").should == "Arthopyrenia hyalospora \303\227 ?"
|
|
542
|
+
value("Arthopyrenia hyalospora X").should == "Arthopyrenia hyalospora \303\227 ?"
|
|
542
543
|
sn = "Arthopyrenia hyalospora x"
|
|
543
544
|
parse(sn).should_not be_nil
|
|
544
545
|
parse(sn).hybrid.should be_true
|
|
@@ -551,7 +552,7 @@ describe ScientificNameClean do
|
|
|
551
552
|
details(sn).should == [{:genus=>{:string=>"Arthopyrenia"}, :species=>{:string=>"hyalospora"}}, "?"]
|
|
552
553
|
pos(sn).should == {0=>["genus", 12], 13=>["species", 23]}
|
|
553
554
|
end
|
|
554
|
-
|
|
555
|
+
|
|
555
556
|
it 'should parse names with taxon concept' do
|
|
556
557
|
sn = "Stenometope laevissimus sec. Eschmeyer 2004"
|
|
557
558
|
details(sn).should == [{:genus=>{:string=>"Stenometope"}, :species=>{:string=>"laevissimus"}, :taxon_concept=>{:authorship=>"Eschmeyer 2004", :basionymAuthorTeam=>{:authorTeam=>"Eschmeyer", :author=>["Eschmeyer"], :year=>"2004"}}}]
|
|
@@ -561,12 +562,12 @@ describe ScientificNameClean do
|
|
|
561
562
|
details(sn).should == [{:genus=>{:string=>"Stenometope"}, :species=>{:string=>"laevissimus", :authorship=>"Bibron 1855", :basionymAuthorTeam=>{:authorTeam=>"Bibron", :author=>["Bibron"], :year=>"1855"}}, :taxon_concept=>{:authorship=>"Eschmeyer 2004", :basionymAuthorTeam=>{:authorTeam=>"Eschmeyer", :author=>["Eschmeyer"], :year=>"2004"}}}]
|
|
562
563
|
pos(sn).should == {0=>["genus", 11], 12=>["species", 23], 24=>["author_word", 30], 31=>["year", 35], 41=>["author_word", 50], 51=>["year", 55]}
|
|
563
564
|
end
|
|
564
|
-
|
|
565
|
+
|
|
565
566
|
it 'should parse names with spaces inconsistencies at the start and the end and in the middle' do
|
|
566
567
|
parse(" Asplenium X inexpectatum (E.L. Braun 1940) Morton (1956) ").should_not be_nil
|
|
567
568
|
end
|
|
568
|
-
|
|
569
|
-
it 'should parse names with any number of spaces' do
|
|
569
|
+
|
|
570
|
+
it 'should parse names with any number of spaces' do
|
|
570
571
|
sn = "Trematosphaeria phaeospora (E. Müll.) L. Holm 1957"
|
|
571
572
|
parse(sn).should_not be_nil
|
|
572
573
|
value(sn).should == "Trematosphaeria phaeospora (E. Müll.) L. Holm 1957"
|
|
@@ -574,7 +575,7 @@ describe ScientificNameClean do
|
|
|
574
575
|
details(sn).should == [{:genus=>{:string=>"Trematosphaeria"}, :species=>{:string=>"phaeospora", :authorship=>"(E. Müll.) L. Holm 1957", :combinationAuthorTeam=>{:authorTeam=>"L. Holm", :author=>["L. Holm"], :year=>"1957"}, :basionymAuthorTeam=>{:authorTeam=>"E. Müll.", :author=>["E. Müll."]}}}]
|
|
575
576
|
pos(sn).should == {0=>["genus", 15], 16=>["species", 26], 28=>["author_word", 30], 31=>["author_word", 36], 46=>["author_word", 48], 61=>["author_word", 65], 66=>["year", 70]}
|
|
576
577
|
end
|
|
577
|
-
|
|
578
|
+
|
|
578
579
|
it 'should not parse serveral authors groups with several years NOT CORRECT' do
|
|
579
580
|
parse("Pseudocercospora dendrobii (H.C. Burnett 1883) (Leight.) (Movss. 1967) U. Braun & Crous 2003").should be_nil
|
|
580
581
|
end
|
|
@@ -609,7 +610,7 @@ describe ScientificNameClean do
|
|
|
609
610
|
sn = 'Deschampsia cespitosa ssp pumila'
|
|
610
611
|
details(sn).should == [{:genus=>{:string=>"Deschampsia"}, :species=>{:string=>"cespitosa"}, :infraspecies=>[{:string=>"pumila", :rank=>"ssp"}]}]
|
|
611
612
|
end
|
|
612
|
-
|
|
613
|
+
|
|
613
614
|
# Combination genus names should be merged without dash or capital letter
|
|
614
615
|
it 'should parse hybrid names with capitalized second name in genus (botanical code error)' do
|
|
615
616
|
sn = 'Anacampti-Platanthera P. Fourn.'
|
|
@@ -627,13 +628,13 @@ describe ScientificNameClean do
|
|
|
627
628
|
canonical(sn).should == 'Oemona simplex'
|
|
628
629
|
end
|
|
629
630
|
#"Arthrosamanea eriorhachis (Harms & sine ref. ) Aubrév." -- ignore & sine ref. (means without reference)
|
|
630
|
-
|
|
631
|
+
|
|
631
632
|
=begin
|
|
632
633
|
new stuff
|
|
633
634
|
|
|
634
635
|
sn = "Orchidaceae × Asconopsis hort."
|
|
635
636
|
canonical(sn).should == "Orchidaceae x Asconopsis"
|
|
636
|
-
sn
|
|
637
|
+
sn
|
|
637
638
|
Tamiops swinhoei near hainanus|Tamiops swinhoei near hainanus
|
|
638
639
|
Conus textile form archiepiscopus|Conus textile form archiepiscopus|
|
|
639
640
|
Crypticus pseudosericeus ssp. olivieri Desbrochers des Loges,1881|Crypticus pseudosericeus olivieri des
|
data/spec/parser/test_data.txt
CHANGED
|
@@ -414,6 +414,7 @@ Buteo borealis ? ventralis|{"scientificName":{"parsed":true, "parser_version":"t
|
|
|
414
414
|
Cetraria islandica ? islandica|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Cetraria islandica ? islandica", "normalized":"Cetraria islandica", "canonical":"Cetraria islandica", "hybrid":false, "details":[{"genus":{"string":"Cetraria"}, "species":{"string":"islandica"}, "infraspecies":[{"annotation_identification":"?", "ignored":{"infraspecies":{"string":"islandica", "rank":"n/a"}}}]}], "parser_run":1, "positions":{"0":["genus", 8], "9":["species", 18], "19":["annotation_identification", 21]}}}
|
|
415
415
|
Euxoa nr. idahoensis sp. 1clay|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Euxoa nr. idahoensis sp. 1clay", "normalized":"Euxoa idahoensis", "canonical":"Euxoa idahoensis", "hybrid":false, "details":[{"genus":{"string":"Euxoa"}, "species":{"string":"idahoensis"}, "infraspecies":[{"annotation_identification":"sp.", "ignored":{"infraspecies":{"string":"uniclay", "rank":"n/a"}}}]}], "parser_run":1, "positions":{"0":["genus", 5], "10":["species", 20], "21":["annotation_identification", 24]}, "surrogate": true}}
|
|
416
416
|
Parus caeruleus species complex|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Parus caeruleus species complex", "normalized":"Parus caeruleus", "canonical":"Parus caeruleus", "hybrid":false, "details":[{"genus":{"string":"Parus"}, "species":{"string":"caeruleus"}}], "parser_run":1, "positions":{"0":["genus", 5], "6":["species", 15]}}}
|
|
417
|
+
Peristernia nassatula forskali Tapparone-Canefri 1875|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Peristernia nassatula forskali Tapparone-Canefri 1875", "normalized":"Peristernia nassatula forskali Tapparone-Canefri 1875", "canonical":"Peristernia nassatula forskali", "hybrid":false, "details":[{"genus":{"string":"Peristernia"}, "species":{"string":"nassatula"}, "infraspecies":[{"string":"forskali", "rank":"n/a", "authorship":"Tapparone-Canefri 1875", "basionymAuthorTeam":{"authorTeam":"Tapparone-Canefri", "author":["Tapparone-Canefri"], "year":"1875"}}]}], "parser_run":1, "positions":{"0":["genus", 11], "12":["species", 21], "22":["infraspecies", 30], "31":["author_word", 48], "49":["year", 53]}}}
|
|
417
418
|
|
|
418
419
|
#should flag surrogate names
|
|
419
420
|
Coleoptera sp. BOLD:AAV0432|{"scientificName":{"parsed":true, "parser_version":"test_version", "verbatim":"Coleoptera sp. BOLD:AAV0432", "normalized":"Coleoptera", "canonical":"Coleoptera", "hybrid":false, "details":[{"genus":{"string":"Coleoptera"}, "annotation_identification":"sp.", "ignored":{"unparsed":"BOLD:AAV0432"}}], "parser_run":1, "positions":{"0":["genus", 10], "11":["annotation_identification", 14]}, "surrogate":true}}
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: biodiversity
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 3.1.1
|
|
4
|
+
version: 3.1.2
|
|
5
5
|
prerelease:
|
|
6
6
|
platform: ruby
|
|
7
7
|
authors:
|
|
@@ -9,7 +9,7 @@ authors:
|
|
|
9
9
|
autorequire:
|
|
10
10
|
bindir: bin
|
|
11
11
|
cert_chain: []
|
|
12
|
-
date: 2013-06-
|
|
12
|
+
date: 2013-06-21 00:00:00.000000000 Z
|
|
13
13
|
dependencies:
|
|
14
14
|
- !ruby/object:Gem::Dependency
|
|
15
15
|
name: rake
|
|
@@ -209,6 +209,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
|
209
209
|
- - ! '>='
|
|
210
210
|
- !ruby/object:Gem::Version
|
|
211
211
|
version: '0'
|
|
212
|
+
segments:
|
|
213
|
+
- 0
|
|
214
|
+
hash: 3336294247914629914
|
|
212
215
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
213
216
|
none: false
|
|
214
217
|
requirements:
|