dimus-biodiversity 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,244 @@
1
# Directory that contains this spec file. __FILE__ has to be the bare keyword:
# the quoted string "__FILE__" is just a file name with no directory part, so
# File.dirname returned "." no matter where the spec actually lives.
dir = File.dirname(__FILE__)
require 'rubygems'
require 'spec'
require 'treetop'
require 'biodiversity'
# Load the grammar relative to this file rather than the working directory.
# Passing +dir+ as the base argument lets File.expand_path insert the path
# separator; the former string concatenation (dir + '../../lib/...') had none
# and only resolved correctly when the specs were started from the gem root.
Treetop.load(File.expand_path('../../lib/biodiversity/parser/scientific_name', dir))
7
+
8
# Examples for the scientific name parser. Each example feeds one or more name
# strings to a shared ScientificNameParser and checks whether the string is
# accepted (non-nil parse result) and, where given, the expected +value+,
# +canonical+ and +details+ of the result.
describe ScientificName do
  # A single parser instance is created once and reused by every example.
  before(:all) do
    @parser = ScientificNameParser.new
  end

  # Parses +input+ with the shared parser and returns the raw result.
  # The examples treat a nil result as "name rejected".
  def parse(input)
    @parser.parse(input)
  end

  # Returns +value+ of the parse result for +input+. The examples below expect
  # the full name with normalised spacing (and "×" for hybrid signs).
  # Raises NoMethodError when +input+ does not parse (result is nil).
  def value(input)
    parse(input).value
  end

  # Returns +canonical+ of the parse result for +input+. The examples below
  # expect the name without authors, years and rank markers.
  # Raises NoMethodError when +input+ does not parse (result is nil).
  def canonical(input)
    parse(input).canonical
  end

  # Returns +details+ of the parse result for +input+. The examples below
  # expect a Hash describing the recognised parts of the name.
  # Raises NoMethodError when +input+ does not parse (result is nil).
  def details(input)
    parse(input).details
  end

  it 'should parse uninomial' do
    sn = 'Pseudocercospora'
    parse(sn).should_not be_nil
    value(sn).should == 'Pseudocercospora'
    canonical(sn).should == 'Pseudocercospora'
    details(sn).should == {:uninomial=>"Pseudocercospora", :name_type=>"Uninomial"}
  end

  it 'should parse canonical' do
    sn = 'Pseudocercospora dendrobii'
    parse(sn).should_not be_nil
    value(sn).should == 'Pseudocercospora dendrobii'
    canonical(sn).should == 'Pseudocercospora dendrobii'
    details(sn).should == {:species=>"dendrobii", :genus=>"Pseudocercospora"}
  end

  it 'should parse subgenus ZOOLOGICAL' do
    sn = "Doriteuthis (Amerigo) pealeii Author 1999"
    parse(sn).should_not be_nil
    value(sn).should == "Doriteuthis (Amerigo) pealeii Author 1999"
    canonical(sn).should == "Doriteuthis pealeii"
    details(sn).should == {:subgenus=>"Amerigo", :authors=>{:year=>"1999", :names=>["Author"]}, :species=>"pealeii", :genus=>"Doriteuthis"}
  end

  # NOTE: this example has no active expectation and therefore passes
  # vacuously; the intended check is kept below, commented out.
  it 'should parse species autonym for complex subspecies authorships' do
    #parse("Aus bus Linn. var. bus").should_not be_nil
    # aus genus, bus species, Linn. author, var. rank, bus infraspecific epithet
  end

  it 'should parse several authors' do
    sn = "Pseudocercospora dendrobii U. Braun & Crous"
    parse(sn).should_not be_nil
    value(sn).should == "Pseudocercospora dendrobii U. Braun & Crous"
    canonical(sn).should == "Pseudocercospora dendrobii"
    details(sn).should == {
      :authors=>{:names=>["U. Braun","Crous"]},
      :species=>"dendrobii",
      :genus=>"Pseudocercospora"}
  end

  it 'should parse several authors with a year' do
    sn = "Pseudocercospora dendrobii U. Braun & Crous 2003"
    parse(sn).should_not be_nil
    value(sn).should == "Pseudocercospora dendrobii U. Braun & Crous 2003"
    canonical(sn).should == "Pseudocercospora dendrobii"
    details(sn).should == {
      :authors=>{:names=>["U. Braun","Crous"], :year => "2003"},
      :species=>"dendrobii",
      :genus=>"Pseudocercospora"}
    sn = "Pseudocercospora dendrobii Crous, 2003"
    parse(sn).should_not be_nil
  end

  it 'should parse scientific name' do
    parse("Pseudocercospora dendrobii (H.C. Burnett) U. Braun & Crous 2003").should_not be_nil
    value("Pseudocercospora dendrobii(H.C. Burnett)U. Braun & Crous 2003").should == "Pseudocercospora dendrobii (H.C. Burnett) U. Braun & Crous 2003"
    canonical("Pseudocercospora dendrobii(H.C. Burnett)U. Braun & Crous 2003").should == "Pseudocercospora dendrobii"
    # The expected hash used to stand alone as a no-op expression, so the
    # details of this name were never actually checked.
    details("Pseudocercospora dendrobii(H.C. Burnett)U. Braun & Crous 2003").should == {:orig_authors=>{:names=>["H.C. Burnett"]}, :species=>"dendrobii", :authors=>{:year=>"2003", :names=>["U. Braun", "Crous"]}, :genus=>"Pseudocercospora"}

    parse("Stagonospora polyspora M.T. Lucas & Sousa da Câmara 1934").should_not be_nil
    value("Stagonospora polyspora M.T. Lucas & Sousa da Câmara 1934").should == "Stagonospora polyspora M.T. Lucas & Sousa da Câmara 1934"
    details("Stagonospora polyspora M.T. Lucas & Sousa da Câmara 1934").should == {:authors=>{:year=>"1934", :names=>["M.T. Lucas", "Sousa da C\303\242mara"]}, :species=>"polyspora", :genus=>"Stagonospora"}

    parse("Cladoniicola staurospora Diederich, van den Boom & Aptroot 2001").should_not be_nil
    parse("Yarrowia lipolytica var. lipolytica (Wick., Kurtzman & E.A. Herrm.) Van der Walt & Arx 1981").should_not be_nil
    parse("Physalospora rubiginosa (Fr.) anon.").should_not be_nil
    parse("Pleurotus ëous (Berk.) Sacc. 1887").should_not be_nil
    parse("Lecanora wetmorei Śliwa 2004").should_not be_nil
    # valid
    # infraspecific
    parse("Calicium furfuraceum * furfuraceum (L.) Pers. 1797").should_not be_nil
    parse("Exobasidium vaccinii ** andromedae (P. Karst.) P. Karst. 1882").should_not be_nil
    parse("Urceolaria scruposa **** clausa Flot. 1849").should_not be_nil
    parse("Cortinarius angulatus B gracilescens Fr. 1838").should_not be_nil
    parse("Cyathicula scelobelonium").should_not be_nil
    # single quote that did not show
    # parse("Phytophthora hedraiandra De Cock & Man in ?t Veld 2004"
    # Phthora vastatrix d?Hérelle 1909
    # author is exception
    parse("Tuber liui A S. Xu 1999").should_not be_nil
    parse("Agaricus squamula Berk. & M.A. Curtis 1860").should_not be_nil
    parse("Peltula coriacea Büdel, Henssen & Wessels 1986").should_not be_nil
  end

  it 'should parse several authors with several years' do
    parse("Pseudocercospora dendrobii (H.C. Burnett 1883) U. Braun & Crous 2003").should_not be_nil
    value("Pseudocercospora dendrobii(H.C. Burnett1883)U. Braun & Crous 2003").should == "Pseudocercospora dendrobii (H.C. Burnett 1883) U. Braun & Crous 2003"
    canonical("Pseudocercospora dendrobii(H.C. Burnett 1883)U. Braun & Crous 2003").should == "Pseudocercospora dendrobii"
    details("Pseudocercospora dendrobii(H.C. Burnett 1883)U. Braun & Crous 2003").should == {:orig_authors=>{:year=>"1883", :names=>["H.C. Burnett"]}, :species=>"dendrobii", :authors=>{:year=>"2003", :names=>["U. Braun", "Crous"]}, :genus=>"Pseudocercospora"}
  end

  it 'should not parse serveral authors groups with several years NOT CORRECT' do
    parse("Pseudocercospora dendrobii (H.C. Burnett 1883) (Leight.) (Movss. 1967) U. Braun & Crous 2003").should be_nil
  end

  it 'should parse utf-8 name' do
    parse("Trematosphaeria phaeospora (E. Müll.) L. Holm 1957").should_not be_nil
    value("Trematosphaeria phaeospora ( E. Müll. )L. Holm 1957").should == "Trematosphaeria phaeospora (E. Müll.) L. Holm 1957"
    canonical("Trematosphaeria phaeospora(E. Müll.) L. Holm 1957").should == "Trematosphaeria phaeospora"
    details("Trematosphaeria phaeospora(E. Müll.) L. Holm 1957 ").should == {:orig_authors=>{:names=>["E. M\303\274ll."]}, :species=>"phaeospora", :authors=>{:year=>"1957", :names=>["L. Holm"]}, :genus=>"Trematosphaeria"}
  end

  it "should parse name with f." do
    parse("Sphaerotheca fuliginea f. dahliae Movss. 1967").should_not be_nil
    value(" Sphaerotheca fuliginea f. dahliae Movss. 1967 ").should == "Sphaerotheca fuliginea f. dahliae Movss. 1967"
    canonical("Sphaerotheca fuliginea f. dahliae Movss. 1967").should == "Sphaerotheca fuliginea dahliae"
    details("Sphaerotheca fuliginea f. dahliae Movss. 1967").should == {:subspecies=>[{:type=>"f.", :value=>"dahliae"}], :authors=>{:year=>"1967", :names=>["Movss."]}, :species=>"fuliginea", :genus=>"Sphaerotheca"}
  end

  it "should parse name with var." do
    parse("Phaeographis inusta var. macularis (Leight.) A.L. Sm. 1861").should_not be_nil
    value("Phaeographis inusta var. macularis(Leight.) A.L. Sm. 1861").should == "Phaeographis inusta var. macularis (Leight.) A.L. Sm. 1861"
    canonical("Phaeographis inusta var. macularis(Leight.) A.L. Sm. 1861").should == "Phaeographis inusta macularis"
  end

  it "should parse name with several subspecies names NOT BOTANICAL CODE BUT NOT INFREQUENT" do
    parse("Hydnellum scrobiculatum var. zonatum f. parvum (Banker) D. Hall & D.E. Stuntz 1972").should_not be_nil
    value("Hydnellum scrobiculatum var. zonatum f. parvum (Banker) D. Hall & D.E. Stuntz 1972").should == "Hydnellum scrobiculatum var. zonatum f. parvum (Banker) D. Hall & D.E. Stuntz 1972"
    details("Hydnellum scrobiculatum var. zonatum f. parvum (Banker) D. Hall & D.E. Stuntz 1972").should == {:orig_authors=>{:names=>["Banker"]}, :subspecies=>[{:type=>"var.", :value=>"zonatum"}, {:type=>"f.", :value=>"parvum"}], :species=>"scrobiculatum", :authors=>{:year=>"1972", :names=>["D. Hall", "D.E. Stuntz"]}, :genus=>"Hydnellum", :is_valid=>false}
  end

  it "should parse status BOTANICAL RARE" do
    #it is always latin abbrev often 2 words
    parse("Arthopyrenia hyalospora (Nyl.) R.C. Harris comb. nov.").should_not be_nil
    value("Arthopyrenia hyalospora (Nyl.) R.C. Harris comb. nov.").should == "Arthopyrenia hyalospora (Nyl.) R.C. Harris comb. nov."
    canonical("Arthopyrenia hyalospora (Nyl.) R.C. Harris comb. nov.").should == "Arthopyrenia hyalospora"
    details("Arthopyrenia hyalospora (Nyl.) R.C. Harris comb. nov.").should == {:status=>"comb. nov.", :orig_authors=>{:names=>["Nyl."]}, :species=>"hyalospora", :authors=>{:names=>["R.C. Harris"]}, :genus=>"Arthopyrenia"}
  end

  it "should parse name without a year but with authors" do
    parse("Arthopyrenia hyalospora (Nyl.) R.C. Harris").should_not be_nil
    value("Arthopyrenia hyalospora(Nyl.)R.C. Harris").should == "Arthopyrenia hyalospora (Nyl.) R.C. Harris"
    canonical("Arthopyrenia hyalospora (Nyl.) R.C. Harris").should == "Arthopyrenia hyalospora"
  end

  it "should parse revised (ex) names" do
    #invalidly published
    parse("Arthopyrenia hyalospora (Nyl. ex Banker) R.C. Harris").should_not be_nil
    value("Arthopyrenia hyalospora (Nyl. ex Banker) R.C. Harris").should == "Arthopyrenia hyalospora (Nyl. ex Banker) R.C. Harris"
    canonical("Arthopyrenia hyalospora (Nyl. ex Banker) R.C. Harris").should == "Arthopyrenia hyalospora"
    details("Arthopyrenia hyalospora (Nyl. ex Banker) R.C. Harris").should == {:species=>"hyalospora", :authors=>{:names=>["R.C. Harris"]}, :genus=>"Arthopyrenia", :original_revised_name_authors=>{:authors=>{:names=>["Banker"]}, :revised_authors=>{:names=>["Nyl."]}}}
    parse("Arthopyrenia hyalospora Nyl. ex Banker").should_not be_nil

    parse("Glomopsis lonicerae Peck ex C.J. Gould 1945").should_not be_nil
    details("Glomopsis lonicerae Peck ex C.J. Gould 1945").should == {:revised_name_authors=>{:authors=>{:year=>"1945", :names=>["C.J. Gould"]}, :revised_authors=>{:names=>["Peck"]}}, :species=>"lonicerae", :genus=>"Glomopsis"}

    parse("Acanthobasidium delicatum (Wakef.) Oberw. ex Jülich 1979").should_not be_nil
    parse("Mycosphaerella eryngii (Fr. ex Duby) Johanson ex Oudem. 1897").should_not be_nil
    details("Mycosphaerella eryngii (Fr. ex Duby) Johanson ex Oudem. 1897").should == {:original_revised_name_authors=>{:authors=>{:names=>["Duby"]}, :revised_authors=>{:names=>["Fr."]}}, :species=>"eryngii", :genus=>"Mycosphaerella", :revised_name_authors=>{:authors=>{:year=>"1897", :names=>["Oudem."]}, :revised_authors=>{:names=>["Johanson"]}}}
    #invalid but happens
    parse("Mycosphaerella eryngii (Fr. Duby) ex Oudem. 1897").should_not be_nil
    parse("Mycosphaerella eryngii (Fr.ex Duby) ex Oudem. 1897").should_not be_nil
  end

  it "should parse multiplication sign" do
    parse("Arthopyrenia x hyalospora (Nyl.) R.C. Harris").should_not be_nil
    details("Arthopyrenia x hyalospora (Nyl. ex Banker) R.C. Harris").should == {:original_revised_name_authors=>{:authors=>{:names=>["Banker"]}, :revised_authors=>{:names=>["Nyl."]}}, :species=>"hyalospora", :authors=>{:names=>["R.C. Harris"]}, :genus=>"Arthopyrenia", :cross=>"inside"}
    parse("Arthopyrenia X hyalospora(Nyl. ex Banker) R.C. Harris").should_not be_nil
    parse("x Arthopyrenia hyalospora (Nyl. ex Banker) R.C. Harris").should_not be_nil
    details("x Arthopyrenia hyalospora (Nyl. ex Banker) R.C. Harris").should == {:original_revised_name_authors=>{:authors=>{:names=>["Banker"]}, :revised_authors=>{:names=>["Nyl."]}}, :species=>"hyalospora", :authors=>{:names=>["R.C. Harris"]}, :genus=>"Arthopyrenia", :cross=>"before"}
    parse("X Arthopyrenia (Nyl. ex Banker) R.C. Harris").should_not be_nil
    details("X Arthopyrenia (Nyl. ex Banker) R.C. Harris").should == {:uninomial=>"Arthopyrenia", :original_revised_name_authors=>{:authors=>{:names=>["Banker"]}, :revised_authors=>{:names=>["Nyl."]}}, :authors=>{:names=>["R.C. Harris"]}, :cross=>"before"}
    # the real Unicode multiplication sign (U+00D7) rather than the ASCII letter x
    parse("Melampsora × columbiana G. Newc. 2000").should_not be_nil
  end

  it "should parse hybrid combination" do
    parse("Arthopyrenia hyalospora X Hydnellum scrobiculatum").should_not be_nil
    value("Arthopyrenia hyalospora X Hydnellum scrobiculatum").should == "Arthopyrenia hyalospora \303\227 Hydnellum scrobiculatum"
    canonical("Arthopyrenia hyalospora X Hydnellum scrobiculatum").should == "Arthopyrenia hyalospora \303\227 Hydnellum scrobiculatum"
    details("Arthopyrenia hyalospora x Hydnellum scrobiculatum").should == {:hybrid=>{:scientific_name1=>{:species=>"hyalospora", :genus=>"Arthopyrenia"}, :scientific_name2=>{:species=>"scrobiculatum", :genus=>"Hydnellum"}}}

    parse("Arthopyrenia hyalospora (Banker) D. Hall x Hydnellum scrobiculatum D.E. Stuntz").should_not be_nil
    value("Arthopyrenia hyalospora (Banker) D. Hall X Hydnellum scrobiculatum D.E. Stuntz").should == "Arthopyrenia hyalospora (Banker) D. Hall \303\227 Hydnellum scrobiculatum D.E. Stuntz"
    canonical("Arthopyrenia hyalospora (Banker) D. Hall X Hydnellum scrobiculatum D.E. Stuntz").should == "Arthopyrenia hyalospora \303\227 Hydnellum scrobiculatum"

    parse("Arthopyrenia hyalospora x").should_not be_nil
    value("Arthopyrenia hyalospora X").should == "Arthopyrenia hyalospora \303\227 ?"
    canonical("Arthopyrenia hyalospora x").should == "Arthopyrenia hyalospora"
    details("Arthopyrenia hyalospora x").should == {:hybrid=>{:scientific_name1=>{:species=>"hyalospora", :genus=>"Arthopyrenia"}, :scientific_name2=>"?"}}
    parse("Arthopyrenia hyalospora × ?").should_not be_nil
    details("Arthopyrenia hyalospora × ?").should == {:hybrid=>{:scientific_name1=>{:species=>"hyalospora", :genus=>"Arthopyrenia"}, :scientific_name2=>"?"}}
  end

  it "should parse name with subspecies without rank selector NOT BOTANICAL" do
    name = "Hydnellum scrobiculatum zonatum (Banker) D. Hall & D.E. Stuntz 1972"
    parse(name).should_not be_nil
    value(name).should == "Hydnellum scrobiculatum zonatum (Banker) D. Hall & D.E. Stuntz 1972"
    canonical(name).should == "Hydnellum scrobiculatum zonatum"
    details(name).should == {:orig_authors=>{:names=>["Banker"]}, :subspecies=>{:type=>"n/a", :value=>"zonatum"}, :species=>"scrobiculatum", :authors=>{:year=>"1972", :names=>["D. Hall", "D.E. Stuntz"]}, :genus=>"Hydnellum"}
  end

  it "should not parse utf-8 chars in name part" do
    parse("Érematosphaeria phaespora").should be_nil
    parse("Trematosphaeria phaeáapora").should be_nil
  end

  it "should parse some invalid names" do
    parse("Acarospora cratericola 1929").should_not be_nil
    parse("Agaricus acris var. (b.)").should_not be_nil
    value("Agaricus acris var. (b.)").should == "Agaricus acris var. (b.)"
    # Check the same "(b.&c.)" input that the value/details expectations below
    # use; this line used to repeat the "(b.)" check from above.
    parse("Agaricus acris var. (b.&c.)").should_not be_nil
    value("Agaricus acris var. (b.&c.)").should == "Agaricus acris var. (b.c.)"
    details("Agaricus acris var. (b.&c.)").should == {:editorial_markup=>"(b.c.)", :subspecies=>[{:type=>"var.", :value=>nil}], :species=>"acris", :genus=>"Agaricus", :is_valid=>false}
  end

  # Documents a known limitation: this name is currently rejected.
  it "should not have this problems, but it has them" do
    parse("Saccharomyces drosophilae anon.").should be_nil
  end
end
metadata ADDED
@@ -0,0 +1,76 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: dimus-biodiversity
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Dmitry Mozzherin
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2008-10-21 00:00:00 -07:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: treetop
17
+ version_requirement:
18
+ version_requirements: !ruby/object:Gem::Requirement
19
+ requirements:
20
+ - - ">="
21
+ - !ruby/object:Gem::Version
22
+ version: 1.2.4
23
+ version:
24
+ description: Biodiversity library provides a parser tool for scientific species names
25
+ email: dmozzherin {et} eol {dt} org
26
+ executables: []
27
+
28
+ extensions: []
29
+
30
+ extra_rdoc_files:
31
+ - README.rdoc
32
+ - LICENSE
33
+ files:
34
+ - LICENSE
35
+ - README.rdoc
36
+ - Rakefile
37
+ - spec/parser
38
+ - spec/parser/scientific_name.spec.rb
39
+ - lib/biodiversity
40
+ - lib/biodiversity/parser
41
+ - lib/biodiversity/parser/scientific_name.rb
42
+ - lib/biodiversity/parser/scientific_name.treetop
43
+ - lib/biodiversity/parser.rb
44
+ - lib/biodiversity.rb
45
+ - bin/nnparse
46
+ has_rdoc: true
47
+ homepage: http://github.com/dimus/biodiversity/wikis
48
+ post_install_message:
49
+ rdoc_options:
50
+ - --main
51
+ - README.rdoc
52
+ - --inline-source
53
+ - --charset=UTF-8
54
+ require_paths:
55
+ - lib
56
+ required_ruby_version: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ version: "0"
61
+ version:
62
+ required_rubygems_version: !ruby/object:Gem::Requirement
63
+ requirements:
64
+ - - ">="
65
+ - !ruby/object:Gem::Version
66
+ version: "0"
67
+ version:
68
+ requirements: []
69
+
70
+ rubyforge_project:
71
+ rubygems_version: 1.2.0
72
+ signing_key:
73
+ specification_version: 2
74
+ summary: scientific species name parser
75
+ test_files: []
76
+