dimus-biodiversity 0.0.18 → 0.5.1
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/biodiversity/parser.rb +18 -35
- data/lib/biodiversity/parser/scientific_name_canonical.rb +248 -83
- data/lib/biodiversity/parser/scientific_name_canonical.treetop +46 -20
- data/lib/biodiversity/parser/scientific_name_clean.rb +3304 -3409
- data/lib/biodiversity/parser/scientific_name_clean.treetop +539 -500
- data/lib/biodiversity/parser/scientific_name_dirty.rb +362 -213
- data/lib/biodiversity/parser/scientific_name_dirty.treetop +123 -98
- data/spec/parser/scientific_name.spec.rb +7 -28
- data/spec/parser/scientific_name_canonical.spec.rb +7 -6
- data/spec/parser/scientific_name_clean.spec.rb +256 -260
- data/spec/parser/scientific_name_dirty.spec.rb +62 -52
- metadata +2 -2
@@ -1,8 +1,9 @@
|
|
1
|
+
# encoding: UTF-8
|
1
2
|
dir = File.dirname("__FILE__")
|
2
3
|
require 'rubygems'
|
3
4
|
require 'spec'
|
4
|
-
require 'treetop'
|
5
5
|
require 'yaml'
|
6
|
+
require 'treetop'
|
6
7
|
|
7
8
|
Treetop.load(File.expand_path(dir + '../../lib/biodiversity/parser/scientific_name_clean'))
|
8
9
|
Treetop.load(File.expand_path(dir + '../../lib/biodiversity/parser/scientific_name_dirty'))
|
@@ -32,61 +33,48 @@ describe ScientificNameDirty do
|
|
32
33
|
parse(input).pos
|
33
34
|
end
|
34
35
|
|
36
|
+
def debug(input)
|
37
|
+
res = parse(input)
|
38
|
+
puts "<pre>"
|
39
|
+
if res
|
40
|
+
puts 'success!'
|
41
|
+
puts res.inspect
|
42
|
+
else
|
43
|
+
puts input
|
44
|
+
val = @parser.failure_reason.to_s.match(/column [0-9]*/).to_s.gsub(/column /,'').to_i
|
45
|
+
print ("-" * (val - 1))
|
46
|
+
print "^ Computer says 'no'!\n"
|
47
|
+
puts @parser.failure_reason
|
48
|
+
puts @parser.to_yaml
|
49
|
+
end
|
50
|
+
puts "</pre>"
|
51
|
+
end
|
52
|
+
|
35
53
|
it 'should parse clean names' do
|
36
|
-
parse("Betula
|
54
|
+
parse("Betula verucosa (L.) Bar. 1899").should_not be_nil
|
37
55
|
end
|
38
56
|
|
39
|
-
it 'should parse
|
40
|
-
|
41
|
-
value("Anthoscopus Cabanis [185?]").should == "Anthoscopus Cabanis (185?)"
|
42
|
-
parse("Anthoscopus Cabanis [1851?]").should_not be_nil
|
43
|
-
value("Anthoscopus Cabanis [1851]").should == "Anthoscopus Cabanis (1851)"
|
44
|
-
sn = "Anthoscopus Cabanis [1851?]"
|
45
|
-
value(sn).should == "Anthoscopus Cabanis (1851?)"
|
46
|
-
details(sn).should == {:uninomial=>"Anthoscopus", :authors=>{:names=>["Cabanis"], :approximate_year=>"(1851?)"}, :name_part_verbatim=>"Anthoscopus", :auth_part_verbatim=>"Cabanis [1851?]"}
|
47
|
-
pos(sn).should == {0=>["uninomial", 11], 12=>["author_word", 19], 21=>["year", 26]}
|
48
|
-
sn = "Trismegistia monodii Ando, 1973 [1974]"
|
49
|
-
parse(sn).should_not be_nil
|
50
|
-
value(sn).should == 'Trismegistia monodii Ando 1973 [1974]' #should it be 'Trismegistia monodii Ando 1973 (1974)' instead?
|
51
|
-
details(sn).should == {:genus=>"Trismegistia", :species=>"monodii", :authors=>{:names=>["Ando"], :ambiguous_year=>"1973 [1974]"}, :name_part_verbatim=>"Trismegistia monodii", :auth_part_verbatim=>"Ando, 1973 [1974]"}
|
52
|
-
pos(sn).should == {0=>["genus", 12], 13=>["species", 20], 21=>["author_word", 25], 27=>["year", 31], 33=>["year", 37]}
|
53
|
-
parse("Zygaena witti Wiegel [1973]").should_not be_nil
|
54
|
-
sn = "Deyeuxia coarctata Kunth, 1815 [1816]"
|
57
|
+
it 'should parse double parenthesis' do
|
58
|
+
sn = "Eichornia crassipes ( (Martius) ) Solms-Laub."
|
55
59
|
parse(sn).should_not be_nil
|
56
|
-
|
60
|
+
value(sn).should == "Eichornia crassipes (Martius) Solms-Laub."
|
61
|
+
details(sn).should == {:genus=>{:epitheton=>"Eichornia"}, :species=>{:epitheton=>"crassipes", :authorship=>"( (Martius) ) Solms-Laub.", :combinationAuthorTeam=>{:authorTeam=>"Solms-Laub.", :author=>["Solms-Laub."]}, :basionymAuthorTeam=>{:authorTeam=>"Martius", :author=>["Martius"]}}}
|
62
|
+
pos(sn).should == {0=>["genus", 9], 10=>["species", 19], 23=>["author_word", 30], 34=>["author_word", 45]}
|
57
63
|
end
|
58
|
-
|
59
|
-
it
|
60
|
-
sn = "
|
64
|
+
|
65
|
+
it "should parse year without author" do
|
66
|
+
sn = "Acarospora cratericola 1929"
|
61
67
|
parse(sn).should_not be_nil
|
62
|
-
|
63
|
-
details(sn).should == {:genus=>
|
64
|
-
pos(sn).should == {0=>["genus", 13], 14=>["species", 25], 26=>["author_word", 33], 34=>["year", 38]}
|
68
|
+
pos(sn).should == {0=>["genus", 10], 11=>["species", 22], 23=>["year", 27]}
|
69
|
+
details(sn).should == {:genus=>{:epitheton=>"Acarospora"}, :species=>{:epitheton=>"cratericola", :year=>"1929"}}
|
65
70
|
end
|
66
71
|
|
67
|
-
it 'should parse double
|
68
|
-
sn = "
|
72
|
+
it 'should parse double years' do
|
73
|
+
sn = "Tridentella tangeroae Bruce, 1987-92"
|
69
74
|
parse(sn).should_not be_nil
|
70
|
-
|
71
|
-
details(sn).should == {:genus=>"
|
72
|
-
|
73
|
-
end
|
74
|
-
|
75
|
-
# Acomys "Geoffroy, I." 1838
|
76
|
-
# Verpericola megasoma "Dall" Pils.
|
77
|
-
# Auricotes neoclayae "Price, Hellenthal and Palma 2003"
|
78
|
-
# Leccinum cinnamomeum var. cinnamomeum "A.H. Sm.
|
79
|
-
|
80
|
-
# it 'should parse quote' do
|
81
|
-
# val = 'Acomys "Geoffroy, I." 1838'
|
82
|
-
# end
|
83
|
-
|
84
|
-
# it 'should parse author with []' do
|
85
|
-
# # OK parse("Farsetia mutabilis [ R.Br. ]").should_not be_nil
|
86
|
-
# parse("Farsetia mutabilis [R.Br.]").should_not be_nil
|
87
|
-
# # value("Farsetia mutabilis [R.Br.]").should == "Farsetia mutabilis [R.Br.]"
|
88
|
-
# # details("Farsetia mutabilis [R.Br.]").should == {}
|
89
|
-
# end
|
75
|
+
pos(sn).should == {0=>["genus", 11], 12=>["species", 21], 22=>["author_word", 27], 29=>["year", 36]}
|
76
|
+
details(sn).should == {:genus=>{:epitheton=>"Tridentella"}, :species=>{:epitheton=>"tangeroae", :authorship=>"Bruce, 1987-92", :basionymAuthorTeam=>{:authorTeam=>"Bruce", :author=>["Bruce"], :year=>"1987-92"}}}
|
77
|
+
end
|
90
78
|
|
91
79
|
it 'should parse dirty years' do
|
92
80
|
parse("Tridentella tangeroae Bruce, 1988B").should_not be_nil
|
@@ -96,11 +84,33 @@ describe ScientificNameDirty do
|
|
96
84
|
parse(sn).should_not be_nil
|
97
85
|
pos(sn).should == {0=>["genus", 11], 12=>["species", 21], 22=>["author_word", 27], 29=>["year", 33]}
|
98
86
|
end
|
99
|
-
|
100
|
-
it 'should parse
|
101
|
-
sn = "
|
87
|
+
|
88
|
+
it 'should parse year with page number' do
|
89
|
+
sn = "Gymnodactylus irregularis WERMUTH 1965: 54"
|
102
90
|
parse(sn).should_not be_nil
|
103
|
-
|
91
|
+
value(sn).should == "Gymnodactylus irregularis WERMUTH 1965"
|
92
|
+
details(sn).should == {:genus=>{:epitheton=>"Gymnodactylus"}, :species=>{:epitheton=>"irregularis", :authorship=>"WERMUTH 1965: 54", :basionymAuthorTeam=>{:authorTeam=>"WERMUTH", :author=>["WERMUTH"], :year=>"1965"}}}
|
93
|
+
pos(sn).should == {0=>["genus", 13], 14=>["species", 25], 26=>["author_word", 33], 34=>["year", 38]}
|
104
94
|
end
|
105
|
-
|
95
|
+
|
96
|
+
it 'should parse year with []' do
|
97
|
+
parse("Anthoscopus Cabanis [1851]").should_not be_nil
|
98
|
+
value("Anthoscopus Cabanis [185?]").should == "Anthoscopus Cabanis (185?)"
|
99
|
+
parse("Anthoscopus Cabanis [1851?]").should_not be_nil
|
100
|
+
value("Anthoscopus Cabanis [1851]").should == "Anthoscopus Cabanis (1851)"
|
101
|
+
sn = "Anthoscopus Cabanis [1851?]"
|
102
|
+
value(sn).should == "Anthoscopus Cabanis (1851?)"
|
103
|
+
details(sn).should == {:uninomial=>{:epitheton=>"Anthoscopus", :authorship=>"Cabanis [1851?]", :basionymAuthorTeam=>{:authorTeam=>"Cabanis", :author=>["Cabanis"], :approximate_year=>"(1851?)"}}}
|
104
|
+
pos(sn).should == {0=>["uninomial", 11], 12=>["author_word", 19], 21=>["year", 26]}
|
105
|
+
sn = "Trismegistia monodii Ando, 1973 [1974]"
|
106
|
+
parse(sn).should_not be_nil
|
107
|
+
value(sn).should == 'Trismegistia monodii Ando 1973 (1974)' #should it be 'Trismegistia monodii Ando 1973 (1974)' instead?
|
108
|
+
details(sn).should == {:genus=>{:epitheton=>"Trismegistia"}, :species=>{:epitheton=>"monodii", :authorship=>"Ando, 1973 [1974]", :basionymAuthorTeam=>{:authorTeam=>"Ando", :author=>["Ando"], :year=>"1973", :approximate_year=>"(1974)"}}}
|
109
|
+
pos(sn).should == {0=>["genus", 12], 13=>["species", 20], 21=>["author_word", 25], 27=>["year", 31], 33=>["year", 37]}
|
110
|
+
parse("Zygaena witti Wiegel [1973]").should_not be_nil
|
111
|
+
sn = "Deyeuxia coarctata Kunth, 1815 [1816]"
|
112
|
+
parse(sn).should_not be_nil
|
113
|
+
pos(sn).should == {0=>["genus", 8], 9=>["species", 18], 19=>["author_word", 24], 26=>["year", 30], 32=>["year", 36]}
|
114
|
+
end
|
115
|
+
|
106
116
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dimus-biodiversity
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.18
|
4
|
+
version: 0.5.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dmitry Mozzherin
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-
|
12
|
+
date: 2009-04-11 00:00:00 -07:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|