dimus-biodiversity 0.5.1 → 0.5.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -5,6 +5,68 @@ grammar ScientificNameDirty
5
5
  rule root
6
6
  super
7
7
  end
8
+
9
+ rule scientific_name_5
10
+ a:scientific_name_4 garbage {
11
+ def value
12
+ a.value
13
+ end
14
+
15
+ def canonical
16
+ a.canonical
17
+ end
18
+
19
+ def pos
20
+ a.pos
21
+ end
22
+
23
+ def details
24
+ a.details
25
+ end
26
+ }
27
+ /
28
+ super
29
+ end
30
+
31
+ rule infraspecies
32
+ a:infraspecies_epitheton space b:year {
33
+ def value
34
+ a.value + " " + b.value
35
+ end
36
+
37
+ def canonical
38
+ a.canonical
39
+ end
40
+
41
+ def pos
42
+ a.pos.merge(b.pos)
43
+ end
44
+
45
+ def details
46
+ {:infraspecies => a.details[:infraspecies].merge(b.details)}
47
+ end
48
+ }
49
+ /
50
+ a:infraspecies_epitheton space epitheton_authorship_inconsistencies space b:authorship {
51
+ def value
52
+ a.value + " " + b.value
53
+ end
54
+
55
+ def canonical
56
+ a.canonical
57
+ end
58
+
59
+ def pos
60
+ a.pos.merge(b.pos)
61
+ end
62
+
63
+ def details
64
+ {:infraspecies => a.details[:infraspecies].merge(b.details)}
65
+ end
66
+ }
67
+ /
68
+ super
69
+ end
8
70
 
9
71
  rule species
10
72
  a:species_epitheton space b:year {
@@ -33,8 +95,6 @@ grammar ScientificNameDirty
33
95
  /
34
96
  super
35
97
  end
36
-
37
-
38
98
 
39
99
  rule left_paren
40
100
  "(" space "("
@@ -71,6 +131,8 @@ grammar ScientificNameDirty
71
131
  end
72
132
  }
73
133
  /
134
+ year_number_with_punctuation
135
+ /
74
136
  approximate_year
75
137
  /
76
138
  double_year
@@ -111,6 +173,23 @@ grammar ScientificNameDirty
111
173
  }
112
174
  end
113
175
 
176
+ rule year_number_with_punctuation
177
+ a:year_number "." {
178
+ def value
179
+ a.text_value
180
+ end
181
+
182
+ def pos
183
+ {interval.begin => ['year', interval.end]}
184
+ end
185
+
186
+ def details
187
+ {:year => value}
188
+ end
189
+ }
190
+ end
191
+
192
+
114
193
  rule page_number
115
194
  ":" space [\d]+
116
195
  {
@@ -119,5 +198,14 @@ grammar ScientificNameDirty
119
198
  }
120
199
  end
121
200
 
201
+ rule epitheton_authorship_inconsistencies
202
+ ("corrig.")
203
+ end
204
+
205
+ rule garbage
206
+ space (["',.]) space [^щ]*
207
+ /
208
+ space_hard [^ш]+
209
+ end
122
210
 
123
211
  end
@@ -1,39 +1,11 @@
1
- require 'rubygems'
2
- require 'spec'
3
- require 'treetop'
4
- require 'yaml'
5
-
6
1
  #NOTE: this spec needs compiled treetop files.
7
2
  dir = File.dirname("__FILE__")
3
+ require File.expand_path(dir + '../../spec/parser/spec_helper')
8
4
  require File.expand_path(dir + '../../lib/biodiversity/parser')
9
5
 
10
6
  describe ScientificNameClean do
11
7
  before(:all) do
12
- @parser = ScientificNameParser.new
13
- end
14
-
15
- def parse(input)
16
- @parser.parse(input)
17
- end
18
-
19
- def value(input)
20
- parse(input).value
21
- end
22
-
23
- def canonical(input)
24
- parse(input).canonical
25
- end
26
-
27
- def details(input)
28
- parse(input).details
29
- end
30
-
31
- def pos(input)
32
- parse(input).pos
33
- end
34
-
35
- def json(input)
36
- parse(input).to_json
8
+ set_parser(ScientificNameParser.new)
37
9
  end
38
10
 
39
11
  it 'should generate standardized json' do
@@ -44,5 +16,10 @@ describe ScientificNameClean do
44
16
  JSON.load(json(name)).should == JSON.load(jsn)
45
17
  end
46
18
  end
19
+
20
+ it 'should generate reasonable output if parser failed' do
21
+ sn = 'ddd sljlkj 3223452432'
22
+ json(sn).should == '{"scientificName":{"parsed":false,"verbatim":"ddd sljlkj 3223452432"}}'
23
+ end
47
24
 
48
25
  end
@@ -1,38 +1,12 @@
1
+ # encoding: UTF-8
1
2
  dir = File.dirname("__FILE__")
2
- require 'rubygems'
3
- require 'spec'
4
- require 'treetop'
5
- require 'yaml'
6
-
7
- Treetop.load(File.expand_path(dir + '../../lib/biodiversity/parser/scientific_name_clean'))
8
- Treetop.load(File.expand_path(dir + '../../lib/biodiversity/parser/scientific_name_dirty'))
9
- Treetop.load(File.expand_path(dir + '../../lib/biodiversity/parser/scientific_name_canonical'))
10
-
3
+ require File.expand_path(dir + '../../spec/parser/spec_helper')
11
4
 
12
5
  describe ScientificNameCanonical do
13
6
  before(:all) do
14
- @parser = ScientificNameCanonicalParser.new
15
- end
16
-
17
- def parse(input)
18
- @parser.parse(input)
19
- end
20
-
21
- def value(input)
22
- parse(input).value
7
+ set_parser(ScientificNameCanonicalParser.new)
23
8
  end
24
9
 
25
- def canonical(input)
26
- parse(input).canonical
27
- end
28
-
29
- def details(input)
30
- parse(input).details
31
- end
32
-
33
- def pos(input)
34
- parse(input).pos
35
- end
36
10
 
37
11
  it 'should parse names with valid name part and unparseable rest' do
38
12
  [
@@ -48,4 +22,5 @@ describe ScientificNameCanonical do
48
22
  pos(n[0]).should == n[3]
49
23
  end
50
24
  end
25
+
51
26
  end
@@ -1,35 +1,11 @@
1
1
  # encoding: UTF-8
2
2
  dir = File.dirname("__FILE__")
3
- require 'rubygems'
4
- require 'spec'
5
- require 'yaml'
6
- require 'treetop'
3
+ require File.expand_path(dir + '../../spec/parser/spec_helper')
7
4
 
8
- Treetop.load(File.expand_path(dir + '../../lib/biodiversity/parser/scientific_name_clean'))
9
5
 
10
6
  describe ScientificNameClean do
11
7
  before(:all) do
12
- @parser = ScientificNameCleanParser.new
13
- end
14
-
15
- def parse(input)
16
- @parser.parse(input)
17
- end
18
-
19
- def value(input)
20
- parse(input).value
21
- end
22
-
23
- def canonical(input)
24
- parse(input).canonical
25
- end
26
-
27
- def details(input)
28
- parse(input).details
29
- end
30
-
31
- def pos(input)
32
- parse(input).pos
8
+ set_parser(ScientificNameCleanParser.new)
33
9
  end
34
10
 
35
11
  it 'should parse uninomial' do
@@ -312,7 +288,7 @@ describe ScientificNameClean do
312
288
  sn = "Gastrosericus eremorum von Beaumont 1955"
313
289
  canonical(sn).should == 'Gastrosericus eremorum'
314
290
  sn = "Cypraeovula (Luponia) amphithales perdentata"
315
- canonical(sn).should == 'Cypraeovula amphithales perdentata'
291
+ canonical(sn).should == 'Cypraeovula Luponia amphithales perdentata'
316
292
  details(sn).should == {:genus=>{:epitheton=>"Cypraeovula"}, :subgenus=>{:epitheton=>"Luponia"}, :species=>{:epitheton=>"amphithales"}, :infraspecies=>{:epitheton=>"perdentata", :rank=>"n/a"}}
317
293
  sn = "Polyrhachis orsyllus nat musculus Forel 1901"
318
294
  canonical(sn).should == "Polyrhachis orsyllus musculus"
@@ -490,5 +466,33 @@ describe ScientificNameClean do
490
466
  parse("Trematоsphaeria phaeáapora").should be_nil #cyrillic o
491
467
  end
492
468
 
469
+ it "should parse new stuff" do
470
+ sn = 'Nesticus quelpartensis Paik & Namkung, in Paik, Yaginuma & Namkung, 1969'
471
+ details(sn).should == {:genus=>{:epitheton=>"Nesticus"}, :species=>{:epitheton=>"quelpartensis", :authorship=>"Paik & Namkung, in Paik, Yaginuma & Namkung, 1969", :basionymAuthorTeam=>{:authorTeam=>"Paik & Namkung", :author=>["Paik", "Namkung"], :exAuthorTeam=>{:authorTeam=>"Paik, Yaginuma & Namkung", :author=>["Paik", "Yaginuma", "Namkung"], :year=>"1969"}}}}
472
+ parse('Dipoena yoshidai Ono, in Ono et al., 1991').should_not be_nil
473
+ sn = 'Choriozopella trägårdhi Lawrence, 1947'
474
+ details(sn).should == {:genus=>{:epitheton=>"Choriozopella"}, :species=>{:epitheton=>"trägårdhi", :authorship=>"Lawrence, 1947", :basionymAuthorTeam=>{:authorTeam=>"Lawrence", :author=>["Lawrence"], :year=>"1947"}}}
475
+ sn = 'Latrodectus mactans bishopi Kaston, 1938'
476
+ details(sn).should == {:genus=>{:epitheton=>"Latrodectus"}, :species=>{:epitheton=>"mactans"}, :infraspecies=>{:epitheton=>"bishopi", :rank=>"n/a", :authorship=>"Kaston, 1938", :basionymAuthorTeam=>{:authorTeam=>"Kaston", :author=>["Kaston"], :year=>"1938"}}}
477
+ sn = 'Diplocephalus aff. procerus Thaler, 1972'
478
+ details(sn).should == {:genus=>{:epitheton=>"Diplocephalus"}, :species=>{:epitheton=>"procerus", :authorship=>"Thaler, 1972", :basionymAuthorTeam=>{:authorTeam=>"Thaler", :author=>["Thaler"], :year=>"1972"}}}
479
+ sn = 'Dyarcyops birói Kulczynski, 1908'
480
+ details(sn).should == {:genus=>{:epitheton=>"Dyarcyops"}, :species=>{:epitheton=>"birói", :authorship=>"Kulczynski, 1908", :basionymAuthorTeam=>{:authorTeam=>"Kulczynski", :author=>["Kulczynski"], :year=>"1908"}}}
481
+ sn = 'Sparassus françoisi Simon, 1898'
482
+ details(sn).should == {:genus=>{:epitheton=>"Sparassus"}, :species=>{:epitheton=>"françoisi", :authorship=>"Simon, 1898", :basionymAuthorTeam=>{:authorTeam=>"Simon", :author=>["Simon"], :year=>"1898"}}}
483
+ sn = 'Thiobacillus x Parker and Prisk 1953' #have to figure out black lists for this one
484
+ sn = 'Bacille de Plaut, Kritchevsky and Séguin 1921'
485
+ details(sn).should == {:uninomial=>{:epitheton=>"Bacille", :authorship=>"de Plaut, Kritchevsky and Séguin 1921", :basionymAuthorTeam=>{:authorTeam=>"de Plaut, Kritchevsky and Séguin", :author=>["de Plaut", "Kritchevsky", "Séguin"], :year=>"1921"}}}
486
+ sn = 'Araneus van bruysseli Petrunkevitch, 1911'
487
+ details(sn).should == {:genus=>{:epitheton=>"Araneus"}, :species=>{:epitheton=>"van"}, :infraspecies=>{:epitheton=>"bruysseli", :rank=>"n/a", :authorship=>"Petrunkevitch, 1911", :basionymAuthorTeam=>{:authorTeam=>"Petrunkevitch", :author=>["Petrunkevitch"], :year=>"1911"}}}
488
+ sn = 'Sapromyces laidlawi ab Sabin 1941'
489
+ details(sn).should == {:genus=>{:epitheton=>"Sapromyces"}, :species=>{:epitheton=>"laidlawi", :authorship=>"ab Sabin 1941", :basionymAuthorTeam=>{:authorTeam=>"ab Sabin", :author=>["ab Sabin"], :year=>"1941"}}}
490
+ sn = 'Nocardia rugosa di Marco and Spalla 1957'
491
+ details(sn).should == {:genus=>{:epitheton=>"Nocardia"}, :species=>{:epitheton=>"rugosa", :authorship=>"di Marco and Spalla 1957", :basionymAuthorTeam=>{:authorTeam=>"di Marco and Spalla", :author=>["di Marco", "Spalla"], :year=>"1957"}}}
492
+ sn = 'Flexibacter elegans Lewin 1969 non Soriano 1945'
493
+ details(sn).should == {:genus=>{:epitheton=>"Flexibacter"}, :species=>{:epitheton=>"elegans", :authorship=>"Lewin 1969 non Soriano 1945", :basionymAuthorTeam=>{:authorTeam=>"Lewin", :author=>["Lewin"], :year=>"1969"}}}
494
+ sn = 'Flexibacter elegans Soriano 1945, non Lewin 1969'
495
+ details(sn).should == {:genus=>{:epitheton=>"Flexibacter"}, :species=>{:epitheton=>"elegans", :authorship=>"Soriano 1945, non Lewin 1969", :basionymAuthorTeam=>{:authorTeam=>"Soriano", :author=>["Soriano"], :year=>"1945"}}}
496
+ end
493
497
 
494
498
  end
@@ -1,55 +1,13 @@
1
1
  # encoding: UTF-8
2
2
  dir = File.dirname("__FILE__")
3
- require 'rubygems'
4
- require 'spec'
5
- require 'yaml'
6
- require 'treetop'
7
-
8
- Treetop.load(File.expand_path(dir + '../../lib/biodiversity/parser/scientific_name_clean'))
9
- Treetop.load(File.expand_path(dir + '../../lib/biodiversity/parser/scientific_name_dirty'))
3
+ require File.expand_path(dir + '../../spec/parser/spec_helper')
10
4
 
11
5
  describe ScientificNameDirty do
12
6
  before(:all) do
13
- @parser = ScientificNameDirtyParser.new
14
- end
15
-
16
- def parse(input)
17
- @parser.parse(input)
18
- end
19
-
20
- def value(input)
21
- parse(input).value
22
- end
23
-
24
- def canonical(input)
25
- parse(input).canonical
26
- end
27
-
28
- def details(input)
29
- parse(input).details
30
- end
31
-
32
- def pos(input)
33
- parse(input).pos
34
- end
35
-
36
- def debug(input)
37
- res = parse(input)
38
- puts "<pre>"
39
- if res
40
- puts 'success!'
41
- puts res.inspect
42
- else
43
- puts input
44
- val = @parser.failure_reason.to_s.match(/column [0-9]*/).to_s.gsub(/column /,'').to_i
45
- print ("-" * (val - 1))
46
- print "^ Computer says 'no'!\n"
47
- puts @parser.failure_reason
48
- puts @parser.to_yaml
49
- end
50
- puts "</pre>"
7
+ set_parser(ScientificNameDirtyParser.new)
51
8
  end
52
9
 
10
+
53
11
  it 'should parse clean names' do
54
12
  parse("Betula verucosa (L.) Bar. 1899").should_not be_nil
55
13
  end
@@ -113,4 +71,20 @@ describe ScientificNameDirty do
113
71
  pos(sn).should == {0=>["genus", 8], 9=>["species", 18], 19=>["author_word", 24], 26=>["year", 30], 32=>["year", 36]}
114
72
  end
115
73
 
74
+ it "should parse new stuff" do
75
+ sn = 'Zoropsis (TaKeoa) nishimurai Yaginuma, 1971' #skipping for now
76
+ sn = 'Campylobacter pyloridis Marshall et al.1985.'
77
+ details(sn).should == {:genus=>{:epitheton=>"Campylobacter"}, :species=>{:epitheton=>"pyloridis", :authorship=>"Marshall et al.1985.", :basionymAuthorTeam=>{:authorTeam=>"Marshall et al.", :author=>["Marshall et al."], :year=>"1985"}}}
78
+ sn = 'Staphylococcus hyicus chromogenes Devriese et al. 1978 (Approved Lists 1980).'
79
+ details(sn).should == {:genus=>{:epitheton=>"Staphylococcus"}, :species=>{:epitheton=>"hyicus"}, :infraspecies=>{:epitheton=>"chromogenes", :rank=>"n/a", :authorship=>"Devriese et al. 1978", :basionymAuthorTeam=>{:authorTeam=>"Devriese et al.", :author=>["Devriese et al."], :year=>"1978"}}}
80
+ sn = 'Kitasatospora corrig. griseola Takahashi et al. 1985.'
81
+ details(sn).should == {:genus=>{:epitheton=>"Kitasatospora"}, :species=>{:epitheton=>"griseola", :authorship=>"Takahashi et al. 1985.", :basionymAuthorTeam=>{:authorTeam=>"Takahashi et al.", :author=>["Takahashi et al."], :year=>"1985"}}}
82
+ sn = 'Beijerinckia derxii venezuelae corrig. Thompson and Skerman, 1981'
83
+ details(sn).should == {:genus=>{:epitheton=>"Beijerinckia"}, :species=>{:epitheton=>"derxii"}, :infraspecies=>{:epitheton=>"venezuelae", :rank=>"n/a", :authorship=>"Thompson and Skerman, 1981", :basionymAuthorTeam=>{:authorTeam=>"Thompson and Skerman", :author=>["Thompson", "Skerman"], :year=>"1981"}}}
84
+ details('Streptomyces parvisporogenes ignotus 1960').should == {:genus=>{:epitheton=>"Streptomyces"}, :species=>{:epitheton=>"parvisporogenes"}, :infraspecies=>{:epitheton=>"ignotus", :rank=>"n/a", :year=>"1960"}}
85
+ details('Oscillaria caviae Simons 1920, according to Simons 1922').should == {:genus=>{:epitheton=>"Oscillaria"}, :species=>{:epitheton=>"caviae", :authorship=>"Simons 1920", :basionymAuthorTeam=>{:authorTeam=>"Simons", :author=>["Simons"], :year=>"1920"}}}
86
+ sn = 'Bacterium monocytogenes hominis"" Nyfeldt 1932'
87
+ details(sn).should == {:genus=>{:epitheton=>"Bacterium"}, :species=>{:epitheton=>"monocytogenes"}, :infraspecies=>{:epitheton=>"hominis", :rank=>"n/a"}}
88
+ end
89
+
116
90
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dimus-biodiversity
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.1
4
+ version: 0.5.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dmitry Mozzherin