dimus-biodiversity 0.0.7 → 0.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +3 -1
- data/bin/nnparse +1 -0
- data/lib/biodiversity.rb +2 -0
- data/lib/biodiversity/parser/scientific_name.rb +116 -28
- data/lib/biodiversity/parser/scientific_name.treetop +14 -2
- data/spec/parser/scientific_name.spec.rb +3 -2
- metadata +1 -1
data/README.rdoc
CHANGED
|
@@ -33,4 +33,6 @@ You can use it as a library
|
|
|
33
33
|
# to get detailed information about elements of the name
|
|
34
34
|
parser.parse("Pseudocercospora dendrobii (H.C. Burnett 1883) U. Braun & Crous 2003").details
|
|
35
35
|
|
|
36
|
-
|
|
36
|
+
# to resolve lsid and get back RDF file
|
|
37
|
+
LsidResolver.resolve("urn:lsid:ubio.org:classificationbank:2232671")
|
|
38
|
+
|
data/bin/nnparse
CHANGED
data/lib/biodiversity.rb
CHANGED
|
@@ -295,7 +295,7 @@ module ScientificName
|
|
|
295
295
|
end
|
|
296
296
|
|
|
297
297
|
i0 = index
|
|
298
|
-
r1 =
|
|
298
|
+
r1 = _nt_name_part_authors_mix
|
|
299
299
|
if r1
|
|
300
300
|
r0 = r1
|
|
301
301
|
else
|
|
@@ -544,7 +544,7 @@ module ScientificName
|
|
|
544
544
|
return r0
|
|
545
545
|
end
|
|
546
546
|
|
|
547
|
-
module
|
|
547
|
+
module NamePartAuthorsMix0
|
|
548
548
|
def a
|
|
549
549
|
elements[0]
|
|
550
550
|
end
|
|
@@ -564,9 +564,51 @@ module ScientificName
|
|
|
564
564
|
def c
|
|
565
565
|
elements[4]
|
|
566
566
|
end
|
|
567
|
+
|
|
568
|
+
def space
|
|
569
|
+
elements[5]
|
|
570
|
+
end
|
|
571
|
+
|
|
572
|
+
def d
|
|
573
|
+
elements[6]
|
|
574
|
+
end
|
|
567
575
|
end
|
|
568
576
|
|
|
569
|
-
module
|
|
577
|
+
module NamePartAuthorsMix1
|
|
578
|
+
def value
|
|
579
|
+
a.value + " " + b.value + " " + c.value + " " + d.value
|
|
580
|
+
end
|
|
581
|
+
def canonical
|
|
582
|
+
a.canonical + " " + c.canonical
|
|
583
|
+
end
|
|
584
|
+
def details
|
|
585
|
+
a.details.merge(c.details).merge({:species_authors=>b.details, :subspecies_authors => d.details})
|
|
586
|
+
end
|
|
587
|
+
end
|
|
588
|
+
|
|
589
|
+
module NamePartAuthorsMix2
|
|
590
|
+
def a
|
|
591
|
+
elements[0]
|
|
592
|
+
end
|
|
593
|
+
|
|
594
|
+
def space
|
|
595
|
+
elements[1]
|
|
596
|
+
end
|
|
597
|
+
|
|
598
|
+
def b
|
|
599
|
+
elements[2]
|
|
600
|
+
end
|
|
601
|
+
|
|
602
|
+
def space
|
|
603
|
+
elements[3]
|
|
604
|
+
end
|
|
605
|
+
|
|
606
|
+
def c
|
|
607
|
+
elements[4]
|
|
608
|
+
end
|
|
609
|
+
end
|
|
610
|
+
|
|
611
|
+
module NamePartAuthorsMix3
|
|
570
612
|
def value
|
|
571
613
|
a.value + " " + b.value + " " + c.value
|
|
572
614
|
end
|
|
@@ -578,43 +620,89 @@ module ScientificName
|
|
|
578
620
|
end
|
|
579
621
|
end
|
|
580
622
|
|
|
581
|
-
def
|
|
623
|
+
def _nt_name_part_authors_mix
|
|
582
624
|
start_index = index
|
|
583
|
-
if node_cache[:
|
|
584
|
-
cached = node_cache[:
|
|
625
|
+
if node_cache[:name_part_authors_mix].has_key?(index)
|
|
626
|
+
cached = node_cache[:name_part_authors_mix][index]
|
|
585
627
|
@index = cached.interval.end if cached
|
|
586
628
|
return cached
|
|
587
629
|
end
|
|
588
630
|
|
|
589
|
-
i0
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
631
|
+
i0 = index
|
|
632
|
+
i1, s1 = index, []
|
|
633
|
+
r2 = _nt_species_name
|
|
634
|
+
s1 << r2
|
|
635
|
+
if r2
|
|
636
|
+
r3 = _nt_space
|
|
637
|
+
s1 << r3
|
|
638
|
+
if r3
|
|
639
|
+
r4 = _nt_authors_part
|
|
640
|
+
s1 << r4
|
|
641
|
+
if r4
|
|
642
|
+
r5 = _nt_space
|
|
643
|
+
s1 << r5
|
|
644
|
+
if r5
|
|
645
|
+
r6 = _nt_subspecies_name
|
|
646
|
+
s1 << r6
|
|
647
|
+
if r6
|
|
648
|
+
r7 = _nt_space
|
|
649
|
+
s1 << r7
|
|
650
|
+
if r7
|
|
651
|
+
r8 = _nt_authors_part
|
|
652
|
+
s1 << r8
|
|
653
|
+
end
|
|
654
|
+
end
|
|
604
655
|
end
|
|
605
656
|
end
|
|
606
657
|
end
|
|
607
658
|
end
|
|
608
|
-
if
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
659
|
+
if s1.last
|
|
660
|
+
r1 = (SyntaxNode).new(input, i1...index, s1)
|
|
661
|
+
r1.extend(NamePartAuthorsMix0)
|
|
662
|
+
r1.extend(NamePartAuthorsMix1)
|
|
612
663
|
else
|
|
613
|
-
self.index =
|
|
614
|
-
|
|
664
|
+
self.index = i1
|
|
665
|
+
r1 = nil
|
|
666
|
+
end
|
|
667
|
+
if r1
|
|
668
|
+
r0 = r1
|
|
669
|
+
else
|
|
670
|
+
i9, s9 = index, []
|
|
671
|
+
r10 = _nt_species_name
|
|
672
|
+
s9 << r10
|
|
673
|
+
if r10
|
|
674
|
+
r11 = _nt_space
|
|
675
|
+
s9 << r11
|
|
676
|
+
if r11
|
|
677
|
+
r12 = _nt_authors_part
|
|
678
|
+
s9 << r12
|
|
679
|
+
if r12
|
|
680
|
+
r13 = _nt_space
|
|
681
|
+
s9 << r13
|
|
682
|
+
if r13
|
|
683
|
+
r14 = _nt_subspecies_name
|
|
684
|
+
s9 << r14
|
|
685
|
+
end
|
|
686
|
+
end
|
|
687
|
+
end
|
|
688
|
+
end
|
|
689
|
+
if s9.last
|
|
690
|
+
r9 = (SyntaxNode).new(input, i9...index, s9)
|
|
691
|
+
r9.extend(NamePartAuthorsMix2)
|
|
692
|
+
r9.extend(NamePartAuthorsMix3)
|
|
693
|
+
else
|
|
694
|
+
self.index = i9
|
|
695
|
+
r9 = nil
|
|
696
|
+
end
|
|
697
|
+
if r9
|
|
698
|
+
r0 = r9
|
|
699
|
+
else
|
|
700
|
+
self.index = i0
|
|
701
|
+
r0 = nil
|
|
702
|
+
end
|
|
615
703
|
end
|
|
616
704
|
|
|
617
|
-
node_cache[:
|
|
705
|
+
node_cache[:name_part_authors_mix][start_index] = r0
|
|
618
706
|
|
|
619
707
|
return r0
|
|
620
708
|
end
|
|
@@ -31,7 +31,7 @@ grammar ScientificName
|
|
|
31
31
|
end
|
|
32
32
|
|
|
33
33
|
rule scientific_name
|
|
34
|
-
|
|
34
|
+
name_part_authors_mix
|
|
35
35
|
/
|
|
36
36
|
space a:name_part space b:authors_part space c:status_part space {
|
|
37
37
|
def value
|
|
@@ -100,7 +100,19 @@ grammar ScientificName
|
|
|
100
100
|
latin_word
|
|
101
101
|
end
|
|
102
102
|
|
|
103
|
-
rule
|
|
103
|
+
rule name_part_authors_mix
|
|
104
|
+
a:species_name space b:authors_part space c:subspecies_name space d:authors_part {
|
|
105
|
+
def value
|
|
106
|
+
a.value + " " + b.value + " " + c.value + " " + d.value
|
|
107
|
+
end
|
|
108
|
+
def canonical
|
|
109
|
+
a.canonical + " " + c.canonical
|
|
110
|
+
end
|
|
111
|
+
def details
|
|
112
|
+
a.details.merge(c.details).merge({:species_authors=>b.details, :subspecies_authors => d.details})
|
|
113
|
+
end
|
|
114
|
+
}
|
|
115
|
+
/
|
|
104
116
|
a:species_name space b:authors_part space c:subspecies_name {
|
|
105
117
|
def value
|
|
106
118
|
a.value + " " + b.value + " " + c.value
|
|
@@ -2,7 +2,7 @@ dir = File.dirname("__FILE__")
|
|
|
2
2
|
require 'rubygems'
|
|
3
3
|
require 'spec'
|
|
4
4
|
require 'treetop'
|
|
5
|
-
|
|
5
|
+
|
|
6
6
|
Treetop.load(File.expand_path(dir + '../../lib/biodiversity/parser/scientific_name'))
|
|
7
7
|
|
|
8
8
|
describe ScientificName do
|
|
@@ -53,7 +53,8 @@ describe ScientificName do
|
|
|
53
53
|
it 'should parse species autonym for complex subspecies authorships' do
|
|
54
54
|
parse("Aus bus Linn. var. bus").should_not be_nil
|
|
55
55
|
details("Aus bus Linn. var. bus").should == {:species=>"bus", :species_authors=>{:authors=>{:names=>["Linn."]}}, :genus=>"Aus", :subspecies=>[{:rank=>"var.", :value=>"bus"}]}
|
|
56
|
-
|
|
56
|
+
parse("Agalinis purpurea (L.) Briton var. borealis (Berg.) Peterson 1987").should_not be_nil
|
|
57
|
+
details("Agalinis purpurea (L.) Briton var. borealis (Berg.) Peterson 1987").should == {:species=>"purpurea", :genus=>"Agalinis", :species_authors=>{:orig_authors=>{:names=>["L."]}, :authors=>{:names=>["Briton"]}}, :subspecies_authors=>{:orig_authors=>{:names=>["Berg."]}, :authors=>{:year=>"1987", :names=>["Peterson"]}}, :subspecies=>[{:value=>"borealis", :rank=>"var."}]}
|
|
57
58
|
end
|
|
58
59
|
|
|
59
60
|
it 'should parse several authors' do
|