dimus-biodiversity 0.0.16 → 0.0.18
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/biodiversity/parser/scientific_name_canonical.treetop +17 -0
- data/lib/biodiversity/parser/scientific_name_clean.rb +437 -323
- data/lib/biodiversity/parser/scientific_name_clean.treetop +47 -12
- data/spec/parser/scientific_name_canonical.spec.rb +6 -4
- data/spec/parser/scientific_name_clean.spec.rb +22 -1
- metadata +2 -2
@@ -469,7 +469,7 @@ grammar ScientificNameClean
|
|
469
469
|
end
|
470
470
|
}
|
471
471
|
/
|
472
|
-
("anon."/"f."/"bis"/"arg."/
|
472
|
+
("anon."/"f."/"bis"/"arg."/author_prefix/"et al.\{\?\}"/"et al.") {
|
473
473
|
def value
|
474
474
|
text_value.strip
|
475
475
|
end
|
@@ -498,6 +498,10 @@ grammar ScientificNameClean
|
|
498
498
|
end
|
499
499
|
}
|
500
500
|
end
|
501
|
+
|
502
|
+
rule author_prefix
|
503
|
+
"da"/"der"/"den"/"de"/"du"/"la"/"ter"/"van"/"von"
|
504
|
+
end
|
501
505
|
|
502
506
|
rule name_part
|
503
507
|
space a:species_name space b:rank space_hard c:editorials_full {
|
@@ -517,6 +521,24 @@ grammar ScientificNameClean
|
|
517
521
|
end
|
518
522
|
}
|
519
523
|
/
|
524
|
+
space a:species_name &(space author_prefix) {
|
525
|
+
def value
|
526
|
+
a.value
|
527
|
+
end
|
528
|
+
|
529
|
+
def canonical
|
530
|
+
a.canonical
|
531
|
+
end
|
532
|
+
|
533
|
+
def pos
|
534
|
+
a.pos
|
535
|
+
end
|
536
|
+
|
537
|
+
def details
|
538
|
+
a.details
|
539
|
+
end
|
540
|
+
}
|
541
|
+
/
|
520
542
|
space a:species_name space b:subspecies_names {
|
521
543
|
def value
|
522
544
|
a.value + b.value
|
@@ -534,13 +556,13 @@ grammar ScientificNameClean
|
|
534
556
|
end
|
535
557
|
}
|
536
558
|
/
|
537
|
-
space a:species_name space b:
|
559
|
+
space a:species_name space b:species_word ![\.] {
|
538
560
|
def value
|
539
561
|
a.value + " " + b.value
|
540
562
|
end
|
541
563
|
|
542
564
|
def canonical
|
543
|
-
value
|
565
|
+
a.canonical + " " + b.value
|
544
566
|
end
|
545
567
|
|
546
568
|
def pos
|
@@ -581,7 +603,7 @@ grammar ScientificNameClean
|
|
581
603
|
end
|
582
604
|
|
583
605
|
rule subspecies_name
|
584
|
-
sel:rank space_hard a:latin_word {
|
606
|
+
sel:rank space_hard a:species_word {
|
585
607
|
def value
|
586
608
|
sel.apply(a)
|
587
609
|
end
|
@@ -626,7 +648,7 @@ grammar ScientificNameClean
|
|
626
648
|
end
|
627
649
|
|
628
650
|
rule rank
|
629
|
-
("morph."/"f.sp."/"B"/"ssp."/"mut."/"pseudovar."/"sect."/"ser."/"var."/"subvar."/ "[var.]" /"subsp."/"subf."/"race"/"α"
|
651
|
+
("morph."/"f.sp."/"B"/"ssp."/"nat"/"mut."/"pseudovar."/"sect."/"ser."/"var."/"subvar."/ "[var.]" /"subsp."/"subf."/"race"/"α"
|
630
652
|
/"ββ"/"β"/"γ"/"δ"/"ε"/"φ"/"θ"/"μ"/"a."/"b."/"c."/"d."/"e."/"g."/"k."/"****"/"**"/"*")
|
631
653
|
{
|
632
654
|
def value
|
@@ -665,7 +687,7 @@ grammar ScientificNameClean
|
|
665
687
|
end
|
666
688
|
|
667
689
|
rule species_name
|
668
|
-
hybrid_separator space_hard a:cap_latin_word space_hard b:latin_word {
|
690
|
+
hybrid_separator space_hard a:cap_latin_word space_hard b:species_word {
|
669
691
|
def value
|
670
692
|
"× " + a.value + " " + b.value
|
671
693
|
end
|
@@ -699,7 +721,7 @@ grammar ScientificNameClean
|
|
699
721
|
end
|
700
722
|
}
|
701
723
|
/
|
702
|
-
a:cap_latin_word space_hard hybrid_separator space_hard b:latin_word {
|
724
|
+
a:cap_latin_word space_hard hybrid_separator space_hard b:species_word {
|
703
725
|
def value
|
704
726
|
a.value + " × " + b.value
|
705
727
|
end
|
@@ -716,7 +738,7 @@ grammar ScientificNameClean
|
|
716
738
|
end
|
717
739
|
}
|
718
740
|
/
|
719
|
-
a:cap_latin_word space b:subgenus space c:latin_word {
|
741
|
+
a:cap_latin_word space b:subgenus space c:species_word {
|
720
742
|
def value
|
721
743
|
a.value + " " + b.value + " " + c.value
|
722
744
|
end
|
@@ -733,7 +755,7 @@ grammar ScientificNameClean
|
|
733
755
|
end
|
734
756
|
}
|
735
757
|
/
|
736
|
-
a:cap_latin_word space_hard b:latin_word {
|
758
|
+
a:cap_latin_word space_hard b:species_word {
|
737
759
|
def value
|
738
760
|
a.value + " " + b.value
|
739
761
|
end
|
@@ -757,6 +779,10 @@ grammar ScientificNameClean
|
|
757
779
|
"(" + a.value + ")"
|
758
780
|
end
|
759
781
|
|
782
|
+
def canonical
|
783
|
+
''
|
784
|
+
end
|
785
|
+
|
760
786
|
def pos
|
761
787
|
{a.interval.begin => ['subgenus', a.interval.end]}
|
762
788
|
end
|
@@ -792,7 +818,7 @@ grammar ScientificNameClean
|
|
792
818
|
" " + text_value + " " + a.value
|
793
819
|
end
|
794
820
|
def canonical(a)
|
795
|
-
"
|
821
|
+
""
|
796
822
|
end
|
797
823
|
def details(a = nil)
|
798
824
|
{:subgenus => [{:rank => text_value, :value => (a.value rescue nil)}]}
|
@@ -800,7 +826,6 @@ grammar ScientificNameClean
|
|
800
826
|
}
|
801
827
|
end
|
802
828
|
|
803
|
-
|
804
829
|
rule cap_latin_word
|
805
830
|
a:([A-Z]/cap_digraph) b:latin_word "?" {
|
806
831
|
def value
|
@@ -856,6 +881,16 @@ grammar ScientificNameClean
|
|
856
881
|
end
|
857
882
|
}
|
858
883
|
end
|
884
|
+
|
885
|
+
rule species_word
|
886
|
+
a:[0-9]+ "-"? b:latin_word {
|
887
|
+
def value
|
888
|
+
a.text_value + "-"+ b.value
|
889
|
+
end
|
890
|
+
}
|
891
|
+
/
|
892
|
+
latin_word
|
893
|
+
end
|
859
894
|
|
860
895
|
rule latin_word
|
861
896
|
a:[a-zëüäöïé] b:full_name_letters {
|
@@ -1021,4 +1056,4 @@ grammar ScientificNameClean
|
|
1021
1056
|
end
|
1022
1057
|
}
|
1023
1058
|
end
|
1024
|
-
end
|
1059
|
+
end
|
@@ -36,13 +36,15 @@ describe ScientificNameCanonical do
|
|
36
36
|
|
37
37
|
it 'should parse names with valid name part and unparseable rest' do
|
38
38
|
[
|
39
|
-
['Moraea spathulata ( (L. f. Klatt','Moraea spathulata',{:genus=>"Moraea", :species=>"spathulata", :name_part_verbatim=>"Moraea spathulata", :auth_part_verbatim=>"( (L. f. Klatt"}, {0=>["genus", 6], 7=>["species", 17]} ],
|
40
|
-
['Verpericola megasoma ""Dall" Pils.','Verpericola megasoma',{:genus=>"Verpericola", :species=>"megasoma", :name_part_verbatim=>"Verpericola megasoma", :auth_part_verbatim=>"\"\"Dall\" Pils."}, {0=>["genus", 11], 12=>["species", 20]}]
|
39
|
+
['Moraea spathulata ( (L. f. Klatt','Moraea spathulata','Moraea spathulata',{:genus=>"Moraea", :species=>"spathulata", :name_part_verbatim=>"Moraea spathulata", :auth_part_verbatim=>"( (L. f. Klatt"}, {0=>["genus", 6], 7=>["species", 17]} ],
|
40
|
+
['Verpericola megasoma ""Dall" Pils.','Verpericola megasoma','Verpericola megasoma',{:genus=>"Verpericola", :species=>"megasoma", :name_part_verbatim=>"Verpericola megasoma", :auth_part_verbatim=>"\"\"Dall\" Pils."}, {0=>["genus", 11], 12=>["species", 20]}],
|
41
|
+
['Nesticus cellulanus affinis Kulczynski, in Chyzer & Kulczynski, 1894','Nesticus cellulanus affinis','Nesticus cellulanus affinis',{:genus=>"Nesticus", :species=>"cellulanus", :subspecies=>{:rank=>"n/a", :value=>"affinis"}, :name_part_verbatim=>"Nesticus cellulanus", :auth_part_verbatim=>"Kulczynski, in Chyzer & Kulczynski, 1894"},{0=>["genus", 8], 9=>["species", 19], 20=>["subspecies", 27]}]
|
41
42
|
].each do |n|
|
42
43
|
parse(n[0]).should_not be_nil
|
43
44
|
value(n[0]).should == n[1]
|
44
|
-
|
45
|
-
|
45
|
+
canonical(n[0]).should == n[2]
|
46
|
+
details(n[0]).should == n[3]
|
47
|
+
pos(n[0]).should == n[4]
|
46
48
|
end
|
47
49
|
end
|
48
50
|
end
|
@@ -175,7 +175,7 @@ describe ScientificNameClean do
|
|
175
175
|
it 'should parse scientific name' do
|
176
176
|
sn = "Abacetus laevicollis de Chaudoir, 1869"
|
177
177
|
parse(sn).should_not be_nil
|
178
|
-
|
178
|
+
canonical(sn).should == 'Abacetus laevicollis'
|
179
179
|
parse("Pseudocercospora dendrobii (H.C. Burnett) U. Braun & Crous 2003").should_not be_nil
|
180
180
|
value("Pseudocercospora dendrobii(H.C. Burnett)U. Braun & Crous 2003").should == "Pseudocercospora dendrobii (H.C. Burnett) U. Braun et Crous 2003"
|
181
181
|
canonical("Pseudocercospora dendrobii(H.C. Burnett)U. Braun & Crous 2003").should == "Pseudocercospora dendrobii"
|
@@ -474,4 +474,25 @@ end
|
|
474
474
|
pos(sn).should == {0=>["genus", 14], 15=>["species", 22]}
|
475
475
|
end
|
476
476
|
|
477
|
+
it 'should parse new additions' do
|
478
|
+
sn = "Abacetus laevicollis de Chaudoir, 1869"
|
479
|
+
parse(sn).should_not be_nil
|
480
|
+
canonical(sn).should == 'Abacetus laevicollis'
|
481
|
+
sn = "Gastrosericus eremorum van Beaumont 1955"
|
482
|
+
canonical(sn).should == 'Gastrosericus eremorum'
|
483
|
+
sn = "Gastrosericus eremorum von Beaumont 1955"
|
484
|
+
canonical(sn).should == 'Gastrosericus eremorum'
|
485
|
+
sn = "Cypraeovula (Luponia) amphithales perdentata"
|
486
|
+
canonical(sn).should == 'Cypraeovula amphithales perdentata'
|
487
|
+
details(sn).should == {:genus=>"Cypraeovula", :subgenus=>"Luponia", :species=>"amphithales", :subspecies=>{:rank=>"n/a", :value=>"perdentata"}}
|
488
|
+
sn = "Polyrhachis orsyllus nat musculus Forel 1901"
|
489
|
+
canonical(sn).should == "Polyrhachis orsyllus musculus"
|
490
|
+
sn = 'Latrodectus 13-guttatus Thorell, 1875'
|
491
|
+
canonical(sn).should == 'Latrodectus 13-guttatus'
|
492
|
+
value(sn).should == 'Latrodectus 13-guttatus Thorell 1875'
|
493
|
+
sn = 'Latrodectus 3guttatus Thorell, 1875'
|
494
|
+
canonical(sn).should == 'Latrodectus 3-guttatus'
|
495
|
+
value(sn).should == 'Latrodectus 3-guttatus Thorell 1875'
|
496
|
+
end
|
497
|
+
|
477
498
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dimus-biodiversity
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.16
|
4
|
+
version: 0.0.18
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dmitry Mozzherin
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-
|
12
|
+
date: 2009-06-12 00:00:00 -07:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|