dimus-biodiversity 0.0.16 → 0.0.18

Sign up to get free protection for your applications and to get access to all the features.
@@ -469,7 +469,7 @@ grammar ScientificNameClean
469
469
  end
470
470
  }
471
471
  /
472
- ("anon."/"f."/"bis"/"arg."/"da"/"der"/"den"/"de"/"du"/"la"/"ter"/"van"/"et al.\{\?\}"/"et al.") {
472
+ ("anon."/"f."/"bis"/"arg."/author_prefix/"et al.\{\?\}"/"et al.") {
473
473
  def value
474
474
  text_value.strip
475
475
  end
@@ -498,6 +498,10 @@ grammar ScientificNameClean
498
498
  end
499
499
  }
500
500
  end
501
+
502
+ rule author_prefix
503
+ "da"/"der"/"den"/"de"/"du"/"la"/"ter"/"van"/"von"
504
+ end
501
505
 
502
506
  rule name_part
503
507
  space a:species_name space b:rank space_hard c:editorials_full {
@@ -517,6 +521,24 @@ grammar ScientificNameClean
517
521
  end
518
522
  }
519
523
  /
524
+ space a:species_name &(space author_prefix) {
525
+ def value
526
+ a.value
527
+ end
528
+
529
+ def canonical
530
+ a.canonical
531
+ end
532
+
533
+ def pos
534
+ a.pos
535
+ end
536
+
537
+ def details
538
+ a.details
539
+ end
540
+ }
541
+ /
520
542
  space a:species_name space b:subspecies_names {
521
543
  def value
522
544
  a.value + b.value
@@ -534,13 +556,13 @@ grammar ScientificNameClean
534
556
  end
535
557
  }
536
558
  /
537
- space a:species_name space b:latin_word ![\.] {
559
+ space a:species_name space b:species_word ![\.] {
538
560
  def value
539
561
  a.value + " " + b.value
540
562
  end
541
563
 
542
564
  def canonical
543
- value
565
+ a.canonical + " " + b.value
544
566
  end
545
567
 
546
568
  def pos
@@ -581,7 +603,7 @@ grammar ScientificNameClean
581
603
  end
582
604
 
583
605
  rule subspecies_name
584
- sel:rank space_hard a:latin_word {
606
+ sel:rank space_hard a:species_word {
585
607
  def value
586
608
  sel.apply(a)
587
609
  end
@@ -626,7 +648,7 @@ grammar ScientificNameClean
626
648
  end
627
649
 
628
650
  rule rank
629
- ("morph."/"f.sp."/"B"/"ssp."/"mut."/"pseudovar."/"sect."/"ser."/"var."/"subvar."/ "[var.]" /"subsp."/"subf."/"race"/"α"
651
+ ("morph."/"f.sp."/"B"/"ssp."/"nat"/"mut."/"pseudovar."/"sect."/"ser."/"var."/"subvar."/ "[var.]" /"subsp."/"subf."/"race"/"α"
630
652
  /"ββ"/"β"/"γ"/"δ"/"ε"/"φ"/"θ"/"μ"/"a."/"b."/"c."/"d."/"e."/"g."/"k."/"****"/"**"/"*")
631
653
  {
632
654
  def value
@@ -665,7 +687,7 @@ grammar ScientificNameClean
665
687
  end
666
688
 
667
689
  rule species_name
668
- hybrid_separator space_hard a:cap_latin_word space_hard b:latin_word {
690
+ hybrid_separator space_hard a:cap_latin_word space_hard b:species_word {
669
691
  def value
670
692
  "× " + a.value + " " + b.value
671
693
  end
@@ -699,7 +721,7 @@ grammar ScientificNameClean
699
721
  end
700
722
  }
701
723
  /
702
- a:cap_latin_word space_hard hybrid_separator space_hard b:latin_word {
724
+ a:cap_latin_word space_hard hybrid_separator space_hard b:species_word {
703
725
  def value
704
726
  a.value + " × " + b.value
705
727
  end
@@ -716,7 +738,7 @@ grammar ScientificNameClean
716
738
  end
717
739
  }
718
740
  /
719
- a:cap_latin_word space b:subgenus space c:latin_word {
741
+ a:cap_latin_word space b:subgenus space c:species_word {
720
742
  def value
721
743
  a.value + " " + b.value + " " + c.value
722
744
  end
@@ -733,7 +755,7 @@ grammar ScientificNameClean
733
755
  end
734
756
  }
735
757
  /
736
- a:cap_latin_word space_hard b:latin_word {
758
+ a:cap_latin_word space_hard b:species_word {
737
759
  def value
738
760
  a.value + " " + b.value
739
761
  end
@@ -757,6 +779,10 @@ grammar ScientificNameClean
757
779
  "(" + a.value + ")"
758
780
  end
759
781
 
782
+ def canonical
783
+ ''
784
+ end
785
+
760
786
  def pos
761
787
  {a.interval.begin => ['subgenus', a.interval.end]}
762
788
  end
@@ -792,7 +818,7 @@ grammar ScientificNameClean
792
818
  " " + text_value + " " + a.value
793
819
  end
794
820
  def canonical(a)
795
- " " + a.value
821
+ ""
796
822
  end
797
823
  def details(a = nil)
798
824
  {:subgenus => [{:rank => text_value, :value => (a.value rescue nil)}]}
@@ -800,7 +826,6 @@ grammar ScientificNameClean
800
826
  }
801
827
  end
802
828
 
803
-
804
829
  rule cap_latin_word
805
830
  a:([A-Z]/cap_digraph) b:latin_word "?" {
806
831
  def value
@@ -856,6 +881,16 @@ grammar ScientificNameClean
856
881
  end
857
882
  }
858
883
  end
884
+
885
+ rule species_word
886
+ a:[0-9]+ "-"? b:latin_word {
887
+ def value
888
+ a.text_value + "-"+ b.value
889
+ end
890
+ }
891
+ /
892
+ latin_word
893
+ end
859
894
 
860
895
  rule latin_word
861
896
  a:[a-zëüäöïé] b:full_name_letters {
@@ -1021,4 +1056,4 @@ grammar ScientificNameClean
1021
1056
  end
1022
1057
  }
1023
1058
  end
1024
- end
1059
+ end
@@ -36,13 +36,15 @@ describe ScientificNameCanonical do
36
36
 
37
37
  it 'should parse names with valid name part and unparseable rest' do
38
38
  [
39
- ['Moraea spathulata ( (L. f. Klatt','Moraea spathulata',{:genus=>"Moraea", :species=>"spathulata", :name_part_verbatim=>"Moraea spathulata", :auth_part_verbatim=>"( (L. f. Klatt"}, {0=>["genus", 6], 7=>["species", 17]} ],
40
- ['Verpericola megasoma ""Dall" Pils.','Verpericola megasoma',{:genus=>"Verpericola", :species=>"megasoma", :name_part_verbatim=>"Verpericola megasoma", :auth_part_verbatim=>"\"\"Dall\" Pils."}, {0=>["genus", 11], 12=>["species", 20]}]
39
+ ['Moraea spathulata ( (L. f. Klatt','Moraea spathulata','Moraea spathulata',{:genus=>"Moraea", :species=>"spathulata", :name_part_verbatim=>"Moraea spathulata", :auth_part_verbatim=>"( (L. f. Klatt"}, {0=>["genus", 6], 7=>["species", 17]} ],
40
+ ['Verpericola megasoma ""Dall" Pils.','Verpericola megasoma','Verpericola megasoma',{:genus=>"Verpericola", :species=>"megasoma", :name_part_verbatim=>"Verpericola megasoma", :auth_part_verbatim=>"\"\"Dall\" Pils."}, {0=>["genus", 11], 12=>["species", 20]}],
41
+ ['Nesticus cellulanus affinis Kulczynski, in Chyzer & Kulczynski, 1894','Nesticus cellulanus affinis','Nesticus cellulanus affinis',{:genus=>"Nesticus", :species=>"cellulanus", :subspecies=>{:rank=>"n/a", :value=>"affinis"}, :name_part_verbatim=>"Nesticus cellulanus", :auth_part_verbatim=>"Kulczynski, in Chyzer & Kulczynski, 1894"},{0=>["genus", 8], 9=>["species", 19], 20=>["subspecies", 27]}]
41
42
  ].each do |n|
42
43
  parse(n[0]).should_not be_nil
43
44
  value(n[0]).should == n[1]
44
- details(n[0]).should == n[2]
45
- pos(n[0]).should == n[3]
45
+ canonical(n[0]).should == n[2]
46
+ details(n[0]).should == n[3]
47
+ pos(n[0]).should == n[4]
46
48
  end
47
49
  end
48
50
  end
@@ -175,7 +175,7 @@ describe ScientificNameClean do
175
175
  it 'should parse scientific name' do
176
176
  sn = "Abacetus laevicollis de Chaudoir, 1869"
177
177
  parse(sn).should_not be_nil
178
- #TODO!!!!!! canonical(sn).should == 'Abacetus laevicollis'
178
+ canonical(sn).should == 'Abacetus laevicollis'
179
179
  parse("Pseudocercospora dendrobii (H.C. Burnett) U. Braun & Crous 2003").should_not be_nil
180
180
  value("Pseudocercospora dendrobii(H.C. Burnett)U. Braun & Crous 2003").should == "Pseudocercospora dendrobii (H.C. Burnett) U. Braun et Crous 2003"
181
181
  canonical("Pseudocercospora dendrobii(H.C. Burnett)U. Braun & Crous 2003").should == "Pseudocercospora dendrobii"
@@ -474,4 +474,25 @@ end
474
474
  pos(sn).should == {0=>["genus", 14], 15=>["species", 22]}
475
475
  end
476
476
 
477
+ it 'should parse new additions' do
478
+ sn = "Abacetus laevicollis de Chaudoir, 1869"
479
+ parse(sn).should_not be_nil
480
+ canonical(sn).should == 'Abacetus laevicollis'
481
+ sn = "Gastrosericus eremorum van Beaumont 1955"
482
+ canonical(sn).should == 'Gastrosericus eremorum'
483
+ sn = "Gastrosericus eremorum von Beaumont 1955"
484
+ canonical(sn).should == 'Gastrosericus eremorum'
485
+ sn = "Cypraeovula (Luponia) amphithales perdentata"
486
+ canonical(sn).should == 'Cypraeovula amphithales perdentata'
487
+ details(sn).should == {:genus=>"Cypraeovula", :subgenus=>"Luponia", :species=>"amphithales", :subspecies=>{:rank=>"n/a", :value=>"perdentata"}}
488
+ sn = "Polyrhachis orsyllus nat musculus Forel 1901"
489
+ canonical(sn).should == "Polyrhachis orsyllus musculus"
490
+ sn = 'Latrodectus 13-guttatus Thorell, 1875'
491
+ canonical(sn).should == 'Latrodectus 13-guttatus'
492
+ value(sn).should == 'Latrodectus 13-guttatus Thorell 1875'
493
+ sn = 'Latrodectus 3guttatus Thorell, 1875'
494
+ canonical(sn).should == 'Latrodectus 3-guttatus'
495
+ value(sn).should == 'Latrodectus 3-guttatus Thorell 1875'
496
+ end
497
+
477
498
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dimus-biodiversity
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.16
4
+ version: 0.0.18
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dmitry Mozzherin
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-04-11 00:00:00 -07:00
12
+ date: 2009-06-12 00:00:00 -07:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency