biodiversity19 0.5.15 → 0.5.16

Sign up to get free protection for your applications and to get access to all the features.
@@ -22,6 +22,10 @@ grammar ScientificNameClean
22
22
  def details
23
23
  a.details.class == Array ? a.details : [a.details]
24
24
  end
25
+
26
+ def parser_run
27
+ 1
28
+ end
25
29
  }
26
30
  end
27
31
 
@@ -509,7 +513,7 @@ grammar ScientificNameClean
509
513
  end
510
514
 
511
515
  rule genus
512
- a:cap_latin_word !(space_hard author_prefix_word space_hard author_word) {
516
+ a:(cap_latin_word_pair/cap_latin_word) !(space_hard author_prefix_word space_hard author_word) {
513
517
  def value
514
518
  a.value
515
519
  end
@@ -555,7 +559,7 @@ grammar ScientificNameClean
555
559
  end
556
560
 
557
561
  rule uninomial_string
558
- cap_latin_word {
562
+ (cap_latin_word_pair/cap_latin_word) {
559
563
  def canonical
560
564
  value
561
565
  end
@@ -938,6 +942,14 @@ grammar ScientificNameClean
938
942
  }
939
943
  end
940
944
 
945
+ rule cap_latin_word_pair
946
+ a:cap_latin_word "-" b:cap_latin_word {
947
+ def value
948
+ a.value + b.value.downcase
949
+ end
950
+ }
951
+ end
952
+
941
953
  rule cap_latin_word
942
954
  a:([A-Z]/cap_digraph) b:latin_word "?" {
943
955
  def value
@@ -951,6 +963,12 @@ grammar ScientificNameClean
951
963
  end
952
964
  }
953
965
  /
966
+ a:("AE"/"OE") b:latin_word {
967
+ def value
968
+ a.text_value[0..0] + 'e' + b.value
969
+ end
970
+ }
971
+ /
954
972
  ("Ca"/"Ea"/"Ge"/"Ia"/"Io"/"Io"/"Ix"/"Lo"/"Oa"/"Ra"/"Ty"/"Ua"/"Aa"/"Ja"/"Zu"/"La"/"Qu"/"As"/"Ba") {
955
973
  def value
956
974
  text_value
@@ -1041,42 +1059,32 @@ grammar ScientificNameClean
1041
1059
  end
1042
1060
 
1043
1061
  rule latin_word
1044
- a:[a-zëüäöïéåóç] b:full_name_letters {
1045
- def value
1046
- a.text_value + b.value
1047
- end
1048
- }
1049
- /
1050
- a:digraph b:full_name_letters {
1062
+ a:[a-zëæœ] b:valid_name_letters {
1051
1063
  def value
1052
- a.value + b.value
1064
+ l = a.text_value
1065
+ l = 'ae' if l == 'æ'
1066
+ l = 'oe' if l == 'œ'
1067
+ l + b.value
1053
1068
  end
1054
1069
  }
1055
1070
  end
1056
1071
 
1057
- rule full_name_letters
1058
- a:digraph b:full_name_letters {
1059
- def value
1060
- a.value + b.value
1061
- end
1062
- }
1063
- /
1064
- a:valid_name_letters b:digraph c:full_name_letters {
1065
- def value
1066
- a.value + b.value + c.value
1067
- end
1068
- }
1069
- /
1070
- valid_name_letters
1071
- end
1072
-
1073
1072
  rule valid_name_letters
1074
- [a-z\-ëüäöïéåóç]+ {
1073
+ [a-z\-ëæœ]+ {
1075
1074
  def value
1076
- text_value
1075
+ res = ''
1076
+ text_value.split('').each do |l|
1077
+ l = 'ae' if l == 'æ'
1078
+ l = 'oe' if l == 'œ'
1079
+ # not sure if we should normalize ë as well. It is legal in botanical code, but it
1080
+ # might be beneficial to normalize it for reconciliation purposes
1081
+ # l = 'e' if l == 'ë'
1082
+ res << l
1083
+ end
1084
+ res
1077
1085
  end
1078
1086
  }
1079
- end
1087
+ end
1080
1088
 
1081
1089
  rule cap_digraph
1082
1090
  "Æ" {
@@ -1092,20 +1100,6 @@ grammar ScientificNameClean
1092
1100
  }
1093
1101
  end
1094
1102
 
1095
- rule digraph
1096
- "æ" {
1097
- def value
1098
- 'ae'
1099
- end
1100
- }
1101
- /
1102
- "œ" {
1103
- def value
1104
- 'oe'
1105
- end
1106
- }
1107
- end
1108
-
1109
1103
  rule year
1110
1104
  b:left_paren space a:(year_number_with_character/year_number) space c:right_paren {
1111
1105
  def value
@@ -1177,9 +1171,9 @@ grammar ScientificNameClean
1177
1171
  end
1178
1172
 
1179
1173
  rule multiplication_sign
1180
- "×" {
1174
+ ("×"/"*") {
1181
1175
  def value
1182
- text_value
1176
+ "×"
1183
1177
  end
1184
1178
  }
1185
1179
  end
@@ -3,11 +3,51 @@ module ScientificNameDirty
3
3
  include Treetop::Runtime
4
4
 
5
5
  def root
6
- @root || :root
6
+ @root ||= :root
7
7
  end
8
8
 
9
9
  include ScientificNameClean
10
10
 
11
+ module Root0
12
+ def space1
13
+ elements[0]
14
+ end
15
+
16
+ def a
17
+ elements[1]
18
+ end
19
+
20
+ def space2
21
+ elements[2]
22
+ end
23
+ end
24
+
25
+ module Root1
26
+ def value
27
+ a.value.gsub(/\s{2,}/, ' ').strip
28
+ end
29
+
30
+ def canonical
31
+ a.canonical.gsub(/\s{2,}/, ' ').strip
32
+ end
33
+
34
+ def pos
35
+ a.pos
36
+ end
37
+
38
+ def hybrid
39
+ a.hybrid
40
+ end
41
+
42
+ def details
43
+ a.details.class == Array ? a.details : [a.details]
44
+ end
45
+
46
+ def parser_run
47
+ 2
48
+ end
49
+ end
50
+
11
51
  def _nt_root
12
52
  start_index = index
13
53
  if node_cache[:root].has_key?(index)
@@ -19,7 +59,25 @@ module ScientificNameDirty
19
59
  return cached
20
60
  end
21
61
 
22
- r0 = super
62
+ i0, s0 = index, []
63
+ r1 = _nt_space
64
+ s0 << r1
65
+ if r1
66
+ r2 = _nt_scientific_name_5
67
+ s0 << r2
68
+ if r2
69
+ r3 = _nt_space
70
+ s0 << r3
71
+ end
72
+ end
73
+ if s0.last
74
+ r0 = instantiate_node(SyntaxNode,input, i0...index, s0)
75
+ r0.extend(Root0)
76
+ r0.extend(Root1)
77
+ else
78
+ @index = i0
79
+ r0 = nil
80
+ end
23
81
 
24
82
  node_cache[:root][start_index] = r0
25
83
 
@@ -329,6 +387,161 @@ module ScientificNameDirty
329
387
  r0
330
388
  end
331
389
 
390
+ module LatinWord0
391
+ def a
392
+ elements[0]
393
+ end
394
+
395
+ def b
396
+ elements[1]
397
+ end
398
+ end
399
+
400
+ module LatinWord1
401
+ def value
402
+ res = ''
403
+ text_value.split('').each do |l|
404
+ l = 'ae' if l == 'æ'
405
+ l = 'oe' if l == 'œ'
406
+ res << l
407
+ end
408
+ res.tr('àâåãäáçčéèíìïňññóòôøõöúùürŕřŗššşž',
409
+ 'aaaaaacceeiiinnnoooooouuurrrrsssz')
410
+ end
411
+ end
412
+
413
+ def _nt_latin_word
414
+ start_index = index
415
+ if node_cache[:latin_word].has_key?(index)
416
+ cached = node_cache[:latin_word][index]
417
+ if cached
418
+ cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
419
+ @index = cached.interval.end
420
+ end
421
+ return cached
422
+ end
423
+
424
+ i0, s0 = index, []
425
+ if has_terminal?('\G[a-z\\-ëæœàâåãäáçčéèíìïňññóòôøõöúùürŕřŗššşž]', true, index)
426
+ r1 = true
427
+ @index += 1
428
+ else
429
+ r1 = nil
430
+ end
431
+ s0 << r1
432
+ if r1
433
+ r2 = _nt_valid_name_letters
434
+ s0 << r2
435
+ end
436
+ if s0.last
437
+ r0 = instantiate_node(SyntaxNode,input, i0...index, s0)
438
+ r0.extend(LatinWord0)
439
+ r0.extend(LatinWord1)
440
+ else
441
+ @index = i0
442
+ r0 = nil
443
+ end
444
+
445
+ node_cache[:latin_word][start_index] = r0
446
+
447
+ r0
448
+ end
449
+
450
+ module ValidNameLetters0
451
+ def value
452
+ res = ''
453
+ text_value.split('').each do |l|
454
+ l = 'ae' if l == 'æ'
455
+ l = 'oe' if l == 'œ'
456
+ res << l
457
+ end
458
+ res.tr('àâåãäáçčéèíìïňññóòôøõöúùürŕřŗššşž',
459
+ 'aaaaaacceeiiinnnoooooouuurrrrsssz')
460
+ end
461
+ end
462
+
463
+ def _nt_valid_name_letters
464
+ start_index = index
465
+ if node_cache[:valid_name_letters].has_key?(index)
466
+ cached = node_cache[:valid_name_letters][index]
467
+ if cached
468
+ cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
469
+ @index = cached.interval.end
470
+ end
471
+ return cached
472
+ end
473
+
474
+ s0, i0 = [], index
475
+ loop do
476
+ if has_terminal?('\G[a-z\\-ëæœàâåãäáçčéèíìïňññóòôøõöúùürŕřŗššşž]', true, index)
477
+ r1 = true
478
+ @index += 1
479
+ else
480
+ r1 = nil
481
+ end
482
+ if r1
483
+ s0 << r1
484
+ else
485
+ break
486
+ end
487
+ end
488
+ if s0.empty?
489
+ @index = i0
490
+ r0 = nil
491
+ else
492
+ r0 = instantiate_node(SyntaxNode,input, i0...index, s0)
493
+ r0.extend(ValidNameLetters0)
494
+ end
495
+
496
+ node_cache[:valid_name_letters][start_index] = r0
497
+
498
+ r0
499
+ end
500
+
501
+ module ValidNameLetters0
502
+ def value
503
+ text_value
504
+ end
505
+ end
506
+
507
+ def _nt_valid_name_letters
508
+ start_index = index
509
+ if node_cache[:valid_name_letters].has_key?(index)
510
+ cached = node_cache[:valid_name_letters][index]
511
+ if cached
512
+ cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
513
+ @index = cached.interval.end
514
+ end
515
+ return cached
516
+ end
517
+
518
+ s0, i0 = [], index
519
+ loop do
520
+ if has_terminal?('\G[a-z\\-ëüäöïéåóç]', true, index)
521
+ r1 = true
522
+ @index += 1
523
+ else
524
+ r1 = nil
525
+ end
526
+ if r1
527
+ s0 << r1
528
+ else
529
+ break
530
+ end
531
+ end
532
+ if s0.empty?
533
+ @index = i0
534
+ r0 = nil
535
+ else
536
+ r0 = instantiate_node(SyntaxNode,input, i0...index, s0)
537
+ r0.extend(ValidNameLetters0)
538
+ end
539
+
540
+ node_cache[:valid_name_letters][start_index] = r0
541
+
542
+ r0
543
+ end
544
+
332
545
  module RightParen0
333
546
  def space
334
547
  elements[1]
@@ -3,7 +3,31 @@ grammar ScientificNameDirty
3
3
  include ScientificNameClean
4
4
 
5
5
  rule root
6
- super
6
+ space a:scientific_name_5 space {
7
+ def value
8
+ a.value.gsub(/\s{2,}/, ' ').strip
9
+ end
10
+
11
+ def canonical
12
+ a.canonical.gsub(/\s{2,}/, ' ').strip
13
+ end
14
+
15
+ def pos
16
+ a.pos
17
+ end
18
+
19
+ def hybrid
20
+ a.hybrid
21
+ end
22
+
23
+ def details
24
+ a.details.class == Array ? a.details : [a.details]
25
+ end
26
+
27
+ def parser_run
28
+ 2
29
+ end
30
+ }
7
31
  end
8
32
 
9
33
  rule scientific_name_5
@@ -90,6 +114,43 @@ grammar ScientificNameDirty
90
114
  super
91
115
  end
92
116
 
117
+ rule latin_word
118
+ a:[a-z\-ëæœàâåãäáçčéèíìïňññóòôøõöúùürŕřŗššşž] b:valid_name_letters {
119
+ def value
120
+ res = ''
121
+ text_value.split('').each do |l|
122
+ l = 'ae' if l == 'æ'
123
+ l = 'oe' if l == 'œ'
124
+ res << l
125
+ end
126
+ res.tr('àâåãäáçčéèíìïňññóòôøõöúùürŕřŗššşž',
127
+ 'aaaaaacceeiiinnnoooooouuurrrrsssz')
128
+ end
129
+ }
130
+ end
131
+
132
+ rule valid_name_letters
133
+ [a-z\-ëæœàâåãäáçčéèíìïňññóòôøõöúùürŕřŗššşž]+ {
134
+ def value
135
+ res = ''
136
+ text_value.split('').each do |l|
137
+ l = 'ae' if l == 'æ'
138
+ l = 'oe' if l == 'œ'
139
+ res << l
140
+ end
141
+ res.tr('àâåãäáçčéèíìïňññóòôøõöúùürŕřŗššşž',
142
+ 'aaaaaacceeiiinnnoooooouuurrrrsssz')
143
+ end
144
+ }
145
+ end
146
+ rule valid_name_letters
147
+ [a-z\-ëüäöïéåóç]+ {
148
+ def value
149
+ text_value
150
+ end
151
+ }
152
+ end
153
+
93
154
  rule right_paren
94
155
  ")" space ")"
95
156
  /
@@ -34,6 +34,7 @@ class ScientificNameParser
34
34
  :canonical => self.canonical,
35
35
  :hybrid => hybrid,
36
36
  :details => self.details,
37
+ :parser_run => self.parser_run,
37
38
  :positions => self.pos
38
39
  })
39
40
  else
@@ -6,8 +6,7 @@ describe ScientificNameCanonical do
6
6
  before(:all) do
7
7
  set_parser(ScientificNameCanonicalParser.new)
8
8
  end
9
-
10
-
9
+
11
10
  it 'should parse names with valid name part and unparseable rest' do
12
11
  [
13
12
  ['Morea ssjjlajajaj324$33 234243242','Morea', [{:uninomial=>{:string=>"Morea"}}], {0=>["uninomial", 5]}],
@@ -97,6 +97,7 @@ describe ScientificNameClean do
97
97
  ["Leœptura laetifica Dow, 1913", "Leoeptura laetifica Dow 1913"],
98
98
  ['Ærenea cognata Lacordaire, 1872', 'Aerenea cognata Lacordaire 1872'],
99
99
  ['Œdicnemus capensis', 'Oedicnemus capensis'],
100
+ ['Œnanthæ œnanthe','Oenanthae oenanthe'],
100
101
  ['Œnanthe œnanthe','Oenanthe oenanthe']
101
102
  ]
102
103
  names.each do |name_pair|
@@ -105,19 +106,13 @@ describe ScientificNameClean do
105
106
  end
106
107
  end
107
108
 
108
- it 'should parse names with "common" utf-8 charactes' do
109
- names = ["Rühlella","Sténométope laevissimus Bibron 1855"].each do |name|
110
- parse(name).should_not be_nil
111
- end
112
- sn = "Trematosphaeria phaeospora (E. Müll.) L. Holm 1957"
113
- parse(sn).should_not be_nil
114
- value(sn).should == "Trematosphaeria phaeospora (E. Müll.) L. Holm 1957"
115
- canonical(sn).should == "Trematosphaeria phaeospora"
116
- details(sn).should == [{:genus=>{:string=>"Trematosphaeria"}, :species=>{:string=>"phaeospora", :authorship=>"(E. Müll.) L. Holm 1957", :combinationAuthorTeam=>{:authorTeam=>"L. Holm", :author=>["L. Holm"], :year=>"1957"}, :basionymAuthorTeam=>{:authorTeam=>"E. Müll.", :author=>["E. Müll."]}}}]
117
- pos(sn).should == {0=>["genus", 15], 16=>["species", 26], 28=>["author_word", 30], 31=>["author_word", 36], 46=>["author_word", 48], 61=>["author_word", 65], 66=>["year", 70]}
118
-
109
+ it 'should parse names with e-umlaut' do
110
+ sn = 'Kalanchoë tuberosa'
111
+ canonical(sn).should == 'Kalanchoë tuberosa'
112
+ sn = 'Isoëtes asplundii H. P. Fuchs'
113
+ canonical(sn).should == 'Isoëtes asplundii'
119
114
  end
120
-
115
+
121
116
  it 'should parse infragenus (ICZN code)' do
122
117
  sn = "Hegeter (Hegeter) intercedens Lindberg H 1950"
123
118
  parse(sn).should_not be_nil
@@ -298,6 +293,8 @@ describe ScientificNameClean do
298
293
  sn = 'Latrodectus 3guttatus Thorell, 1875'
299
294
  canonical(sn).should == 'Latrodectus 3-guttatus'
300
295
  value(sn).should == 'Latrodectus 3-guttatus Thorell 1875'
296
+ sn = 'Balaninus c-album Schönherr, CJ., 1836'
297
+ canonical(sn).should == 'Balaninus c-album'
301
298
  end
302
299
 
303
300
  it "should parse name with morph." do
@@ -448,13 +445,13 @@ describe ScientificNameClean do
448
445
  end
449
446
 
450
447
  it 'should parse names with taxon concept' do
451
- sn = "Sténométope laevissimus sec. Eschmeyer 2004"
448
+ sn = "Stenometope laevissimus sec. Eschmeyer 2004"
452
449
  val = @parser.failure_reason.to_s.match(/column [0-9]*/).to_s().gsub(/column /,'')
453
- details(sn).should == [{:genus=>{:string=>"Sténométope"}, :species=>{:string=>"laevissimus"}, :taxon_concept=>{:authorship=>"Eschmeyer 2004", :basionymAuthorTeam=>{:authorTeam=>"Eschmeyer", :author=>["Eschmeyer"], :year=>"2004"}}}]
450
+ details(sn).should == [{:genus=>{:string=>"Stenometope"}, :species=>{:string=>"laevissimus"}, :taxon_concept=>{:authorship=>"Eschmeyer 2004", :basionymAuthorTeam=>{:authorTeam=>"Eschmeyer", :author=>["Eschmeyer"], :year=>"2004"}}}]
454
451
  pos(sn).should == {0=>["genus", 11], 12=>["species", 23], 29=>["author_word", 38], 39=>["year", 43]}
455
- sn = "Sténométope laevissimus Bibron 1855 sec. Eschmeyer 2004"
452
+ sn = "Stenometope laevissimus Bibron 1855 sec. Eschmeyer 2004"
456
453
  parse(sn).should_not be_nil
457
- details(sn).should == [{:genus=>{:string=>"Sténométope"}, :species=>{:string=>"laevissimus", :authorship=>"Bibron 1855", :basionymAuthorTeam=>{:authorTeam=>"Bibron", :author=>["Bibron"], :year=>"1855"}}, :taxon_concept=>{:authorship=>"Eschmeyer 2004", :basionymAuthorTeam=>{:authorTeam=>"Eschmeyer", :author=>["Eschmeyer"], :year=>"2004"}}}]
454
+ details(sn).should == [{:genus=>{:string=>"Stenometope"}, :species=>{:string=>"laevissimus", :authorship=>"Bibron 1855", :basionymAuthorTeam=>{:authorTeam=>"Bibron", :author=>["Bibron"], :year=>"1855"}}, :taxon_concept=>{:authorship=>"Eschmeyer 2004", :basionymAuthorTeam=>{:authorTeam=>"Eschmeyer", :author=>["Eschmeyer"], :year=>"2004"}}}]
458
455
  pos(sn).should == {0=>["genus", 11], 12=>["species", 23], 24=>["author_word", 30], 31=>["year", 35], 41=>["author_word", 50], 51=>["year", 55]}
459
456
  end
460
457
 
@@ -462,6 +459,15 @@ describe ScientificNameClean do
462
459
  parse(" Asplenium X inexpectatum (E.L. Braun 1940) Morton (1956) ").should_not be_nil
463
460
  end
464
461
 
462
+ it 'should parse names with any number of spaces' do
463
+ sn = "Trematosphaeria phaeospora (E. Müll.) L. Holm 1957"
464
+ parse(sn).should_not be_nil
465
+ value(sn).should == "Trematosphaeria phaeospora (E. Müll.) L. Holm 1957"
466
+ canonical(sn).should == "Trematosphaeria phaeospora"
467
+ details(sn).should == [{:genus=>{:string=>"Trematosphaeria"}, :species=>{:string=>"phaeospora", :authorship=>"(E. Müll.) L. Holm 1957", :combinationAuthorTeam=>{:authorTeam=>"L. Holm", :author=>["L. Holm"], :year=>"1957"}, :basionymAuthorTeam=>{:authorTeam=>"E. Müll.", :author=>["E. Müll."]}}}]
468
+ pos(sn).should == {0=>["genus", 15], 16=>["species", 26], 28=>["author_word", 30], 31=>["author_word", 36], 46=>["author_word", 48], 61=>["author_word", 65], 66=>["year", 70]}
469
+ end
470
+
465
471
  it 'should not parse serveral authors groups with several years NOT CORRECT' do
466
472
  parse("Pseudocercospora dendrobii (H.C. Burnett 1883) (Leight.) (Movss. 1967) U. Braun & Crous 2003").should be_nil
467
473
  end
@@ -469,23 +475,17 @@ describe ScientificNameClean do
469
475
  it "should not parse unallowed utf-8 chars in name part" do
470
476
  parse("Érematosphaeria phaespora").should be_nil
471
477
  parse("Trematosphaeria phaeáapora").should be_nil
472
- parse("Trematоsphaeria phaeáapora").should be_nil #cyrillic o
478
+ parse("Trematоsphaeria phaeaapora").should be_nil #cyrillic o
473
479
  end
474
480
 
475
481
  it "should parse new stuff" do
476
482
  sn = 'Nesticus quelpartensis Paik & Namkung, in Paik, Yaginuma & Namkung, 1969'
477
483
  details(sn).should == [{:genus=>{:string=>"Nesticus"}, :species=>{:string=>"quelpartensis", :authorship=>"Paik & Namkung, in Paik, Yaginuma & Namkung, 1969", :basionymAuthorTeam=>{:authorTeam=>"Paik & Namkung", :author=>["Paik", "Namkung"], :exAuthorTeam=>{:authorTeam=>"Paik, Yaginuma & Namkung", :author=>["Paik", "Yaginuma", "Namkung"], :year=>"1969"}}}}]
478
484
  parse('Dipoena yoshidai Ono, in Ono et al., 1991').should_not be_nil
479
- sn = 'Choriozopella trägårdhi Lawrence, 1947'
480
- details(sn).should == [{:genus=>{:string=>"Choriozopella"}, :species=>{:string=>"trägårdhi", :authorship=>"Lawrence, 1947", :basionymAuthorTeam=>{:authorTeam=>"Lawrence", :author=>["Lawrence"], :year=>"1947"}}}]
481
485
  sn = 'Latrodectus mactans bishopi Kaston, 1938'
482
486
  details(sn).should == [{:genus=>{:string=>"Latrodectus"}, :species=>{:string=>"mactans"}, :infraspecies=>[{:string=>"bishopi", :rank=>"n/a", :authorship=>"Kaston, 1938", :basionymAuthorTeam=>{:authorTeam=>"Kaston", :author=>["Kaston"], :year=>"1938"}}]}]
483
487
  sn = 'Diplocephalus aff. procerus Thaler, 1972'
484
488
  details(sn).should == [{:genus=>{:string=>"Diplocephalus"}, :species=>{:string=>"procerus", :authorship=>"Thaler, 1972", :basionymAuthorTeam=>{:authorTeam=>"Thaler", :author=>["Thaler"], :year=>"1972"}}}]
485
- sn = 'Dyarcyops birói Kulczynski, 1908'
486
- details(sn).should == [{:genus=>{:string=>"Dyarcyops"}, :species=>{:string=>"birói", :authorship=>"Kulczynski, 1908", :basionymAuthorTeam=>{:authorTeam=>"Kulczynski", :author=>["Kulczynski"], :year=>"1908"}}}]
487
- sn = 'Sparassus françoisi Simon, 1898'
488
- details(sn).should == [{:genus=>{:string=>"Sparassus"}, :species=>{:string=>"françoisi", :authorship=>"Simon, 1898", :basionymAuthorTeam=>{:authorTeam=>"Simon", :author=>["Simon"], :year=>"1898"}}}]
489
489
  sn = 'Thiobacillus x Parker and Prisk 1953' #have to figure out black lists for this one
490
490
  sn = 'Bacille de Plaut, Kritchevsky and Séguin 1921'
491
491
  details(sn).should == [{:uninomial=>{:string=>"Bacille", :authorship=>"de Plaut, Kritchevsky and Séguin 1921", :basionymAuthorTeam=>{:authorTeam=>"de Plaut, Kritchevsky and Séguin", :author=>["de Plaut", "Kritchevsky", "Séguin"], :year=>"1921"}}}]
@@ -501,4 +501,26 @@ describe ScientificNameClean do
501
501
  details(sn).should == [{:genus=>{:string=>"Flexibacter"}, :species=>{:string=>"elegans", :authorship=>"Soriano 1945, non Lewin 1969", :basionymAuthorTeam=>{:authorTeam=>"Soriano", :author=>["Soriano"], :year=>"1945"}}}]
502
502
  end
503
503
 
504
+ # it 'should parse hybrid names with capitalized second name in genus (botanical code error)' do
505
+ # sn = 'Anacampti-Platanthera P. Fourn.'
506
+ # @parser.parse(sn)
507
+ # puts @parser.failure_reason
508
+ # parse(sn).should_not be_nil
509
+ # canonical(sn).should == 'Anacamptiplatanthera'
510
+ # sn = 'Anacampti-Platanthera vulgaris P. Fourn.'
511
+ # parse(sn).should_not be_nil
512
+ # canonical(sn).should == 'Anacamptiplatanthera'
513
+ # end
514
+
515
+ # it 'shoud parse hybrid names with * character' do
516
+ # sn = "Carduus acanthoides * crispus"
517
+ # details(sn).should == ''
518
+ # end
519
+
520
+ it 'should parse genus names starting with uppercase letters AE OE' do
521
+ sn = 'AEmona separata Broun 1921'
522
+ canonical(sn).should == 'Aemona separata'
523
+ sn = 'OEmona simplex White, 1855'
524
+ canonical(sn).should == 'Oemona simplex'
525
+ end
504
526
  end
@@ -7,7 +7,6 @@ describe ScientificNameDirty do
7
7
  set_parser(ScientificNameDirtyParser.new)
8
8
  end
9
9
 
10
-
11
10
  it 'should parse clean names' do
12
11
  parse("Betula verucosa (L.) Bar. 1899").should_not be_nil
13
12
  end
@@ -85,6 +84,23 @@ describe ScientificNameDirty do
85
84
  details('Oscillaria caviae Simons 1920, according to Simons 1922').should == [{:genus=>{:string=>"Oscillaria"}, :species=>{:string=>"caviae", :authorship=>"Simons 1920", :basionymAuthorTeam=>{:authorTeam=>"Simons", :author=>["Simons"], :year=>"1920"}}}]
86
85
  sn = 'Bacterium monocytogenes hominis"" Nyfeldt 1932'
87
86
  details(sn).should == [{:genus=>{:string=>"Bacterium"}, :species=>{:string=>"monocytogenes"}, :infraspecies=>[{:string=>"hominis", :rank=>"n/a"}]}]
87
+ sn = 'Choriozopella trägårdhi Lawrence, 1947'
88
+ details(sn).should == [{:genus=>{:string=>"Choriozopella"}, :species=>{:string=>"tragardhi", :authorship=>"Lawrence, 1947", :basionymAuthorTeam=>{:authorTeam=>"Lawrence", :author=>["Lawrence"], :year=>"1947"}}}]
89
+ sn = 'Sparassus françoisi Simon, 1898'
90
+ details(sn).should == [{:genus=>{:string=>"Sparassus"}, :species=>{:string=>"francoisi", :authorship=>"Simon, 1898", :basionymAuthorTeam=>{:authorTeam=>"Simon", :author=>["Simon"], :year=>"1898"}}}]
91
+ sn = 'Dyarcyops birói Kulczynski, 1908'
92
+ details(sn).should == [{:genus=>{:string=>"Dyarcyops"}, :species=>{:string=>"biroi", :authorship=>"Kulczynski, 1908", :basionymAuthorTeam=>{:authorTeam=>"Kulczynski", :author=>["Kulczynski"], :year=>"1908"}}}]
93
+ end
94
+
95
+ it 'should parse names with "common" utf-8 charactes' do
96
+ names = ["Rühlella","Sténométope laevissimus Bibron 1855"].each do |name|
97
+ parse(name).should_not be_nil
98
+ end
88
99
  end
89
100
 
101
+ # AsterophUa japonica
102
+ # AsyTuktus ridiculw Parent 1931
103
+ # AtremOEa Staud 1870
104
+
105
+
90
106
  end