biodiversity19 0.5.16 → 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore CHANGED
@@ -1,3 +1,5 @@
1
+ lib/biodiversity/parser/*rb
2
+ *.gemspec
1
3
  *.sw?
2
4
  .DS_Store
3
5
  coverage
data/README.rdoc CHANGED
@@ -1,17 +1,17 @@
1
1
  = Biodiversity
2
2
 
3
- Parses species scientific name and breaks it into elements.
3
+ Parses a taxonomic scientific name and breaks it into semantic elements.
4
4
 
5
5
  == Installation
6
6
 
7
- To install gem you need RubyGems >= 1.2.0
7
+ To install the gem you need RubyGems >= 1.3.6
8
8
 
9
- $ gem sources -a http://gems.github.com (you only have to do this once)
10
- $ sudo gem install dimus-biodiversity
9
+ $ sudo gem install biodiversity #for ruby 1.8.x
10
+ $ sudo gem install biodiversity19 #for ruby 1.9.x
11
11
 
12
12
  == Example usage
13
13
 
14
- You can parse file with species names from command line. File should contain one scientific name per line
14
+ You can parse a file with taxonomic names from the command line. The file should contain one scientific name per line.
15
15
 
16
16
  nnparse file_with_names
17
17
 
data/Rakefile CHANGED
@@ -13,11 +13,13 @@ Spec::Rake::SpecTask.new do |t|
13
13
  t.pattern = 'spec/**/*spec.rb'
14
14
  end
15
15
 
16
+ ruby_version = RUBY_VERSION.split('.')[0..1].join('').to_i
17
+
16
18
 
17
19
  begin
18
20
  require 'jeweler'
19
21
  Jeweler::Tasks.new do |gem|
20
- gem.name = "biodiversity19"
22
+ gem.name = ruby_version < 19 ? "biodiversity" : "biodiversity19"
21
23
  gem.summary = 'Parser of scientific names'
22
24
  gem.description = 'Tools for biodiversity informatics'
23
25
  gem.email = "dmozzherin@gmail.com"
@@ -37,11 +39,14 @@ end
37
39
 
38
40
  task :tt do
39
41
  ['scientific_name_clean', 'scientific_name_dirty', 'scientific_name_canonical'].each do |f|
40
- system("tt #{dir}/lib/biodiversity/parser/#{f}.treetop")
41
- rf = "#{dir}/lib/biodiversity/parser/#{f}.rb"
42
+ file = "#{dir}/lib/biodiversity/parser/#{f}"
43
+ FileUtils.rm("#{file}.rb") if FileTest.exist?("#{file}.rb")
44
+ system("tt #{file}.treetop")
45
+ rf = "#{file}.rb"
42
46
  rfn = open(rf + ".tmp", 'w')
43
47
  skip_head = false
44
48
  f = open(rf)
49
+ #getting around a bug in treetop which prevents setting UTF-8 encoding in ruby19
45
50
  f.each_with_index do |l, i|
46
51
  skip_head = l.match(/^# Autogenerated/) if i == 0
47
52
  if skip_head && (l.strip == '' || l.match(/^# Autogenerated/))
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.5.16
1
+ 0.6.0
data/bin/nnparse CHANGED
@@ -1,11 +1,15 @@
1
1
  #!/usr/bin/env ruby
2
2
  require 'rubygems'
3
- gem 'biodiversity' rescue nil
3
+ gem_name = RUBY_VERSION.split('.')[0..1].join('').to_i > 18 ? 'biodiversity19' : 'biodiversity'
4
+ gem gem_name rescue nil
4
5
 
5
6
  $LOAD_PATH.unshift(File.expand_path(File.dirname(__FILE__) + "/../lib"))
6
7
  require 'biodiversity'
7
8
  require 'json'
8
9
 
10
+ def parser_error(name)
11
+ {'scientificName' => {'parsed' => false, 'verbatim' => name, 'error' => 'Parser error'}}.to_json
12
+ end
9
13
 
10
14
  if ARGV.empty?
11
15
  puts "Usage:\n\nnnparse file_with_scientific_names [output_file]\n\ndefault output_file is parsed.json\n\n"
@@ -31,12 +35,12 @@ IO.foreach(input) do |line|
31
35
  $KCODE = 'NONE'
32
36
  end
33
37
  p.parse(name)
34
- parsed_data = p.parsed.all_json rescue {'scientificName' => {'parsed' => false, 'verbatim' => name, 'error' => 'Parser error'}}.to_json
38
+ parsed_data = p.parsed.all_json rescue parser_error(name)
35
39
  if ruby_min_version < 19
36
40
  $KCODE = old_kcode
37
41
  end
38
42
  rescue
39
- parsed_data = {'parsed' => false, 'verbatim' => name, 'error' => 'Parser error'}.to_json
43
+ parsed_data = parser_error(name)
40
44
  end
41
45
  o.write parsed_data + "\n"
42
46
  end
data/bin/parserver CHANGED
@@ -2,6 +2,7 @@
2
2
  require 'rubygems'
3
3
  require 'socket'
4
4
  require 'biodiversity' # Get sockets from stdlib
5
+ puts "Running parser service on port 4334"
5
6
  parser = ScientificNameParser.new
6
7
  server = TCPServer.open(4334) # Socket to listen on port 4334
7
8
  loop do # Servers run forever
@@ -30,6 +30,28 @@ grammar ScientificNameClean
30
30
  end
31
31
 
32
32
  rule scientific_name_5
33
+ a:multinomial_name space_hard hybrid_character space_hard b:species {
34
+ def value
35
+ a.value + " × " + b.value
36
+ end
37
+
38
+ def canonical
39
+ a.canonical + " × " + b.canonical
40
+ end
41
+
42
+ def pos
43
+ a.pos.merge(b.pos)
44
+ end
45
+
46
+ def hybrid
47
+ true
48
+ end
49
+
50
+ def details
51
+ [a.details, b.details.merge({:genus => a.details[:genus]})]
52
+ end
53
+ }
54
+ /
33
55
  a:scientific_name_1 space b:taxon_concept_rank space c:authorship {
34
56
  def value
35
57
  a.value + " " + b.apply(c)
@@ -62,7 +84,7 @@ grammar ScientificNameClean
62
84
  end
63
85
 
64
86
  def canonical
65
- a.canonical + " " + b.canonical
87
+ a.canonical + " × " + b.canonical
66
88
  end
67
89
 
68
90
  def pos
@@ -196,7 +218,7 @@ grammar ScientificNameClean
196
218
  end
197
219
 
198
220
  def canonical
199
- a.canonical + " " + b.canonical + " " + c.canonical + " " + d.canonical
221
+ a.canonical + " " + c.canonical + " " + d.canonical
200
222
  end
201
223
 
202
224
  def pos
@@ -381,7 +403,7 @@ grammar ScientificNameClean
381
403
  end
382
404
 
383
405
  rule rank
384
- ("morph."/"f.sp."/"B"/"ssp."/"mut."/"nat"/"nothosubsp."/"pseudovar."/"sect."/"ser."/"var."/"subvar."/ "[var.]" /"subsp."/"subf."/"race"/"α"
406
+ ("morph."/"f.sp."/"B"/"ssp."/"mut."/"nat"/"nothosubsp."/"pseudovar."/"sect."/"ser."/"var."/"subvar."/ "[var.]" /"var"/"subsp."/"subsp"/"subf."/"race"/"α"
385
407
  /"ββ"/"β"/"γ"/"δ"/"ε"/"φ"/"θ"/"μ"/"a."/"b."/"c."/"d."/"e."/"g."/"k."/"****"/"**"/"*")
386
408
  {
387
409
  def value
@@ -405,7 +427,7 @@ grammar ScientificNameClean
405
427
  end
406
428
 
407
429
  rule rank_forma
408
- ("forma"/"form."/"fo."/"f.")
430
+ ("forma"/"form."/"form"/"fo."/"f.")
409
431
  {
410
432
  def value
411
433
  "f."
@@ -449,28 +471,28 @@ grammar ScientificNameClean
449
471
  end
450
472
 
451
473
  rule species_string
452
- a:species_word &(space_hard author_prefix_word space_hard) {
453
- def value
454
- a.value
455
- end
456
-
457
- def canonical
458
- a.value
459
- end
460
-
461
- def hybrid
462
- a.hybrid rescue false
463
- end
464
-
465
- def pos
466
- {a.interval.begin => ['species', a.interval.end]}
467
- end
468
-
469
- def details
470
- {:species => {:string => a.value}}
471
- end
472
- }
473
- /
474
+ # a:species_word &(space_hard author_prefix_word space_hard) {
475
+ # def value
476
+ # a.value
477
+ # end
478
+ #
479
+ # def canonical
480
+ # a.value
481
+ # end
482
+ #
483
+ # def hybrid
484
+ # a.hybrid rescue false
485
+ # end
486
+ #
487
+ # def pos
488
+ # {a.interval.begin => ['species', a.interval.end]}
489
+ # end
490
+ #
491
+ # def details
492
+ # {:species => {:string => a.value}}
493
+ # end
494
+ # }
495
+ # /
474
496
  species_word {
475
497
  def canonical
476
498
  value
@@ -493,7 +515,7 @@ grammar ScientificNameClean
493
515
  end
494
516
 
495
517
  rule infragenus
496
- left_paren space a:cap_latin_word space right_paren {
518
+ left_paren space a:(cap_latin_word/capped_dotted_char) space right_paren {
497
519
  def value
498
520
  "(" + a.value + ")"
499
521
  end
@@ -513,7 +535,7 @@ grammar ScientificNameClean
513
535
  end
514
536
 
515
537
  rule genus
516
- a:(cap_latin_word_pair/cap_latin_word) !(space_hard author_prefix_word space_hard author_word) {
538
+ a:uninomial_string !(space_hard author_prefix_word space_hard author_word) {
517
539
  def value
518
540
  a.value
519
541
  end
@@ -533,6 +555,50 @@ grammar ScientificNameClean
533
555
  end
534
556
 
535
557
  rule uninomial_name
558
+ a:uninomial_string space b:infragenus space c:simple_authorship {
559
+ def value
560
+ a.value + " " + b.value + " " + c.value
561
+ end
562
+
563
+ def canonical
564
+ a.canonical
565
+ end
566
+
567
+ def pos
568
+ a.pos.merge(b.pos).merge(c.pos)
569
+ end
570
+
571
+ def hybrid
572
+ false
573
+ end
574
+
575
+ def details
576
+ {:uninomial => a.details[:uninomial].merge(b.details).merge(c.details)}
577
+ end
578
+ }
579
+ /
580
+ a:uninomial_string space b:infragenus {
581
+ def value
582
+ a.value + " " + b.value
583
+ end
584
+
585
+ def canonical
586
+ a.canonical
587
+ end
588
+
589
+ def pos
590
+ a.pos.merge(b.pos)
591
+ end
592
+
593
+ def hybrid
594
+ false
595
+ end
596
+
597
+ def details
598
+ {:uninomial => a.details[:uninomial].merge(b.details)}
599
+ end
600
+ }
601
+ /
536
602
  a:uninomial_string space_hard b:authorship {
537
603
  def value
538
604
  a.value + " " + b.value
@@ -799,7 +865,7 @@ grammar ScientificNameClean
799
865
 
800
866
 
801
867
  rule unknown_auth
802
- ("auct."/"hort."/"anon."/"ht.") {
868
+ ("auct."/"auct"/"hort."/"hort"/"anon."/"anon"/"ht."/"ht") {
803
869
  def value
804
870
  text_value
805
871
  end
@@ -837,7 +903,7 @@ grammar ScientificNameClean
837
903
  end
838
904
 
839
905
  rule author_name
840
- space a:author_prefix_word space b:author_name space {
906
+ space a:author_prefix_word space b:author_name {
841
907
  def value
842
908
  a.value + " " + b.value
843
909
  end
@@ -851,7 +917,7 @@ grammar ScientificNameClean
851
917
  end
852
918
  }
853
919
  /
854
- space a:author_word space b:author_name space {
920
+ a:author_word space b:author_name {
855
921
  def value
856
922
  a.value + " " + b.value
857
923
  end
@@ -883,7 +949,7 @@ grammar ScientificNameClean
883
949
  end
884
950
  }
885
951
  /
886
- ("arg."/"et al.\{\?\}"/"et al.") {
952
+ ("arg."/"et al.\{\?\}"/"et al."/"et al") {
887
953
  def value
888
954
  text_value.strip
889
955
  end
@@ -930,7 +996,7 @@ grammar ScientificNameClean
930
996
  end
931
997
 
932
998
  rule author_prefix_word
933
- space ("ab"/"bis"/"da"/"der"/"den"/"della"/"dela"/"de"/"di"/"du"/"la"/"ter"/"van"/"von") &space_hard {
999
+ space ("ab"/"bis"/"da"/"der"/"des"/"den"/"della"/"dela"/"de"/"di"/"du"/"la"/"ter"/"van"/"von") &space_hard {
934
1000
  def value
935
1001
  text_value
936
1002
  end
@@ -976,6 +1042,14 @@ grammar ScientificNameClean
976
1042
  }
977
1043
  end
978
1044
 
1045
+ rule capped_dotted_char
1046
+ [A-Z] "." {
1047
+ def value
1048
+ text_value
1049
+ end
1050
+ }
1051
+ end
1052
+
979
1053
  rule species_word_hybrid
980
1054
  a:multiplication_sign space b:species_word {
981
1055
  def value
@@ -1051,7 +1125,9 @@ grammar ScientificNameClean
1051
1125
  rule species_word
1052
1126
  a:[0-9]+ "-"? b:latin_word {
1053
1127
  def value
1054
- a.text_value + "-" + b.value
1128
+ num = {"1" => "uni", "2" => "du", "3" => "tri", "4" => "quadri", "5" => "quinque", "6" => "hexa", "7" => "septem", "8" => "octo", "9" => "novem", "10" => "decem", "11" => "undecim", "12" => "duodec", "13" => "tredec", "14" => "quattuordec", "15" => "quinquadec", "16" => "hexadec", "17" => "septendec", "18" => "octodec", "19" => "novemdec", "20" => "viginti", "21" => "unviginti", "22" => "duodeviginti", "23" => "triviginti", "24" => "quattuorviginti", "25" => "quinquatviginti", "26" => "hexaviginti", "27" => "septenviginti", "28" => "octoviginti", "29" => "novemviginti", "30" => "triginta", "38" => "trigintaocto", "100" => "centi"}
1129
+ a_value = num[a.text_value] ? num[a.text_value] : a.text_value + "-"
1130
+ a_value + b.value
1055
1131
  end
1056
1132
  }
1057
1133
  /
@@ -1059,18 +1135,21 @@ grammar ScientificNameClean
1059
1135
  end
1060
1136
 
1061
1137
  rule latin_word
1062
- a:[a-zëæœ] b:valid_name_letters {
1138
+ a:valid_name_letters "-" b:latin_word {
1139
+ def value
1140
+ a.value + "-" + b.value
1141
+ end
1142
+ }
1143
+ /
1144
+ a:valid_name_letter b:valid_name_letters {
1063
1145
  def value
1064
- l = a.text_value
1065
- l = 'ae' if l == 'æ'
1066
- l = 'oe' if l == 'œ'
1067
- l + b.value
1146
+ a.value + b.value
1068
1147
  end
1069
1148
  }
1070
1149
  end
1071
1150
 
1072
1151
  rule valid_name_letters
1073
- [a-z\-ëæœ]+ {
1152
+ [a-zëæœ]+ {
1074
1153
  def value
1075
1154
  res = ''
1076
1155
  text_value.split('').each do |l|
@@ -1086,6 +1165,18 @@ grammar ScientificNameClean
1086
1165
  }
1087
1166
  end
1088
1167
 
1168
+ rule valid_name_letter
1169
+ [a-zëæœ] {
1170
+ def value
1171
+ res = text_value
1172
+ res = 'ae' if res == 'æ'
1173
+ res = 'oe' if res == 'œ'
1174
+ res
1175
+ end
1176
+ }
1177
+ end
1178
+
1179
+
1089
1180
  rule cap_digraph
1090
1181
  "Æ" {
1091
1182
  def value
@@ -6,6 +6,35 @@ require File.join(dir, *%w[parser scientific_name_canonical])
6
6
  require 'rubygems'
7
7
  require 'json'
8
8
 
9
+ module PreProcessor
10
+ NOTES = /\s+(species\s+group|species\s+complex|group|author)\b.*$/i
11
+ TAXON_CONCEPTS1 = /\s+(sensu\.|sensu|auct\.|auct)\b.*$/i
12
+ TAXON_CONCEPTS2 = /\s+(\(?s\.\s?s\.|\(?s\.\s?l\.|\(?s\.\s?str\.|\(?s\.\s?lat\.|sec\.|sec|near)\b.*$/
13
+ TAXON_CONCEPTS3 = /(,\s*|\s+)(pro parte|p.\s?p.)\s*$/i
14
+ NOMEN_CONCEPTS = /(,\s*|\s+)(\(?nomen|\(?nom\.|\(?comb\.).*$/i
15
+ LAST_WORD_JUNK = /(,\s*|\s+)(von|van|sensu|new|non|nec|cf|ssp|subsp|subgen|hybrid|hort.|hort)\s*$/i
16
+
17
+ def self.clean(a_string)
18
+ [NOTES, TAXON_CONCEPTS1, TAXON_CONCEPTS2, TAXON_CONCEPTS3, NOMEN_CONCEPTS, LAST_WORD_JUNK].each do |i|
19
+ a_string = a_string.gsub(i, '')
20
+ end
21
+ a_string = a_string.tr('ſ','s') #old 's'
22
+ a_string
23
+ end
24
+ end
25
+
26
+ # we can use these expressions when we are ready to parse virus names
27
+ # class VirusParser
28
+ # def initialize
29
+ # @order = /^\s*[A-Z][a-z]\+virales/i
30
+ # @family = /^\s*[A-Z][a-z]\+viridae|viroidae/i
31
+ # @subfamily = /^\s*[A-Z][a-z]\+virinae|viroinae/i
32
+ # @genus = /^\s*[A-Z][a-z]\+virus|viroid/i
33
+ # @species = /^\s*[A-z0-9u0391-u03C9\[\] ]\+virus|phage|viroid|satellite|prion[A-z0-9u0391-u03C9\[\] ]\+/i
34
+ # @parsed = nil
35
+ # end
36
+ # end
37
+
9
38
  class ScientificNameParser
10
39
 
11
40
  def initialize
@@ -15,21 +44,36 @@ class ScientificNameParser
15
44
  @canonical = ScientificNameCanonicalParser.new
16
45
  @parsed = nil
17
46
  end
18
-
47
+
48
+ def virus?(a_string)
49
+ !!(a_string.match(/\sICTV\s*$/) || a_string.match(/\s(virus|phage|viroid|satellite|prion)\b/i))
50
+ end
51
+
19
52
  def parsed
20
53
  @parsed
21
54
  end
22
55
 
23
56
  def parse(a_string)
24
57
  @verbatim = a_string
25
- @parsed = @clean.parse(a_string) || @dirty.parse(a_string) || @canonical.parse(a_string) || {:verbatim => a_string}
26
- def @parsed.all
58
+ a_string = PreProcessor::clean(a_string)
59
+
60
+ if virus?(a_string)
61
+ @parsed = { :verbatim => a_string, :virus => true }
62
+ else
63
+ @parsed = @clean.parse(a_string) || @dirty.parse(a_string) || @canonical.parse(a_string) || { :verbatim => a_string }
64
+ end
65
+
66
+ def @parsed.verbatim=(a_string)
67
+ @verbatim = a_string
68
+ end
69
+
70
+ def @parsed.all(verbatim = @verbatim)
27
71
  parsed = self.class != Hash
28
72
  res = {:parsed => parsed}
29
73
  if parsed
30
74
  hybrid = self.hybrid rescue false
31
75
  res.merge!({
32
- :verbatim => self.text_value,
76
+ :verbatim => @verbatim,
33
77
  :normalized => self.value,
34
78
  :canonical => self.canonical,
35
79
  :hybrid => hybrid,
@@ -51,7 +95,8 @@ class ScientificNameParser
51
95
  def @parsed.all_json
52
96
  self.all.to_json rescue ''
53
97
  end
54
-
98
+
99
+ @parsed.verbatim = @verbatim
55
100
  @parsed.all
56
101
  end
57
102
  end
@@ -98,7 +98,8 @@ describe ScientificNameClean do
98
98
  ['Ærenea cognata Lacordaire, 1872', 'Aerenea cognata Lacordaire 1872'],
99
99
  ['Œdicnemus capensis', 'Oedicnemus capensis'],
100
100
  ['Œnanthæ œnanthe','Oenanthae oenanthe'],
101
- ['Œnanthe œnanthe','Oenanthe oenanthe']
101
+ ['Œnanthe œnanthe','Oenanthe oenanthe'],
102
+ ['Cerambyx thomæ Gmelin J. F., 1790', 'Cerambyx thomae Gmelin J. F. 1790']
102
103
  ]
103
104
  names.each do |name_pair|
104
105
  parse(name_pair[0]).should_not be_nil
@@ -120,6 +121,18 @@ describe ScientificNameClean do
120
121
  canonical(sn).should == "Hegeter intercedens"
121
122
  details(sn).should == [{:genus=>{:string=>"Hegeter"}, :infragenus=>{:string=>"Hegeter"}, :species=>{:string=>"intercedens", :authorship=>"Lindberg H 1950", :basionymAuthorTeam=>{:authorTeam=>"Lindberg H", :author=>["Lindberg H"], :year=>"1950"}}}]
122
123
  pos(sn).should == {0=>["genus", 7], 9=>["infragenus", 16], 18=>["species", 29], 30=>["author_word", 38], 39=>["author_word", 40], 41=>["year", 45]}
124
+ sn = "Ixodes (Ixodes) hexagonus hexagonus Neumann, 1911"
125
+ canonical(sn).should == "Ixodes hexagonus hexagonus"
126
+ sn = "Brachytrypus (B.) grandidieri"
127
+ canonical(sn).should == "Brachytrypus grandidieri"
128
+ details(sn).should == [{:genus=>{:string=>"Brachytrypus"}, :infragenus=>{:string=>"B."}, :species=>{:string=>"grandidieri"}}]
129
+ sn = "Empis (Argyrandrus) Bezzi 1909"
130
+ details(sn).should == [{:uninomial=>{:string=>"Empis", :infragenus=>{:string=>"Argyrandrus"}, :authorship=>"Bezzi 1909", :basionymAuthorTeam=>{:authorTeam=>"Bezzi", :author=>["Bezzi"], :year=>"1909"}}}]
131
+ sn = "Platydoris (Bergh )"
132
+ details(sn).should == [{:uninomial=>{:string=>"Platydoris", :infragenus=>{:string=>"Bergh"}}}]
133
+ value(sn).should == "Platydoris (Bergh)"
134
+ sn = "Platydoris (B.)"
135
+ details(sn).should == [{:uninomial=>{:string=>"Platydoris", :infragenus=>{:string=>"B."}}}]
123
136
  end
124
137
 
125
138
  it 'should parse several authors without a year' do
@@ -219,6 +232,8 @@ describe ScientificNameClean do
219
232
  value(sn).should == "Phaeographis inusta var. macularis (Leight.) A.L. Sm. 1861"
220
233
  canonical(sn).should == "Phaeographis inusta macularis"
221
234
  pos(sn).should == {0=>["genus", 12], 13=>["species", 19], 25=>["infraspecies", 34], 35=>["author_word", 42], 44=>["author_word", 48], 49=>["author_word", 52], 53=>["year", 57]}
235
+ sn = "Cassytha peninsularis J. Z. Weber var. flindersii"
236
+ canonical(sn).should == "Cassytha peninsularis flindersii"
222
237
  end
223
238
 
224
239
  it 'should parse unknown original authors (auct.)/(hort.)/(?)' do
@@ -239,7 +254,7 @@ describe ScientificNameClean do
239
254
  pos(sn).should == {0=>["genus", 4], 5=>["species", 10], 11=>["unknown_author", 14]}
240
255
  end
241
256
 
242
- it 'shuould parse real world examples' do
257
+ it 'should parse real world examples' do
243
258
  sn = "Stagonospora polyspora M.T. Lucas & Sousa da Câmara 1934"
244
259
  parse(sn).should_not be_nil
245
260
  value(sn).should == "Stagonospora polyspora M.T. Lucas et Sousa da Câmara 1934"
@@ -283,16 +298,16 @@ describe ScientificNameClean do
283
298
  sn = "Gastrosericus eremorum von Beaumont 1955"
284
299
  canonical(sn).should == 'Gastrosericus eremorum'
285
300
  sn = "Cypraeovula (Luponia) amphithales perdentata"
286
- canonical(sn).should == 'Cypraeovula Luponia amphithales perdentata'
301
+ canonical(sn).should == 'Cypraeovula amphithales perdentata'
287
302
  details(sn).should == [{:genus=>{:string=>"Cypraeovula"}, :infragenus=>{:string=>"Luponia"}, :species=>{:string=>"amphithales"}, :infraspecies=>[{:string=>"perdentata", :rank=>"n/a"}]}]
288
303
  sn = "Polyrhachis orsyllus nat musculus Forel 1901"
289
304
  canonical(sn).should == "Polyrhachis orsyllus musculus"
290
305
  sn = 'Latrodectus 13-guttatus Thorell, 1875'
291
- canonical(sn).should == 'Latrodectus 13-guttatus'
292
- value(sn).should == 'Latrodectus 13-guttatus Thorell 1875'
293
- sn = 'Latrodectus 3guttatus Thorell, 1875'
294
- canonical(sn).should == 'Latrodectus 3-guttatus'
295
- value(sn).should == 'Latrodectus 3-guttatus Thorell 1875'
306
+ canonical(sn).should == 'Latrodectus tredecguttatus'
307
+ value(sn).should == 'Latrodectus tredecguttatus Thorell 1875'
308
+ sn = 'Latrodectus 3-guttatus Thorell, 1875'
309
+ canonical(sn).should == 'Latrodectus triguttatus'
310
+ value(sn).should == 'Latrodectus triguttatus Thorell 1875'
296
311
  sn = 'Balaninus c-album Schönherr, CJ., 1836'
297
312
  canonical(sn).should == 'Balaninus c-album'
298
313
  end
@@ -353,7 +368,7 @@ describe ScientificNameClean do
353
368
  parse(sn).should_not be_nil
354
369
  value(sn).should == "Arthopyrenia hyalospora (Nyl.) R.C. Harris comb. nov."
355
370
  canonical(sn).should == "Arthopyrenia hyalospora"
356
- details(sn).should == [{:genus=>{:string=>"Arthopyrenia"}, :species=>{:string=>"hyalospora", :authorship=>"(Nyl.) R.C. Harris", :combinationAuthorTeam=>{:authorTeam=>"R.C. Harris ", :author=>["R.C. Harris"]}, :basionymAuthorTeam=>{:authorTeam=>"Nyl.", :author=>["Nyl."]}}, :status=>"comb. nov."}]
371
+ details(sn).should == [{:genus=>{:string=>"Arthopyrenia"}, :species=>{:string=>"hyalospora", :authorship=>"(Nyl.) R.C. Harris", :combinationAuthorTeam=>{:authorTeam=>"R.C. Harris", :author=>["R.C. Harris"]}, :basionymAuthorTeam=>{:authorTeam=>"Nyl.", :author=>["Nyl."]}}, :status=>"comb. nov."}]
357
372
  pos(sn).should == {0=>["genus", 12], 13=>["species", 23], 25=>["author_word", 29], 31=>["author_word", 35], 36=>["author_word", 42]}
358
373
  end
359
374
 
@@ -414,6 +429,9 @@ describe ScientificNameClean do
414
429
  parse(res[0]).hybrid.should be_true
415
430
  details(res[0]).should == res[1]
416
431
  end
432
+ sn = "Rosa alpina x pomifera"
433
+ canonical(sn).should == "Rosa alpina × pomifera"
434
+ parse(sn).details.should == [{:genus=>{:string=>"Rosa"}, :species=>{:string=>"alpina"}}, {:species=>{:string=>"pomifera"}, :genus=>{:string=>"Rosa"}}]
417
435
  end
418
436
 
419
437
  it "should parse hybrid combination" do
@@ -421,14 +439,14 @@ describe ScientificNameClean do
421
439
  parse(sn).should_not be_nil
422
440
  parse(sn).hybrid.should be_true
423
441
  value(sn).should == "Arthopyrenia hyalospora \303\227 Hydnellum scrobiculatum"
424
- canonical(sn).should == "Arthopyrenia hyalospora Hydnellum scrobiculatum"
442
+ canonical(sn).should == "Arthopyrenia hyalospora × Hydnellum scrobiculatum"
425
443
  details(sn).should == [{:genus=>{:string=>"Arthopyrenia"}, :species=>{:string=>"hyalospora"}}, {:genus=>{:string=>"Hydnellum"}, :species=>{:string=>"scrobiculatum"}}]
426
444
  pos(sn).should == {0=>["genus", 12], 13=>["species", 23], 26=>["genus", 35], 36=>["species", 49]}
427
445
  sn = "Arthopyrenia hyalospora (Banker) D. Hall X Hydnellum scrobiculatum D.E. Stuntz"
428
446
  parse(sn).should_not be_nil
429
447
  parse(sn).hybrid.should be_true
430
448
  value(sn).should == "Arthopyrenia hyalospora (Banker) D. Hall \303\227 Hydnellum scrobiculatum D.E. Stuntz"
431
- canonical(sn).should == "Arthopyrenia hyalospora Hydnellum scrobiculatum"
449
+ canonical(sn).should == "Arthopyrenia hyalospora × Hydnellum scrobiculatum"
432
450
  pos(sn).should == {0=>["genus", 12], 13=>["species", 23], 25=>["author_word", 31], 33=>["author_word", 35], 36=>["author_word", 40], 43=>["genus", 52], 53=>["species", 66], 67=>["author_word", 71], 72=>["author_word", 78]}
433
451
  value("Arthopyrenia hyalospora X").should == "Arthopyrenia hyalospora \303\227 ?"
434
452
  sn = "Arthopyrenia hyalospora x"
@@ -446,7 +464,6 @@ describe ScientificNameClean do
446
464
 
447
465
  it 'should parse names with taxon concept' do
448
466
  sn = "Stenometope laevissimus sec. Eschmeyer 2004"
449
- val = @parser.failure_reason.to_s.match(/column [0-9]*/).to_s().gsub(/column /,'')
450
467
  details(sn).should == [{:genus=>{:string=>"Stenometope"}, :species=>{:string=>"laevissimus"}, :taxon_concept=>{:authorship=>"Eschmeyer 2004", :basionymAuthorTeam=>{:authorTeam=>"Eschmeyer", :author=>["Eschmeyer"], :year=>"2004"}}}]
451
468
  pos(sn).should == {0=>["genus", 11], 12=>["species", 23], 29=>["author_word", 38], 39=>["year", 43]}
452
469
  sn = "Stenometope laevissimus Bibron 1855 sec. Eschmeyer 2004"
@@ -501,26 +518,35 @@ describe ScientificNameClean do
501
518
  details(sn).should == [{:genus=>{:string=>"Flexibacter"}, :species=>{:string=>"elegans", :authorship=>"Soriano 1945, non Lewin 1969", :basionymAuthorTeam=>{:authorTeam=>"Soriano", :author=>["Soriano"], :year=>"1945"}}}]
502
519
  end
503
520
 
504
- # it 'should parse hybrid names with capitalized second name in genus (botanical code error)' do
505
- # sn = 'Anacampti-Platanthera P. Fourn.'
506
- # @parser.parse(sn)
507
- # puts @parser.failure_reason
508
- # parse(sn).should_not be_nil
509
- # canonical(sn).should == 'Anacamptiplatanthera'
510
- # sn = 'Anacampti-Platanthera vulgaris P. Fourn.'
511
- # parse(sn).should_not be_nil
512
- # canonical(sn).should == 'Anacamptiplatanthera'
513
- # end
521
+ # Combination genus names should be merged without dash or capital letter
522
+ it 'should parse hybrid names with capitalized second name in genus (botanical code error)' do
523
+ sn = 'Anacampti-Platanthera P. Fourn.'
524
+ parse(sn).should_not be_nil
525
+ canonical(sn).should == 'Anacamptiplatanthera'
526
+ sn = 'Anacampti-Platanthera vulgaris P. Fourn.'
527
+ parse(sn).should_not be_nil
528
+ canonical(sn).should == 'Anacamptiplatanthera vulgaris'
529
+ end
514
530
 
515
- # it 'shoud parse hybrid names with * character' do
516
- # sn = "Carduus acanthoides * crispus"
517
- # details(sn).should == ''
518
- # end
519
-
520
531
  it 'should parse genus names starting with uppercase letters AE OE' do
521
532
  sn = 'AEmona separata Broun 1921'
522
533
  canonical(sn).should == 'Aemona separata'
523
534
  sn = 'OEmona simplex White, 1855'
524
535
  canonical(sn).should == 'Oemona simplex'
525
536
  end
537
+ #"Arthrosamanea eriorhachis (Harms & sine ref. ) Aubrév." -- ignore & sine ref. (means without reference)
538
+
539
+ =begin
540
+ new stuff
541
+
542
+ sn = "Orchidaceae × Asconopsis hort."
543
+ canonical(sn).should == "Orchidaceae x Asconopsis"
544
+ sn
545
+ Tamiops swinhoei near hainanus|Tamiops swinhoei near hainanus
546
+ Conus textile form archiepiscopus|Conus textile form archiepiscopus|
547
+ Crypticus pseudosericeus ssp. olivieri Desbrochers des Loges,1881|Crypticus pseudosericeus olivieri des
548
+ Solanum nigrum subsp nigrum|Solanum nigrum subsp nigrum
549
+ Protoglossus taeniatum author unknown|Protoglossus taeniatum author unknown
550
+ Dupontiella (S. ?) bicolor|Dupontiella|
551
+ =end
526
552
  end