biodiversity19 2.1.0 → 3.0.0

Sign up to get free protection for your applications and to get access to all the features.
data/.travis.yml CHANGED
@@ -1,5 +1,6 @@
1
1
  rvm:
2
2
  - 1.9.3
3
+ - 2.0.0
3
4
  bundler_args: --without development
4
5
  branches:
5
6
  only:
data/CHANGELOG CHANGED
@@ -1,3 +1,7 @@
1
+ 3.0.0 -- removing support for ruby 1.8.7, making biodiversity gem be the same
2
+ as biodiversity19, deprecating biodiversity19. A few newly discovered bugs
3
+ are fixed.
4
+
1
5
  2.1.0 -- added ScientificNameParser.version method
2
6
 
3
7
  2.0.0 -- backward incompatible change in parserver, therefore new major number.
data/README.md CHANGED
@@ -8,14 +8,18 @@ Biodiversity
8
8
 
9
9
  Parses taxonomic scientific name and breaks it into semantic elements.
10
10
 
11
+ *WARNING, IMPORTANT!:*
12
+ Support for Ruby 1.8.7 IS DROPPED. Both biodiversity and
13
+ biodiversity19 will be for Ruby > 1.9.1 and will be identical gems.
14
+
15
+ biodiversity19 is now deprecated and will be phased out in a couple of years.
16
+ You are strongly encouraged to change your dependencies from
17
+ biodiversity19 to biodiversity
18
+
11
19
  Installation
12
20
  ------------
13
21
 
14
- *WARNING:* Do not use Ruby 1.8.7 -- it is outdated. The
15
- biodiversity gem for Ruby 1.8.7 is not getting updated anymore
16
-
17
- sudo gem install biodiversity19 #for ruby 1.9.x
18
- sudo gem install biodiversity #for ruby 1.8.x
22
+ sudo gem install biodiversity
19
23
 
20
24
  Example usage
21
25
  -------------
@@ -25,7 +29,12 @@ Example usage
25
29
  You can parse file with taxonomic names from command line.
26
30
  File should contain one scientific name per line
27
31
 
28
- nnparser file_with_names
32
+ nnparse file_with_names
33
+
34
+ The results will be put into parsed.json file in the current directory.
35
+ To save results into a different file:
36
+
37
+ nnparse file_with_names output_file
29
38
 
30
39
  ### As a socket server
31
40
 
@@ -112,6 +121,18 @@ You can use it as a library in Ruby, JRuby etc.
112
121
  # to get detailed information about elements of the name
113
122
  parser.parse("Pseudocercospora dendrobii (H.C. Burnett 1883) U. Braun & Crous 2003")[:scientificName][:details]
114
123
 
124
+ Returned result is not always linear, if name is complex. To get simple linear
125
+ representation of the name you can use:
126
+
127
+ parser.parse("Pseudocercospora dendrobii (H.C. Burnett) U. Braun & Crous 2003")[:scientificName][:position]
128
+ # returns {0=>["genus", 16], 17=>["species", 26],
129
+ # 28=>["author_word", 32], 33=>["author_word", 40],
130
+ # 42=>["author_word", 44], 45=>["author_word", 50],
131
+ # 53=>["author_word", 58], 59=>["year", 63]}
132
+ # where the key is the char index of the start of
133
+ # a word, first element of the value is a semantic meaning
134
+ # of the word, second element of the value is the character index
135
+ # of end of the word
115
136
 
116
137
  To parse using several CPUs (4 seem to be optimal)
117
138
 
data/Rakefile CHANGED
@@ -20,35 +20,41 @@ ruby_version = RUBY_VERSION.split('.')[0..1].join('').to_i
20
20
  begin
21
21
  require 'jeweler'
22
22
  Jeweler::Tasks.new do |gem|
23
- gem.name = ruby_version < 19 ? "biodiversity" : "biodiversity19"
23
+ gem.name = 'biodiversity19'
24
+ #To delete ruby_version < 19 ? 'biodiversity' : 'biodiversity19'
24
25
  gem.summary = 'Parser of scientific names'
25
26
  gem.description = 'Tools for biodiversity informatics'
26
- gem.email = "dmozzherin@gmail.com"
27
- gem.homepage = "http://github.com/GlobalNamesArchitecture/biodiversity"
28
- gem.authors = ["Dmitry Mozzherin"]
27
+ gem.email = 'dmozzherin@gmail.com'
28
+ gem.homepage = 'http://github.com/GlobalNamesArchitecture/biodiversity'
29
+ gem.authors = ['Dmitry Mozzherin']
29
30
  gem.has_rdoc = false
30
31
  gem.bindir = 'bin'
31
32
  gem.executables = ['nnparse', 'parserver']
32
33
  gem.add_dependency('treetop')
33
34
  gem.add_dependency('parallel')
34
- gem.add_dependency('json') if ruby_version < 19
35
+ # gem.add_dependency('json') if ruby_version < 19
35
36
  gem.add_development_dependency "rspec"
36
- # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
37
+ # gem is a Gem::Specification...
38
+ # see http://www.rubygems.org/read/chapter/20 for additional settings
37
39
  end
38
40
  rescue LoadError
39
- puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
41
+ puts 'Jeweler (or a dependency) not available. ' +
42
+ 'Install it with: sudo gem install jeweler'
40
43
  end
41
44
 
42
45
  task :tt do
43
- ['scientific_name_clean', 'scientific_name_dirty', 'scientific_name_canonical'].each do |f|
46
+ ['scientific_name_clean',
47
+ 'scientific_name_dirty',
48
+ 'scientific_name_canonical'].each do |f|
44
49
  file = "#{dir}/lib/biodiversity/parser/#{f}"
45
50
  FileUtils.rm("#{file}.rb") if FileTest.exist?("#{file}.rb")
46
51
  system("tt #{file}.treetop")
47
52
  rf = "#{file}.rb"
48
- rfn = open(rf + ".tmp", 'w')
53
+ rfn = open(rf + '.tmp', 'w')
49
54
  skip_head = false
50
55
  f = open(rf)
51
- #getting around a bug in treetop which prevents setting UTF-8 encoding in ruby19
56
+ # getting around a bug in treetop which prevents setting
57
+ # UTF-8 encoding in ruby19
52
58
  f.each_with_index do |l, i|
53
59
  skip_head = l.match(/^# Autogenerated/) if i == 0
54
60
  if skip_head && (l.strip == '' || l.match(/^# Autogenerated/))
data/VERSION CHANGED
@@ -1 +1 @@
1
- 2.1.0
1
+ 3.0.0
@@ -8,19 +8,19 @@ grammar ScientificNameClean
8
8
  def value
9
9
  a.value.gsub(/\s{2,}/, ' ').strip
10
10
  end
11
-
11
+
12
12
  def canonical
13
13
  a.canonical.gsub(/\s{2,}/, ' ').strip
14
14
  end
15
-
15
+
16
16
  def pos
17
17
  a.pos
18
18
  end
19
-
19
+
20
20
  def hybrid
21
21
  a.hybrid
22
22
  end
23
-
23
+
24
24
  def details
25
25
  a.details.class == Array ? a.details : [a.details]
26
26
  end
@@ -30,25 +30,25 @@ grammar ScientificNameClean
30
30
  end
31
31
  }
32
32
  end
33
-
33
+
34
34
  rule scientific_name_5
35
35
  a:multinomial_name space_hard hybrid_character space_hard b:species {
36
36
  def value
37
37
  a.value + " × " + b.value
38
38
  end
39
-
39
+
40
40
  def canonical
41
41
  a.canonical + " × " + b.canonical
42
42
  end
43
-
43
+
44
44
  def pos
45
45
  a.pos.merge(b.pos)
46
46
  end
47
-
47
+
48
48
  def hybrid
49
49
  true
50
50
  end
51
-
51
+
52
52
  def details
53
53
  [a.details, b.details.merge({:genus => a.details[:genus]})]
54
54
  end
@@ -58,19 +58,19 @@ grammar ScientificNameClean
58
58
  def value
59
59
  a.value + " " + b.apply(c)
60
60
  end
61
-
61
+
62
62
  def canonical
63
63
  a.canonical
64
64
  end
65
-
65
+
66
66
  def pos
67
67
  a.pos.merge(c.pos)
68
68
  end
69
-
69
+
70
70
  def hybrid
71
71
  a.hybrid
72
72
  end
73
-
73
+
74
74
  def details
75
75
  a.details.merge(b.details(c))
76
76
  end
@@ -78,25 +78,25 @@ grammar ScientificNameClean
78
78
  /
79
79
  scientific_name_4
80
80
  end
81
-
81
+
82
82
  rule scientific_name_4
83
83
  a:scientific_name_1 space hybrid_character space b:scientific_name_1 {
84
84
  def value
85
85
  a.value + " × " + b.value
86
86
  end
87
-
87
+
88
88
  def canonical
89
89
  a.canonical + " × " + b.canonical
90
90
  end
91
-
91
+
92
92
  def pos
93
93
  a.pos.merge(b.pos)
94
94
  end
95
-
95
+
96
96
  def hybrid
97
97
  true
98
98
  end
99
-
99
+
100
100
  def details
101
101
  [a.details, b.details]
102
102
  end
@@ -106,19 +106,19 @@ grammar ScientificNameClean
106
106
  def value
107
107
  a.value + " × ?"
108
108
  end
109
-
109
+
110
110
  def canonical
111
111
  a.canonical
112
112
  end
113
-
113
+
114
114
  def pos
115
115
  a.pos
116
116
  end
117
-
117
+
118
118
  def hybrid
119
119
  true
120
120
  end
121
-
121
+
122
122
  def details
123
123
  [a.details, "?"]
124
124
  end
@@ -126,25 +126,25 @@ grammar ScientificNameClean
126
126
  /
127
127
  scientific_name_3
128
128
  end
129
-
129
+
130
130
  rule scientific_name_3
131
131
  a:hybrid_character space b:scientific_name_2 {
132
132
  def value
133
133
  a.value + " " + b.value
134
134
  end
135
-
135
+
136
136
  def canonical
137
137
  b.canonical
138
138
  end
139
-
139
+
140
140
  def pos
141
141
  b.pos
142
142
  end
143
-
143
+
144
144
  def hybrid
145
145
  true
146
146
  end
147
-
147
+
148
148
  def details
149
149
  b.details
150
150
  end
@@ -152,25 +152,25 @@ grammar ScientificNameClean
152
152
  /
153
153
  scientific_name_2
154
154
  end
155
-
155
+
156
156
  rule scientific_name_2
157
157
  a:scientific_name_1 space b:status_part {
158
158
  def value
159
159
  a.value + " " + b.value
160
160
  end
161
-
161
+
162
162
  def canonical
163
163
  a.canonical
164
164
  end
165
-
165
+
166
166
  def pos
167
167
  a.pos
168
168
  end
169
-
169
+
170
170
  def hybrid
171
171
  a.hybrid rescue false
172
172
  end
173
-
173
+
174
174
  def details
175
175
  a.details.merge(b.details)
176
176
  end
@@ -184,10 +184,10 @@ grammar ScientificNameClean
184
184
  /
185
185
  multinomial_name
186
186
  /
187
- uninomial_name
187
+ uninomial_name
188
188
  end
189
-
190
-
189
+
190
+
191
191
  rule status_part
192
192
  a:status_word space b:status_part {
193
193
  def value
@@ -200,7 +200,7 @@ grammar ScientificNameClean
200
200
  /
201
201
  status_word
202
202
  end
203
-
203
+
204
204
  rule status_word
205
205
  latin_word [\.] {
206
206
  def value
@@ -216,7 +216,7 @@ grammar ScientificNameClean
216
216
 
217
217
  rule unparsed
218
218
  .+ space {
219
-
219
+
220
220
  def value
221
221
  ''
222
222
  end
@@ -238,52 +238,52 @@ grammar ScientificNameClean
238
238
  end
239
239
  }
240
240
  end
241
-
241
+
242
242
  rule multinomial_name
243
243
  a:genus space b:infragenus space aid:annotation_identification? space c:species space_hard d:infraspecies_mult {
244
244
  def value
245
245
  a.value + " " + b.value + " " + c.value + " " + d.value
246
246
  end
247
-
247
+
248
248
  def canonical
249
249
  a.canonical + " " + c.canonical + " " + d.canonical
250
250
  end
251
-
251
+
252
252
  def pos
253
253
  a.pos.merge(b.pos).merge(c.pos).merge(d.pos)
254
254
  end
255
-
255
+
256
256
  def hybrid
257
257
  c.hybrid rescue false
258
258
  end
259
-
259
+
260
260
  def details
261
261
  a.details.merge(b.details).merge(c.details).merge(d.details)
262
262
  end
263
263
  }
264
- /
264
+ /
265
265
  a:genus space b:infragenus space aid:annotation_identification? space c:species space aid:annotation_identification space d:infraspecies_mult {
266
266
  def value
267
267
  a.value + " " + b.value + " " + c.value + " " + d.value
268
268
  end
269
-
269
+
270
270
  def canonical
271
271
  a.canonical + " " + c.canonical + " " + d.canonical
272
272
  end
273
-
273
+
274
274
  def pos
275
275
  a.pos.merge(b.pos).merge(c.pos).merge(d.pos)
276
276
  end
277
-
277
+
278
278
  def hybrid
279
279
  c.hybrid rescue false
280
280
  end
281
-
281
+
282
282
  def details
283
283
  a.details.merge(b.details).merge(c.details).merge(d.details)
284
284
  end
285
285
  }
286
- /
286
+ /
287
287
  a:genus space b:infragenus space aid:annotation_identification? space c:species {
288
288
  def value
289
289
  if defined? aid.apply
@@ -292,7 +292,7 @@ grammar ScientificNameClean
292
292
  a.value + " " + b.value + " " + c.value
293
293
  end
294
294
  end
295
-
295
+
296
296
  def canonical
297
297
  if defined? aid.apply
298
298
  a.canonical + aid.canonical(c)
@@ -300,7 +300,7 @@ grammar ScientificNameClean
300
300
  a.canonical + " " + c.canonical
301
301
  end
302
302
  end
303
-
303
+
304
304
  def pos
305
305
  if defined? aid.apply
306
306
  a.pos.merge(b.pos).merge(aid.pos(c))
@@ -308,11 +308,11 @@ grammar ScientificNameClean
308
308
  a.pos.merge(b.pos).merge(c.pos)
309
309
  end
310
310
  end
311
-
311
+
312
312
  def hybrid
313
313
  c.hybrid rescue false
314
314
  end
315
-
315
+
316
316
  def details
317
317
  if defined? aid.apply
318
318
  a.details.merge(b.details).merge(aid.apply(c))
@@ -324,21 +324,21 @@ grammar ScientificNameClean
324
324
  /
325
325
  a:genus space aid:annotation_identification? space b:species space_hard c:infraspecies_mult {
326
326
  def value
327
- a.value + " " + b.value + " " + c.value
327
+ a.value + " " + b.value + " " + c.value
328
328
  end
329
329
 
330
330
  def canonical
331
331
  a.canonical + " " + b.canonical + " " + c.canonical
332
332
  end
333
-
333
+
334
334
  def pos
335
335
  a.pos.merge(b.pos).merge(c.pos)
336
336
  end
337
-
337
+
338
338
  def hybrid
339
339
  b.hybrid rescue false
340
340
  end
341
-
341
+
342
342
  def details
343
343
  a.details.merge(b.details).merge(c.details)
344
344
  end
@@ -349,7 +349,7 @@ grammar ScientificNameClean
349
349
  if defined? aid.apply
350
350
  a.value + aid.apply(b)
351
351
  else
352
- a.value + " " + b.value
352
+ a.value + " " + b.value
353
353
  end
354
354
  end
355
355
 
@@ -360,7 +360,7 @@ grammar ScientificNameClean
360
360
  a.canonical + " " + b.canonical
361
361
  end
362
362
  end
363
-
363
+
364
364
  def pos
365
365
  if defined? aid.apply
366
366
  a.pos.merge(aid.pos(b))
@@ -368,11 +368,11 @@ grammar ScientificNameClean
368
368
  a.pos.merge(b.pos)
369
369
  end
370
370
  end
371
-
371
+
372
372
  def hybrid
373
373
  b.hybrid rescue false
374
374
  end
375
-
375
+
376
376
  def details
377
377
  if defined? aid.apply
378
378
  a.details.merge(aid.details(b))
@@ -390,15 +390,15 @@ grammar ScientificNameClean
390
390
  def canonical
391
391
  a.canonical + aid.canonical(b)
392
392
  end
393
-
393
+
394
394
  def pos
395
395
  a.pos.merge(aid.pos(b))
396
396
  end
397
-
397
+
398
398
  def hybrid
399
399
  false
400
400
  end
401
-
401
+
402
402
  def details
403
403
  a.details.merge(aid.details(b))
404
404
  end
@@ -408,7 +408,7 @@ grammar ScientificNameClean
408
408
  rule multiuninomial_name
409
409
  a:uninomial_name space b:rank_uninomial space c:uninomial_name {
410
410
 
411
- def value
411
+ def value
412
412
  a.value + " " + b.value + " " + c.value
413
413
  end
414
414
 
@@ -429,23 +429,23 @@ grammar ScientificNameClean
429
429
  end
430
430
  }
431
431
  end
432
-
432
+
433
433
  rule infraspecies_mult
434
434
  a:infraspecies space b:infraspecies_mult {
435
435
  def value
436
436
  a.value + " " + b.value
437
437
  end
438
-
438
+
439
439
  def canonical
440
440
  a.canonical + " " + b.canonical
441
441
  end
442
-
442
+
443
443
  def pos
444
444
  a.pos.merge(b.pos)
445
445
  end
446
-
446
+
447
447
  def details
448
- a_array = a.details[:infraspecies].class == Array ? a.details[:infraspecies] : [a.details[:infraspecies]]
448
+ a_array = a.details[:infraspecies].class == Array ? a.details[:infraspecies] : [a.details[:infraspecies]]
449
449
  b_array = b.details[:infraspecies].class == Array ? b.details[:infraspecies] : [b.details[:infraspecies]]
450
450
  a.details.merge({:infraspecies => a_array + b_array})
451
451
  end
@@ -461,42 +461,42 @@ grammar ScientificNameClean
461
461
  end
462
462
  }
463
463
  end
464
-
464
+
465
465
  rule infraspecies
466
466
  a:infraspecies_string space b:authorship {
467
467
  def value
468
468
  a.value + " " + b.value
469
469
  end
470
-
470
+
471
471
  def canonical
472
472
  a.canonical
473
473
  end
474
-
474
+
475
475
  def pos
476
476
  a.pos.merge(b.pos)
477
477
  end
478
-
478
+
479
479
  def details
480
480
  {:infraspecies => a.details[:infraspecies].merge(b.details)}
481
481
  end
482
482
  }
483
483
  /
484
- infraspecies_string
484
+ infraspecies_string
485
485
  end
486
-
486
+
487
487
  rule infraspecies_string
488
488
  sel:rank space a:species_word {
489
- def value
489
+ def value
490
490
  sel.apply(a)
491
491
  end
492
492
  def canonical
493
493
  sel.canonical(a)
494
494
  end
495
-
495
+
496
496
  def pos
497
497
  sel.pos(a)
498
498
  end
499
-
499
+
500
500
  def details
501
501
  sel.details(a)
502
502
  end
@@ -506,18 +506,18 @@ grammar ScientificNameClean
506
506
  def value
507
507
  aid.apply(a)
508
508
  end
509
-
509
+
510
510
  def canonical
511
511
  aid.canonical(a)
512
512
  end
513
-
513
+
514
514
  def pos
515
515
  def a.pos
516
516
  {interval.begin => ['infraspecies', a.interval.end]}
517
517
  end
518
518
  aid.pos(a)
519
519
  end
520
-
520
+
521
521
  def details
522
522
  def a.details
523
523
  {:infraspecies => {:string => value, :rank => 'n/a'}}
@@ -530,21 +530,21 @@ grammar ScientificNameClean
530
530
  def value
531
531
  a.value
532
532
  end
533
-
533
+
534
534
  def canonical
535
535
  value
536
536
  end
537
-
537
+
538
538
  def pos
539
539
  {interval.begin => ['infraspecies', interval.end]}
540
540
  end
541
-
541
+
542
542
  def details
543
543
  {:infraspecies => {:string => value, :rank => 'n/a'}}
544
544
  end
545
545
  }
546
546
  end
547
-
547
+
548
548
  rule taxon_concept_rank
549
549
  ("sec."/"sensu.") {
550
550
  def value
@@ -555,7 +555,7 @@ grammar ScientificNameClean
555
555
  end
556
556
  def details(a = nil)
557
557
  {:taxon_concept => a.details}
558
- end
558
+ end
559
559
  }
560
560
  end
561
561
 
@@ -573,12 +573,12 @@ grammar ScientificNameClean
573
573
  def canonical(a)
574
574
  " " + a.value
575
575
  end
576
-
576
+
577
577
  def pos(a)
578
- interval_end = text_value[-1] == ' ' ? interval.end - 1 : interval.end
578
+ interval_end = text_value[-1] == ' ' ? interval.end - 1 : interval.end
579
579
  {interval.begin => ['infraspecific_type', interval_end], a.interval.begin => ['infraspecies', a.interval.end]}
580
580
  end
581
-
581
+
582
582
  def details(a = nil)
583
583
  {:infraspecies => {:string => (a.value rescue nil), :rank => text_value.strip}}
584
584
  end
@@ -594,31 +594,31 @@ grammar ScientificNameClean
594
594
  def pos(uni)
595
595
  {interval.begin => ['rank_uninomial', interval.end], uni.interval.begin => ['uninomial', uni.interval.end]}
596
596
  end
597
-
597
+
598
598
  def details(uni)
599
599
  {:rank_uninomials => value, :uninomial2 => uni.details[:uninomial]}
600
600
  end
601
601
  }
602
602
  end
603
-
603
+
604
604
  rule species
605
605
  a:species_string space b:authorship {
606
606
  def value
607
607
  a.value + " " + b.value
608
608
  end
609
-
609
+
610
610
  def canonical
611
611
  a.canonical
612
612
  end
613
-
613
+
614
614
  def hybrid
615
615
  a.hybrid rescue false
616
616
  end
617
-
617
+
618
618
  def pos
619
619
  a.pos.merge(b.pos)
620
620
  end
621
-
621
+
622
622
  def details
623
623
  {:species => a.details[:species].merge(b.details)}
624
624
  end
@@ -626,21 +626,21 @@ grammar ScientificNameClean
626
626
  /
627
627
  species_string
628
628
  end
629
-
629
+
630
630
  rule species_string
631
631
  species_word {
632
632
  def canonical
633
633
  value
634
634
  end
635
-
635
+
636
636
  def pos
637
637
  {interval.begin => ['species', interval.end]}
638
638
  end
639
-
639
+
640
640
  def hybrid
641
641
  false
642
642
  end
643
-
643
+
644
644
  def details
645
645
  {:species => {:string => value}}
646
646
  end
@@ -648,41 +648,41 @@ grammar ScientificNameClean
648
648
  /
649
649
  species_word_hybrid
650
650
  end
651
-
651
+
652
652
  rule infragenus
653
653
  left_paren space a:(cap_latin_word/capped_dotted_char) space right_paren {
654
654
  def value
655
655
  "(" + a.value + ")"
656
656
  end
657
-
657
+
658
658
  def canonical
659
659
  a.value
660
660
  end
661
-
661
+
662
662
  def pos
663
663
  {a.interval.begin => ['infragenus', a.interval.end]}
664
664
  end
665
-
665
+
666
666
  def details
667
667
  {:infragenus => {:string => a.value}}
668
668
  end
669
669
  }
670
670
  end
671
-
671
+
672
672
  rule genus
673
673
  a:(abbreviated_genus/uninomial_string) !(space_hard author_prefix_word space_hard author_word) {
674
674
  def value
675
675
  a.value
676
676
  end
677
-
677
+
678
678
  def pos
679
679
  {a.interval.begin => ['genus', a.interval.end]}
680
680
  end
681
-
681
+
682
682
  def canonical
683
683
  a.value
684
684
  end
685
-
685
+
686
686
  def details
687
687
  {:genus => {:string => a.value}}
688
688
  end
@@ -706,27 +706,27 @@ grammar ScientificNameClean
706
706
  def details
707
707
  {:abbreviated_genus => {:string => value}}
708
708
  end
709
- }
709
+ }
710
710
  end
711
-
711
+
712
712
  rule uninomial_name
713
713
  a:uninomial_string space b:infragenus space c:simple_authorship {
714
714
  def value
715
715
  a.value + " " + b.value + " " + c.value
716
716
  end
717
-
717
+
718
718
  def canonical
719
719
  a.canonical
720
720
  end
721
-
721
+
722
722
  def pos
723
723
  a.pos.merge(b.pos).merge(c.pos)
724
724
  end
725
-
725
+
726
726
  def hybrid
727
727
  false
728
728
  end
729
-
729
+
730
730
  def details
731
731
  {:uninomial => a.details[:uninomial].merge(b.details).merge(c.details)}
732
732
  end
@@ -736,19 +736,19 @@ grammar ScientificNameClean
736
736
  def value
737
737
  a.value + " " + b.value
738
738
  end
739
-
739
+
740
740
  def canonical
741
741
  a.canonical
742
742
  end
743
-
743
+
744
744
  def pos
745
745
  a.pos.merge(b.pos)
746
746
  end
747
-
747
+
748
748
  def hybrid
749
749
  false
750
750
  end
751
-
751
+
752
752
  def details
753
753
  {:uninomial => a.details[:uninomial].merge(b.details)}
754
754
  end
@@ -758,19 +758,19 @@ grammar ScientificNameClean
758
758
  def value
759
759
  a.value + " " + b.value
760
760
  end
761
-
761
+
762
762
  def canonical
763
763
  a.canonical
764
764
  end
765
-
765
+
766
766
  def pos
767
767
  a.pos.merge(b.pos)
768
768
  end
769
-
769
+
770
770
  def hybrid
771
771
  false
772
772
  end
773
-
773
+
774
774
  def details
775
775
  {:uninomial => a.details[:uninomial].merge(b.details)}
776
776
  end
@@ -784,31 +784,31 @@ grammar ScientificNameClean
784
784
  def canonical
785
785
  value
786
786
  end
787
-
787
+
788
788
  def pos
789
789
  {interval.begin => ['uninomial', interval.end]}
790
790
  end
791
-
791
+
792
792
  def hybrid
793
793
  false
794
794
  end
795
-
796
- def details
795
+
796
+ def details
797
797
  {:uninomial => {:string => value}}
798
798
  end
799
799
  }
800
800
  end
801
-
801
+
802
802
  rule authorship
803
803
  a:basionym_authorship_with_parenthesis space b:simple_authorship ","? space c:ex_authorship {
804
804
  def value
805
805
  a.value + " " + b.value + " " + c.value
806
806
  end
807
-
807
+
808
808
  def pos
809
809
  a.pos.merge(b.pos).merge(c.pos)
810
810
  end
811
-
811
+
812
812
  def details
813
813
  val = {:authorship => text_value.strip, :combinationAuthorTeam => b.details[:basionymAuthorTeam], :basionymAuthorTeam => a.details[:basionymAuthorTeam]}
814
814
  val[:combinationAuthorTeam].merge!(c.details)
@@ -820,11 +820,11 @@ grammar ScientificNameClean
820
820
  def value
821
821
  a.value + " " + b.value
822
822
  end
823
-
823
+
824
824
  def pos
825
825
  a.pos.merge(b.pos)
826
826
  end
827
-
827
+
828
828
  def details
829
829
  {:authorship => text_value.strip, :combinationAuthorTeam => b.details[:basionymAuthorTeam], :basionymAuthorTeam => a.details[:basionymAuthorTeam]}
830
830
  end
@@ -836,11 +836,11 @@ grammar ScientificNameClean
836
836
  def value
837
837
  a.value + " " + b.value
838
838
  end
839
-
839
+
840
840
  def pos
841
841
  a.pos.merge(b.pos)
842
842
  end
843
-
843
+
844
844
  def details
845
845
  val = a.details
846
846
  val[:authorship] = text_value.strip
@@ -851,21 +851,21 @@ grammar ScientificNameClean
851
851
  /
852
852
  simple_authorship
853
853
  end
854
-
855
-
854
+
855
+
856
856
  rule basionym_authorship_with_parenthesis
857
857
  left_paren space a:authors_names space right_paren space [,]? space b:year {
858
858
  def value
859
859
  "(" + a.value + " " + b.value + ")"
860
860
  end
861
-
861
+
862
862
  def pos
863
863
  a.pos.merge(b.pos)
864
- end
865
-
864
+ end
865
+
866
866
  def details
867
- { :authorship => text_value,
868
- :basionymAuthorTeam => {:author_team => text_value}.merge(a.details).merge(b.details)
867
+ { :authorship => text_value,
868
+ :basionymAuthorTeam => {:author_team => text_value}.merge(a.details).merge(b.details)
869
869
  }
870
870
  end
871
871
  }
@@ -874,11 +874,11 @@ grammar ScientificNameClean
874
874
  def value
875
875
  "(" + a.value + " " + b.value + ")"
876
876
  end
877
-
877
+
878
878
  def pos
879
879
  a.pos.merge(b.pos)
880
880
  end
881
-
881
+
882
882
  def details
883
883
  val = a.details
884
884
  val[:basionymAuthorTeam].merge!(b.details)
@@ -891,15 +891,15 @@ grammar ScientificNameClean
891
891
  def value
892
892
  "(" + a.value + ")"
893
893
  end
894
-
894
+
895
895
  def pos
896
896
  a.pos
897
897
  end
898
-
898
+
899
899
  def details
900
900
  val = a.details
901
901
  val[:authorship] = text_value
902
- val
902
+ val
903
903
  end
904
904
  }
905
905
  /
@@ -907,32 +907,32 @@ grammar ScientificNameClean
907
907
  def value
908
908
  "(?)"
909
909
  end
910
-
910
+
911
911
  def pos
912
912
  {a.interval.begin => ['unknown_author', a.interval.end]}
913
913
  end
914
-
914
+
915
915
  def details
916
916
  {:authorship => text_value, :basionymAuthorTeam => {:authorTeam => text_value, :author => ['?']}}
917
917
  end
918
918
  }
919
919
  end
920
-
920
+
921
921
  rule ex_authorship
922
922
  ex_sep space b:simple_authorship {
923
923
  def value
924
924
  " ex " + b.value
925
925
  end
926
-
926
+
927
927
  def pos
928
928
  b.pos
929
929
  end
930
-
930
+
931
931
  def details
932
932
  val = {:exAuthorTeam => {:authorTeam => b.text_value.strip}.merge(b.details[:basionymAuthorTeam])}
933
933
  val
934
934
  end
935
- }
935
+ }
936
936
  end
937
937
 
938
938
  rule simple_authorship
@@ -940,17 +940,17 @@ grammar ScientificNameClean
940
940
  def value
941
941
  a.value + " " + b.value
942
942
  end
943
-
943
+
944
944
  def pos
945
945
  a.pos.merge(b.pos)
946
946
  end
947
-
947
+
948
948
  def details
949
949
  details_with_arg(:basionymAuthorTeam)
950
950
  end
951
-
951
+
952
952
  def details_with_arg(authorTeamType = 'basionymAuthorTeam')
953
- { :authorship => text_value,
953
+ { :authorship => text_value,
954
954
  authorTeamType.to_sym => {
955
955
  :authorTeam => a.text_value.strip
956
956
  }.merge(a.details).merge(b.details)
@@ -962,17 +962,17 @@ grammar ScientificNameClean
962
962
  def value
963
963
  a.value + " " + b.value
964
964
  end
965
-
965
+
966
966
  def pos
967
967
  a.pos.merge(b.pos)
968
968
  end
969
-
969
+
970
970
  def details
971
971
  details_with_arg(:basionymAuthorTeam)
972
972
  end
973
-
973
+
974
974
  def details_with_arg(authorTeamType = 'basionymAuthorTeam')
975
- { :authorship => text_value,
975
+ { :authorship => text_value,
976
976
  authorTeamType.to_sym => {
977
977
  :authorTeam => a.text_value.strip
978
978
  }.merge(a.details).merge(b.details)
@@ -986,27 +986,27 @@ grammar ScientificNameClean
986
986
  details[:basionymAuthorTeam].merge!(super)
987
987
  details
988
988
  end
989
-
989
+
990
990
  def details_with_arg(authorTeamType = 'basionymAuthorTeam')
991
- { :authorship => text_value,
991
+ { :authorship => text_value,
992
992
  authorTeamType.to_sym => {
993
993
  :authorTeam => text_value,
994
994
  }
995
- }
995
+ }
996
996
  end
997
997
  }
998
998
  end
999
-
999
+
1000
1000
  rule authors_names
1001
1001
  a:author_name space sep:author_separator space b:authors_names {
1002
1002
  def value
1003
1003
  sep.apply(a,b)
1004
1004
  end
1005
-
1005
+
1006
1006
  def pos
1007
1007
  sep.pos(a,b)
1008
1008
  end
1009
-
1009
+
1010
1010
  def details
1011
1011
  sep.details(a,b)
1012
1012
  end
@@ -1016,28 +1016,28 @@ grammar ScientificNameClean
1016
1016
  /
1017
1017
  unknown_auth
1018
1018
  end
1019
-
1020
-
1019
+
1020
+
1021
1021
  rule unknown_auth
1022
1022
  ("auct."/"auct"/"hort."/"hort"/"anon."/"anon"/"ht."/"ht") !latin_word {
1023
1023
  def value
1024
1024
  text_value
1025
1025
  end
1026
-
1026
+
1027
1027
  def pos
1028
1028
  {interval.begin => ['unknown_author', interval.end]}
1029
1029
  end
1030
-
1030
+
1031
1031
  def details
1032
1032
  {:author => ["unknown"]}
1033
1033
  end
1034
1034
  }
1035
1035
  end
1036
-
1036
+
1037
1037
  rule ex_sep
1038
1038
  ("ex"/"in") &[\s]
1039
1039
  end
1040
-
1040
+
1041
1041
  rule author_separator
1042
1042
  ("&amp;"/"&"/","/"and"/"et") {
1043
1043
  def apply(a,b)
@@ -1045,11 +1045,11 @@ grammar ScientificNameClean
1045
1045
  sep = " &" if ["&amp;", "&","and","et"].include? sep
1046
1046
  a.value + sep + " " + b.value
1047
1047
  end
1048
-
1048
+
1049
1049
  def pos(a,b)
1050
1050
  a.pos.merge(b.pos)
1051
1051
  end
1052
-
1052
+
1053
1053
  def details(a,b)
1054
1054
  {:author => a.details[:author] + b.details[:author]}
1055
1055
  end
@@ -1061,8 +1061,8 @@ grammar ScientificNameClean
1061
1061
  def value
1062
1062
  a.value + ' ' + b.value
1063
1063
  end
1064
-
1065
- def pos
1064
+
1065
+ def pos
1066
1066
  a.pos.merge(b.pos)
1067
1067
  end
1068
1068
 
@@ -1073,17 +1073,17 @@ grammar ScientificNameClean
1073
1073
  /
1074
1074
  author_name_without_postfix
1075
1075
  end
1076
-
1076
+
1077
1077
  rule author_name_without_postfix
1078
1078
  space a:author_prefix_word space b:author_name {
1079
1079
  def value
1080
1080
  a.value + " " + b.value
1081
1081
  end
1082
-
1082
+
1083
1083
  def pos
1084
1084
  a.pos.merge(b.pos)
1085
1085
  end
1086
-
1086
+
1087
1087
  def details
1088
1088
  {:author => [value]}
1089
1089
  end
@@ -1093,11 +1093,11 @@ grammar ScientificNameClean
1093
1093
  def value
1094
1094
  a.value + " " + b.value
1095
1095
  end
1096
-
1096
+
1097
1097
  def pos
1098
1098
  a.pos.merge(b.pos)
1099
1099
  end
1100
-
1100
+
1101
1101
  def details
1102
1102
  {:author => [value]}
1103
1103
  end
@@ -1105,17 +1105,17 @@ grammar ScientificNameClean
1105
1105
  /
1106
1106
  author_word
1107
1107
  end
1108
-
1108
+
1109
1109
  rule author_word
1110
1110
  "A S. Xu" {
1111
1111
  def value
1112
1112
  text_value.strip
1113
1113
  end
1114
-
1114
+
1115
1115
  def pos
1116
1116
  {interval.begin => ['author_word', 1], (interval.begin + 2) => ['author_word', 2], (interval.begin + 5) => ['author_word', 2]}
1117
1117
  end
1118
-
1118
+
1119
1119
  def details
1120
1120
  {:author => [value]}
1121
1121
  end
@@ -1125,28 +1125,28 @@ grammar ScientificNameClean
1125
1125
  def value
1126
1126
  text_value.strip
1127
1127
  end
1128
-
1128
+
1129
1129
  def pos
1130
1130
  #cheating because there are several words in some of them
1131
1131
  {interval.begin => ['author_word', interval.end]}
1132
1132
  end
1133
-
1133
+
1134
1134
  def details
1135
1135
  {:author => [value]}
1136
1136
  end
1137
1137
  }
1138
- /
1138
+ /
1139
1139
  ("Å"/"Ö"/"Á"/"Ø"/"Ô"/"Š"/"Ś"/"Č"/"Ķ"/"Ł"/"É"/"Ž"/[A-W]/[Y-Z]) [^0-9\[\]\(\)\s&,]* {
1140
1140
  def value
1141
1141
  text_value.gsub(/([\p{Lu}]{3,})/) do |match|
1142
1142
  UnicodeUtils.titlecase(match)
1143
1143
  end
1144
1144
  end
1145
-
1145
+
1146
1146
  def pos
1147
1147
  {interval.begin => ['author_word', interval.end]}
1148
1148
  end
1149
-
1149
+
1150
1150
  def details
1151
1151
  {:author => [value]}
1152
1152
  end
@@ -1156,11 +1156,11 @@ grammar ScientificNameClean
1156
1156
  def value
1157
1157
  text_value
1158
1158
  end
1159
-
1159
+
1160
1160
  def pos
1161
1161
  {interval.begin => ['author_word', interval.end]}
1162
1162
  end
1163
-
1163
+
1164
1164
  def details
1165
1165
  {:author => [value]}
1166
1166
  end
@@ -1168,13 +1168,13 @@ grammar ScientificNameClean
1168
1168
  /
1169
1169
  author_prefix_word
1170
1170
  end
1171
-
1171
+
1172
1172
  rule author_prefix_word
1173
1173
  space ("ab"/"af"/"bis"/"da"/"der"/"des"/"den"/"della"/"dela"/"de"/"di"/"du"/"la"/"ter"/"van"/"von") &space_hard {
1174
1174
  def value
1175
1175
  text_value
1176
1176
  end
1177
-
1177
+
1178
1178
  def pos
1179
1179
  #cheating because there are several words in some of them
1180
1180
  {interval.begin => ['author_word', interval.end]}
@@ -1184,7 +1184,7 @@ grammar ScientificNameClean
1184
1184
 
1185
1185
  rule author_postfix_word
1186
1186
  ("f."/"filius") {
1187
- def value
1187
+ def value
1188
1188
  text_value.strip
1189
1189
  end
1190
1190
 
@@ -1193,7 +1193,7 @@ grammar ScientificNameClean
1193
1193
  end
1194
1194
  }
1195
1195
  end
1196
-
1196
+
1197
1197
  rule cap_latin_word_pair
1198
1198
  a:cap_latin_word "-" b:cap_latin_word {
1199
1199
  def value
@@ -1201,7 +1201,7 @@ grammar ScientificNameClean
1201
1201
  end
1202
1202
  }
1203
1203
  end
1204
-
1204
+
1205
1205
  rule cap_latin_word
1206
1206
  a:([A-Z]/cap_digraph) b:latin_word "?" {
1207
1207
  def value
@@ -1241,19 +1241,19 @@ grammar ScientificNameClean
1241
1241
  def value
1242
1242
  a.value + " " + b.value
1243
1243
  end
1244
-
1244
+
1245
1245
  def canonical
1246
1246
  b.value
1247
1247
  end
1248
-
1248
+
1249
1249
  def hybrid
1250
1250
  true
1251
1251
  end
1252
-
1252
+
1253
1253
  def pos
1254
1254
  {b.interval.begin => ['species', b.interval.end]}
1255
1255
  end
1256
-
1256
+
1257
1257
  def details
1258
1258
  {:species => {:string => b.value}}
1259
1259
  end
@@ -1263,19 +1263,19 @@ grammar ScientificNameClean
1263
1263
  def value
1264
1264
  "× " + b.value
1265
1265
  end
1266
-
1266
+
1267
1267
  def canonical
1268
1268
  b.value
1269
1269
  end
1270
-
1270
+
1271
1271
  def hybrid
1272
1272
  true
1273
1273
  end
1274
-
1274
+
1275
1275
  def pos
1276
1276
  {b.interval.begin => ['species', b.interval.end]}
1277
1277
  end
1278
-
1278
+
1279
1279
  def details
1280
1280
  {:species => {:string => b.value}}
1281
1281
  end
@@ -1285,19 +1285,19 @@ grammar ScientificNameClean
1285
1285
  def value
1286
1286
  "× " + b.value
1287
1287
  end
1288
-
1288
+
1289
1289
  def canonical
1290
1290
  b.value
1291
1291
  end
1292
-
1292
+
1293
1293
  def hybrid
1294
1294
  true
1295
1295
  end
1296
-
1296
+
1297
1297
  def pos
1298
1298
  {b.interval.begin => ['species', b.interval.end]}
1299
1299
  end
1300
-
1300
+
1301
1301
  def details
1302
1302
  {:species => {:string => b.value}}
1303
1303
  end
@@ -1305,7 +1305,7 @@ grammar ScientificNameClean
1305
1305
  end
1306
1306
 
1307
1307
  rule annotation_identification
1308
- ("sp.nr."/"sp. nr."/"nr."/"nr "/"sp.aff."/"sp. aff."/"sp."/"sp "/"spp."/"spp "/"aff."/"aff "/"monst."/"?") {
1308
+ ("sp.nr."/"sp. nr."/"nr."/"nr "/"sp.aff."/"sp. aff."/"sp."/"sp "/"species"/"spp."/"spp "/"aff."/"aff "/"monst."/"? ") {
1309
1309
 
1310
1310
  def value
1311
1311
  text_value.strip
@@ -1392,9 +1392,9 @@ grammar ScientificNameClean
1392
1392
  text_value.split('').each do |l|
1393
1393
  l = 'ae' if l == 'æ'
1394
1394
  l = 'oe' if l == 'œ'
1395
- # We normalize ë as well. It is legal in botanical code, but it
1395
+ # We normalize ë as well. It is legal in botanical code, but it
1396
1396
  # is beneficial to normalize it for the reconsiliation purposes
1397
- l = 'e' if l == 'ë'
1397
+ l = 'e' if l == 'ë'
1398
1398
  res << l
1399
1399
  end
1400
1400
  res
@@ -1408,7 +1408,7 @@ grammar ScientificNameClean
1408
1408
  res = text_value
1409
1409
  res = 'ae' if res == 'æ'
1410
1410
  res = 'oe' if res == 'œ'
1411
- res = 'e' if res == 'ë'
1411
+ res = 'e' if res == 'ë'
1412
1412
  res
1413
1413
  end
1414
1414
  }
@@ -1426,7 +1426,7 @@ grammar ScientificNameClean
1426
1426
  def value
1427
1427
  'Oe'
1428
1428
  end
1429
- }
1429
+ }
1430
1430
  end
1431
1431
 
1432
1432
  rule year
@@ -1434,14 +1434,14 @@ grammar ScientificNameClean
1434
1434
  def value
1435
1435
  a.value
1436
1436
  end
1437
-
1437
+
1438
1438
  def pos
1439
1439
  a.pos
1440
1440
  end
1441
-
1441
+
1442
1442
  def details
1443
1443
  a.details
1444
- end
1444
+ end
1445
1445
  }
1446
1446
  /
1447
1447
  year_number_with_character
@@ -1464,31 +1464,31 @@ grammar ScientificNameClean
1464
1464
  end
1465
1465
  }
1466
1466
  end
1467
-
1467
+
1468
1468
  rule year_number
1469
1469
  [12] [7890] [0-9] ([0-9] [\?]?/"?") {
1470
1470
  def value
1471
1471
  text_value
1472
1472
  end
1473
-
1473
+
1474
1474
  def pos
1475
1475
  {interval.begin => ['year', interval.end]}
1476
1476
  end
1477
-
1477
+
1478
1478
  def details
1479
1479
  {:year => value}
1480
1480
  end
1481
1481
  }
1482
1482
  end
1483
-
1483
+
1484
1484
  rule left_paren
1485
1485
  "("
1486
1486
  end
1487
-
1487
+
1488
1488
  rule right_paren
1489
1489
  ")"
1490
1490
  end
1491
-
1491
+
1492
1492
  rule hybrid_character
1493
1493
  ("x"/"X") {
1494
1494
  def value
@@ -1498,7 +1498,7 @@ grammar ScientificNameClean
1498
1498
  /
1499
1499
  multiplication_sign
1500
1500
  end
1501
-
1501
+
1502
1502
  rule multiplication_sign
1503
1503
  ("×"/"*") {
1504
1504
  def value
@@ -1506,7 +1506,7 @@ grammar ScientificNameClean
1506
1506
  end
1507
1507
  }
1508
1508
  end
1509
-
1509
+
1510
1510
  rule space
1511
1511
  [\s]*
1512
1512
  end
@@ -1514,5 +1514,5 @@ grammar ScientificNameClean
1514
1514
  rule space_hard
1515
1515
  [\s]+
1516
1516
  end
1517
-
1517
+
1518
1518
  end