biodiversity19 2.1.0 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.travis.yml CHANGED
@@ -1,5 +1,6 @@
1
1
  rvm:
2
2
  - 1.9.3
3
+ - 2.0.0
3
4
  bundler_args: --without development
4
5
  branches:
5
6
  only:
data/CHANGELOG CHANGED
@@ -1,3 +1,7 @@
1
+ 3.0.0 -- removing support for ruby 1.8.7, making biodiversity gem be the same
2
+ as biodiversity19, deprecating biodiversity19. A few newly discovered bugs
3
+ are fixed.
4
+
1
5
  2.1.0 -- added ScientificNameParser.version method
2
6
 
3
7
  2.0.0 -- backward incompatible change in parserver, therefore new major number.
data/README.md CHANGED
@@ -8,14 +8,18 @@ Biodiversity
8
8
 
9
9
  Parses taxonomic scientific name and breaks it into semantic elements.
10
10
 
11
+ *WARNING, IMPORTANT!:*
12
+ Support for Ruby 1.8.7 IS DROPPED. Both biodiversity and
13
+ biodiversity19 will be for Ruby > 1.9.1 and will be identical gems.
14
+
15
+ biodiversity19 is now deprecated and will be phased out in a couple of years.
16
+ You are strongly encouraged to change your dependencies from
17
+ biodiversity19 to biodiversity
18
+
11
19
  Installation
12
20
  ------------
13
21
 
14
- *WARNING:* Do not use Ruby 1.8.7 -- it is outdated. The
15
- biodiversity gem for Ruby 1.8.7 is not getting updated anymore
16
-
17
- sudo gem install biodiversity19 #for ruby 1.9.x
18
- sudo gem install biodiversity #for ruby 1.8.x
22
+ sudo gem install biodiversity
19
23
 
20
24
  Example usage
21
25
  -------------
@@ -25,7 +29,12 @@ Example usage
25
29
  You can parse file with taxonomic names from command line.
26
30
  File should contain one scientific name per line
27
31
 
28
- nnparser file_with_names
32
+ nnparse file_with_names
33
+
34
+ The results will be put into the parsed.json file in the current directory.
35
+ To save results into a different file:
36
+
37
+ nnparse file_with_names output_file
29
38
 
30
39
  ### As a socket server
31
40
 
@@ -112,6 +121,18 @@ You can use it as a library in Ruby, JRuby etc.
112
121
  # to get detailed information about elements of the name
113
122
  parser.parse("Pseudocercospora dendrobii (H.C. Burnett 1883) U. Braun & Crous 2003")[:scientificName][:details]
114
123
 
124
+ The returned result is not always linear if the name is complex. To get a simple linear
125
+ representation of the name you can use:
126
+
127
+ parser.parse("Pseudocercospora dendrobii (H.C. Burnett) U. Braun & Crous 2003")[:scientificName][:position]
128
+ # returns {0=>["genus", 16], 17=>["species", 26],
129
+ # 28=>["author_word", 32], 33=>["author_word", 40],
130
+ # 42=>["author_word", 44], 45=>["author_word", 50],
131
+ # 53=>["author_word", 58], 59=>["year", 63]}
132
+ # where the key is the char index of the start of
133
+ # a word, first element of the value is a semantic meaning
134
+ # of the word, second element of the value is the character index
135
+ # of end of the word
115
136
 
116
137
  To parse using several CPUs (4 seem to be optimal)
117
138
 
data/Rakefile CHANGED
@@ -20,35 +20,41 @@ ruby_version = RUBY_VERSION.split('.')[0..1].join('').to_i
20
20
  begin
21
21
  require 'jeweler'
22
22
  Jeweler::Tasks.new do |gem|
23
- gem.name = ruby_version < 19 ? "biodiversity" : "biodiversity19"
23
+ gem.name = 'biodiversity19'
24
+ #To delete ruby_version < 19 ? 'biodiversity' : 'biodiversity19'
24
25
  gem.summary = 'Parser of scientific names'
25
26
  gem.description = 'Tools for biodiversity informatics'
26
- gem.email = "dmozzherin@gmail.com"
27
- gem.homepage = "http://github.com/GlobalNamesArchitecture/biodiversity"
28
- gem.authors = ["Dmitry Mozzherin"]
27
+ gem.email = 'dmozzherin@gmail.com'
28
+ gem.homepage = 'http://github.com/GlobalNamesArchitecture/biodiversity'
29
+ gem.authors = ['Dmitry Mozzherin']
29
30
  gem.has_rdoc = false
30
31
  gem.bindir = 'bin'
31
32
  gem.executables = ['nnparse', 'parserver']
32
33
  gem.add_dependency('treetop')
33
34
  gem.add_dependency('parallel')
34
- gem.add_dependency('json') if ruby_version < 19
35
+ # gem.add_dependency('json') if ruby_version < 19
35
36
  gem.add_development_dependency "rspec"
36
- # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
37
+ # gem is a Gem::Specification...
38
+ # see http://www.rubygems.org/read/chapter/20 for additional settings
37
39
  end
38
40
  rescue LoadError
39
- puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
41
+ puts 'Jeweler (or a dependency) not available. ' +
42
+ 'Install it with: sudo gem install jeweler'
40
43
  end
41
44
 
42
45
  task :tt do
43
- ['scientific_name_clean', 'scientific_name_dirty', 'scientific_name_canonical'].each do |f|
46
+ ['scientific_name_clean',
47
+ 'scientific_name_dirty',
48
+ 'scientific_name_canonical'].each do |f|
44
49
  file = "#{dir}/lib/biodiversity/parser/#{f}"
45
50
  FileUtils.rm("#{file}.rb") if FileTest.exist?("#{file}.rb")
46
51
  system("tt #{file}.treetop")
47
52
  rf = "#{file}.rb"
48
- rfn = open(rf + ".tmp", 'w')
53
+ rfn = open(rf + '.tmp', 'w')
49
54
  skip_head = false
50
55
  f = open(rf)
51
- #getting around a bug in treetop which prevents setting UTF-8 encoding in ruby19
56
+ # getting around a bug in treetop which prevents setting
57
+ # UTF-8 encoding in ruby19
52
58
  f.each_with_index do |l, i|
53
59
  skip_head = l.match(/^# Autogenerated/) if i == 0
54
60
  if skip_head && (l.strip == '' || l.match(/^# Autogenerated/))
data/VERSION CHANGED
@@ -1 +1 @@
1
- 2.1.0
1
+ 3.0.0
@@ -8,19 +8,19 @@ grammar ScientificNameClean
8
8
  def value
9
9
  a.value.gsub(/\s{2,}/, ' ').strip
10
10
  end
11
-
11
+
12
12
  def canonical
13
13
  a.canonical.gsub(/\s{2,}/, ' ').strip
14
14
  end
15
-
15
+
16
16
  def pos
17
17
  a.pos
18
18
  end
19
-
19
+
20
20
  def hybrid
21
21
  a.hybrid
22
22
  end
23
-
23
+
24
24
  def details
25
25
  a.details.class == Array ? a.details : [a.details]
26
26
  end
@@ -30,25 +30,25 @@ grammar ScientificNameClean
30
30
  end
31
31
  }
32
32
  end
33
-
33
+
34
34
  rule scientific_name_5
35
35
  a:multinomial_name space_hard hybrid_character space_hard b:species {
36
36
  def value
37
37
  a.value + " × " + b.value
38
38
  end
39
-
39
+
40
40
  def canonical
41
41
  a.canonical + " × " + b.canonical
42
42
  end
43
-
43
+
44
44
  def pos
45
45
  a.pos.merge(b.pos)
46
46
  end
47
-
47
+
48
48
  def hybrid
49
49
  true
50
50
  end
51
-
51
+
52
52
  def details
53
53
  [a.details, b.details.merge({:genus => a.details[:genus]})]
54
54
  end
@@ -58,19 +58,19 @@ grammar ScientificNameClean
58
58
  def value
59
59
  a.value + " " + b.apply(c)
60
60
  end
61
-
61
+
62
62
  def canonical
63
63
  a.canonical
64
64
  end
65
-
65
+
66
66
  def pos
67
67
  a.pos.merge(c.pos)
68
68
  end
69
-
69
+
70
70
  def hybrid
71
71
  a.hybrid
72
72
  end
73
-
73
+
74
74
  def details
75
75
  a.details.merge(b.details(c))
76
76
  end
@@ -78,25 +78,25 @@ grammar ScientificNameClean
78
78
  /
79
79
  scientific_name_4
80
80
  end
81
-
81
+
82
82
  rule scientific_name_4
83
83
  a:scientific_name_1 space hybrid_character space b:scientific_name_1 {
84
84
  def value
85
85
  a.value + " × " + b.value
86
86
  end
87
-
87
+
88
88
  def canonical
89
89
  a.canonical + " × " + b.canonical
90
90
  end
91
-
91
+
92
92
  def pos
93
93
  a.pos.merge(b.pos)
94
94
  end
95
-
95
+
96
96
  def hybrid
97
97
  true
98
98
  end
99
-
99
+
100
100
  def details
101
101
  [a.details, b.details]
102
102
  end
@@ -106,19 +106,19 @@ grammar ScientificNameClean
106
106
  def value
107
107
  a.value + " × ?"
108
108
  end
109
-
109
+
110
110
  def canonical
111
111
  a.canonical
112
112
  end
113
-
113
+
114
114
  def pos
115
115
  a.pos
116
116
  end
117
-
117
+
118
118
  def hybrid
119
119
  true
120
120
  end
121
-
121
+
122
122
  def details
123
123
  [a.details, "?"]
124
124
  end
@@ -126,25 +126,25 @@ grammar ScientificNameClean
126
126
  /
127
127
  scientific_name_3
128
128
  end
129
-
129
+
130
130
  rule scientific_name_3
131
131
  a:hybrid_character space b:scientific_name_2 {
132
132
  def value
133
133
  a.value + " " + b.value
134
134
  end
135
-
135
+
136
136
  def canonical
137
137
  b.canonical
138
138
  end
139
-
139
+
140
140
  def pos
141
141
  b.pos
142
142
  end
143
-
143
+
144
144
  def hybrid
145
145
  true
146
146
  end
147
-
147
+
148
148
  def details
149
149
  b.details
150
150
  end
@@ -152,25 +152,25 @@ grammar ScientificNameClean
152
152
  /
153
153
  scientific_name_2
154
154
  end
155
-
155
+
156
156
  rule scientific_name_2
157
157
  a:scientific_name_1 space b:status_part {
158
158
  def value
159
159
  a.value + " " + b.value
160
160
  end
161
-
161
+
162
162
  def canonical
163
163
  a.canonical
164
164
  end
165
-
165
+
166
166
  def pos
167
167
  a.pos
168
168
  end
169
-
169
+
170
170
  def hybrid
171
171
  a.hybrid rescue false
172
172
  end
173
-
173
+
174
174
  def details
175
175
  a.details.merge(b.details)
176
176
  end
@@ -184,10 +184,10 @@ grammar ScientificNameClean
184
184
  /
185
185
  multinomial_name
186
186
  /
187
- uninomial_name
187
+ uninomial_name
188
188
  end
189
-
190
-
189
+
190
+
191
191
  rule status_part
192
192
  a:status_word space b:status_part {
193
193
  def value
@@ -200,7 +200,7 @@ grammar ScientificNameClean
200
200
  /
201
201
  status_word
202
202
  end
203
-
203
+
204
204
  rule status_word
205
205
  latin_word [\.] {
206
206
  def value
@@ -216,7 +216,7 @@ grammar ScientificNameClean
216
216
 
217
217
  rule unparsed
218
218
  .+ space {
219
-
219
+
220
220
  def value
221
221
  ''
222
222
  end
@@ -238,52 +238,52 @@ grammar ScientificNameClean
238
238
  end
239
239
  }
240
240
  end
241
-
241
+
242
242
  rule multinomial_name
243
243
  a:genus space b:infragenus space aid:annotation_identification? space c:species space_hard d:infraspecies_mult {
244
244
  def value
245
245
  a.value + " " + b.value + " " + c.value + " " + d.value
246
246
  end
247
-
247
+
248
248
  def canonical
249
249
  a.canonical + " " + c.canonical + " " + d.canonical
250
250
  end
251
-
251
+
252
252
  def pos
253
253
  a.pos.merge(b.pos).merge(c.pos).merge(d.pos)
254
254
  end
255
-
255
+
256
256
  def hybrid
257
257
  c.hybrid rescue false
258
258
  end
259
-
259
+
260
260
  def details
261
261
  a.details.merge(b.details).merge(c.details).merge(d.details)
262
262
  end
263
263
  }
264
- /
264
+ /
265
265
  a:genus space b:infragenus space aid:annotation_identification? space c:species space aid:annotation_identification space d:infraspecies_mult {
266
266
  def value
267
267
  a.value + " " + b.value + " " + c.value + " " + d.value
268
268
  end
269
-
269
+
270
270
  def canonical
271
271
  a.canonical + " " + c.canonical + " " + d.canonical
272
272
  end
273
-
273
+
274
274
  def pos
275
275
  a.pos.merge(b.pos).merge(c.pos).merge(d.pos)
276
276
  end
277
-
277
+
278
278
  def hybrid
279
279
  c.hybrid rescue false
280
280
  end
281
-
281
+
282
282
  def details
283
283
  a.details.merge(b.details).merge(c.details).merge(d.details)
284
284
  end
285
285
  }
286
- /
286
+ /
287
287
  a:genus space b:infragenus space aid:annotation_identification? space c:species {
288
288
  def value
289
289
  if defined? aid.apply
@@ -292,7 +292,7 @@ grammar ScientificNameClean
292
292
  a.value + " " + b.value + " " + c.value
293
293
  end
294
294
  end
295
-
295
+
296
296
  def canonical
297
297
  if defined? aid.apply
298
298
  a.canonical + aid.canonical(c)
@@ -300,7 +300,7 @@ grammar ScientificNameClean
300
300
  a.canonical + " " + c.canonical
301
301
  end
302
302
  end
303
-
303
+
304
304
  def pos
305
305
  if defined? aid.apply
306
306
  a.pos.merge(b.pos).merge(aid.pos(c))
@@ -308,11 +308,11 @@ grammar ScientificNameClean
308
308
  a.pos.merge(b.pos).merge(c.pos)
309
309
  end
310
310
  end
311
-
311
+
312
312
  def hybrid
313
313
  c.hybrid rescue false
314
314
  end
315
-
315
+
316
316
  def details
317
317
  if defined? aid.apply
318
318
  a.details.merge(b.details).merge(aid.apply(c))
@@ -324,21 +324,21 @@ grammar ScientificNameClean
324
324
  /
325
325
  a:genus space aid:annotation_identification? space b:species space_hard c:infraspecies_mult {
326
326
  def value
327
- a.value + " " + b.value + " " + c.value
327
+ a.value + " " + b.value + " " + c.value
328
328
  end
329
329
 
330
330
  def canonical
331
331
  a.canonical + " " + b.canonical + " " + c.canonical
332
332
  end
333
-
333
+
334
334
  def pos
335
335
  a.pos.merge(b.pos).merge(c.pos)
336
336
  end
337
-
337
+
338
338
  def hybrid
339
339
  b.hybrid rescue false
340
340
  end
341
-
341
+
342
342
  def details
343
343
  a.details.merge(b.details).merge(c.details)
344
344
  end
@@ -349,7 +349,7 @@ grammar ScientificNameClean
349
349
  if defined? aid.apply
350
350
  a.value + aid.apply(b)
351
351
  else
352
- a.value + " " + b.value
352
+ a.value + " " + b.value
353
353
  end
354
354
  end
355
355
 
@@ -360,7 +360,7 @@ grammar ScientificNameClean
360
360
  a.canonical + " " + b.canonical
361
361
  end
362
362
  end
363
-
363
+
364
364
  def pos
365
365
  if defined? aid.apply
366
366
  a.pos.merge(aid.pos(b))
@@ -368,11 +368,11 @@ grammar ScientificNameClean
368
368
  a.pos.merge(b.pos)
369
369
  end
370
370
  end
371
-
371
+
372
372
  def hybrid
373
373
  b.hybrid rescue false
374
374
  end
375
-
375
+
376
376
  def details
377
377
  if defined? aid.apply
378
378
  a.details.merge(aid.details(b))
@@ -390,15 +390,15 @@ grammar ScientificNameClean
390
390
  def canonical
391
391
  a.canonical + aid.canonical(b)
392
392
  end
393
-
393
+
394
394
  def pos
395
395
  a.pos.merge(aid.pos(b))
396
396
  end
397
-
397
+
398
398
  def hybrid
399
399
  false
400
400
  end
401
-
401
+
402
402
  def details
403
403
  a.details.merge(aid.details(b))
404
404
  end
@@ -408,7 +408,7 @@ grammar ScientificNameClean
408
408
  rule multiuninomial_name
409
409
  a:uninomial_name space b:rank_uninomial space c:uninomial_name {
410
410
 
411
- def value
411
+ def value
412
412
  a.value + " " + b.value + " " + c.value
413
413
  end
414
414
 
@@ -429,23 +429,23 @@ grammar ScientificNameClean
429
429
  end
430
430
  }
431
431
  end
432
-
432
+
433
433
  rule infraspecies_mult
434
434
  a:infraspecies space b:infraspecies_mult {
435
435
  def value
436
436
  a.value + " " + b.value
437
437
  end
438
-
438
+
439
439
  def canonical
440
440
  a.canonical + " " + b.canonical
441
441
  end
442
-
442
+
443
443
  def pos
444
444
  a.pos.merge(b.pos)
445
445
  end
446
-
446
+
447
447
  def details
448
- a_array = a.details[:infraspecies].class == Array ? a.details[:infraspecies] : [a.details[:infraspecies]]
448
+ a_array = a.details[:infraspecies].class == Array ? a.details[:infraspecies] : [a.details[:infraspecies]]
449
449
  b_array = b.details[:infraspecies].class == Array ? b.details[:infraspecies] : [b.details[:infraspecies]]
450
450
  a.details.merge({:infraspecies => a_array + b_array})
451
451
  end
@@ -461,42 +461,42 @@ grammar ScientificNameClean
461
461
  end
462
462
  }
463
463
  end
464
-
464
+
465
465
  rule infraspecies
466
466
  a:infraspecies_string space b:authorship {
467
467
  def value
468
468
  a.value + " " + b.value
469
469
  end
470
-
470
+
471
471
  def canonical
472
472
  a.canonical
473
473
  end
474
-
474
+
475
475
  def pos
476
476
  a.pos.merge(b.pos)
477
477
  end
478
-
478
+
479
479
  def details
480
480
  {:infraspecies => a.details[:infraspecies].merge(b.details)}
481
481
  end
482
482
  }
483
483
  /
484
- infraspecies_string
484
+ infraspecies_string
485
485
  end
486
-
486
+
487
487
  rule infraspecies_string
488
488
  sel:rank space a:species_word {
489
- def value
489
+ def value
490
490
  sel.apply(a)
491
491
  end
492
492
  def canonical
493
493
  sel.canonical(a)
494
494
  end
495
-
495
+
496
496
  def pos
497
497
  sel.pos(a)
498
498
  end
499
-
499
+
500
500
  def details
501
501
  sel.details(a)
502
502
  end
@@ -506,18 +506,18 @@ grammar ScientificNameClean
506
506
  def value
507
507
  aid.apply(a)
508
508
  end
509
-
509
+
510
510
  def canonical
511
511
  aid.canonical(a)
512
512
  end
513
-
513
+
514
514
  def pos
515
515
  def a.pos
516
516
  {interval.begin => ['infraspecies', a.interval.end]}
517
517
  end
518
518
  aid.pos(a)
519
519
  end
520
-
520
+
521
521
  def details
522
522
  def a.details
523
523
  {:infraspecies => {:string => value, :rank => 'n/a'}}
@@ -530,21 +530,21 @@ grammar ScientificNameClean
530
530
  def value
531
531
  a.value
532
532
  end
533
-
533
+
534
534
  def canonical
535
535
  value
536
536
  end
537
-
537
+
538
538
  def pos
539
539
  {interval.begin => ['infraspecies', interval.end]}
540
540
  end
541
-
541
+
542
542
  def details
543
543
  {:infraspecies => {:string => value, :rank => 'n/a'}}
544
544
  end
545
545
  }
546
546
  end
547
-
547
+
548
548
  rule taxon_concept_rank
549
549
  ("sec."/"sensu.") {
550
550
  def value
@@ -555,7 +555,7 @@ grammar ScientificNameClean
555
555
  end
556
556
  def details(a = nil)
557
557
  {:taxon_concept => a.details}
558
- end
558
+ end
559
559
  }
560
560
  end
561
561
 
@@ -573,12 +573,12 @@ grammar ScientificNameClean
573
573
  def canonical(a)
574
574
  " " + a.value
575
575
  end
576
-
576
+
577
577
  def pos(a)
578
- interval_end = text_value[-1] == ' ' ? interval.end - 1 : interval.end
578
+ interval_end = text_value[-1] == ' ' ? interval.end - 1 : interval.end
579
579
  {interval.begin => ['infraspecific_type', interval_end], a.interval.begin => ['infraspecies', a.interval.end]}
580
580
  end
581
-
581
+
582
582
  def details(a = nil)
583
583
  {:infraspecies => {:string => (a.value rescue nil), :rank => text_value.strip}}
584
584
  end
@@ -594,31 +594,31 @@ grammar ScientificNameClean
594
594
  def pos(uni)
595
595
  {interval.begin => ['rank_uninomial', interval.end], uni.interval.begin => ['uninomial', uni.interval.end]}
596
596
  end
597
-
597
+
598
598
  def details(uni)
599
599
  {:rank_uninomials => value, :uninomial2 => uni.details[:uninomial]}
600
600
  end
601
601
  }
602
602
  end
603
-
603
+
604
604
  rule species
605
605
  a:species_string space b:authorship {
606
606
  def value
607
607
  a.value + " " + b.value
608
608
  end
609
-
609
+
610
610
  def canonical
611
611
  a.canonical
612
612
  end
613
-
613
+
614
614
  def hybrid
615
615
  a.hybrid rescue false
616
616
  end
617
-
617
+
618
618
  def pos
619
619
  a.pos.merge(b.pos)
620
620
  end
621
-
621
+
622
622
  def details
623
623
  {:species => a.details[:species].merge(b.details)}
624
624
  end
@@ -626,21 +626,21 @@ grammar ScientificNameClean
626
626
  /
627
627
  species_string
628
628
  end
629
-
629
+
630
630
  rule species_string
631
631
  species_word {
632
632
  def canonical
633
633
  value
634
634
  end
635
-
635
+
636
636
  def pos
637
637
  {interval.begin => ['species', interval.end]}
638
638
  end
639
-
639
+
640
640
  def hybrid
641
641
  false
642
642
  end
643
-
643
+
644
644
  def details
645
645
  {:species => {:string => value}}
646
646
  end
@@ -648,41 +648,41 @@ grammar ScientificNameClean
648
648
  /
649
649
  species_word_hybrid
650
650
  end
651
-
651
+
652
652
  rule infragenus
653
653
  left_paren space a:(cap_latin_word/capped_dotted_char) space right_paren {
654
654
  def value
655
655
  "(" + a.value + ")"
656
656
  end
657
-
657
+
658
658
  def canonical
659
659
  a.value
660
660
  end
661
-
661
+
662
662
  def pos
663
663
  {a.interval.begin => ['infragenus', a.interval.end]}
664
664
  end
665
-
665
+
666
666
  def details
667
667
  {:infragenus => {:string => a.value}}
668
668
  end
669
669
  }
670
670
  end
671
-
671
+
672
672
  rule genus
673
673
  a:(abbreviated_genus/uninomial_string) !(space_hard author_prefix_word space_hard author_word) {
674
674
  def value
675
675
  a.value
676
676
  end
677
-
677
+
678
678
  def pos
679
679
  {a.interval.begin => ['genus', a.interval.end]}
680
680
  end
681
-
681
+
682
682
  def canonical
683
683
  a.value
684
684
  end
685
-
685
+
686
686
  def details
687
687
  {:genus => {:string => a.value}}
688
688
  end
@@ -706,27 +706,27 @@ grammar ScientificNameClean
706
706
  def details
707
707
  {:abbreviated_genus => {:string => value}}
708
708
  end
709
- }
709
+ }
710
710
  end
711
-
711
+
712
712
  rule uninomial_name
713
713
  a:uninomial_string space b:infragenus space c:simple_authorship {
714
714
  def value
715
715
  a.value + " " + b.value + " " + c.value
716
716
  end
717
-
717
+
718
718
  def canonical
719
719
  a.canonical
720
720
  end
721
-
721
+
722
722
  def pos
723
723
  a.pos.merge(b.pos).merge(c.pos)
724
724
  end
725
-
725
+
726
726
  def hybrid
727
727
  false
728
728
  end
729
-
729
+
730
730
  def details
731
731
  {:uninomial => a.details[:uninomial].merge(b.details).merge(c.details)}
732
732
  end
@@ -736,19 +736,19 @@ grammar ScientificNameClean
736
736
  def value
737
737
  a.value + " " + b.value
738
738
  end
739
-
739
+
740
740
  def canonical
741
741
  a.canonical
742
742
  end
743
-
743
+
744
744
  def pos
745
745
  a.pos.merge(b.pos)
746
746
  end
747
-
747
+
748
748
  def hybrid
749
749
  false
750
750
  end
751
-
751
+
752
752
  def details
753
753
  {:uninomial => a.details[:uninomial].merge(b.details)}
754
754
  end
@@ -758,19 +758,19 @@ grammar ScientificNameClean
758
758
  def value
759
759
  a.value + " " + b.value
760
760
  end
761
-
761
+
762
762
  def canonical
763
763
  a.canonical
764
764
  end
765
-
765
+
766
766
  def pos
767
767
  a.pos.merge(b.pos)
768
768
  end
769
-
769
+
770
770
  def hybrid
771
771
  false
772
772
  end
773
-
773
+
774
774
  def details
775
775
  {:uninomial => a.details[:uninomial].merge(b.details)}
776
776
  end
@@ -784,31 +784,31 @@ grammar ScientificNameClean
784
784
  def canonical
785
785
  value
786
786
  end
787
-
787
+
788
788
  def pos
789
789
  {interval.begin => ['uninomial', interval.end]}
790
790
  end
791
-
791
+
792
792
  def hybrid
793
793
  false
794
794
  end
795
-
796
- def details
795
+
796
+ def details
797
797
  {:uninomial => {:string => value}}
798
798
  end
799
799
  }
800
800
  end
801
-
801
+
802
802
  rule authorship
803
803
  a:basionym_authorship_with_parenthesis space b:simple_authorship ","? space c:ex_authorship {
804
804
  def value
805
805
  a.value + " " + b.value + " " + c.value
806
806
  end
807
-
807
+
808
808
  def pos
809
809
  a.pos.merge(b.pos).merge(c.pos)
810
810
  end
811
-
811
+
812
812
  def details
813
813
  val = {:authorship => text_value.strip, :combinationAuthorTeam => b.details[:basionymAuthorTeam], :basionymAuthorTeam => a.details[:basionymAuthorTeam]}
814
814
  val[:combinationAuthorTeam].merge!(c.details)
@@ -820,11 +820,11 @@ grammar ScientificNameClean
820
820
  def value
821
821
  a.value + " " + b.value
822
822
  end
823
-
823
+
824
824
  def pos
825
825
  a.pos.merge(b.pos)
826
826
  end
827
-
827
+
828
828
  def details
829
829
  {:authorship => text_value.strip, :combinationAuthorTeam => b.details[:basionymAuthorTeam], :basionymAuthorTeam => a.details[:basionymAuthorTeam]}
830
830
  end
@@ -836,11 +836,11 @@ grammar ScientificNameClean
836
836
  def value
837
837
  a.value + " " + b.value
838
838
  end
839
-
839
+
840
840
  def pos
841
841
  a.pos.merge(b.pos)
842
842
  end
843
-
843
+
844
844
  def details
845
845
  val = a.details
846
846
  val[:authorship] = text_value.strip
@@ -851,21 +851,21 @@ grammar ScientificNameClean
851
851
  /
852
852
  simple_authorship
853
853
  end
854
-
855
-
854
+
855
+
856
856
  rule basionym_authorship_with_parenthesis
857
857
  left_paren space a:authors_names space right_paren space [,]? space b:year {
858
858
  def value
859
859
  "(" + a.value + " " + b.value + ")"
860
860
  end
861
-
861
+
862
862
  def pos
863
863
  a.pos.merge(b.pos)
864
- end
865
-
864
+ end
865
+
866
866
  def details
867
- { :authorship => text_value,
868
- :basionymAuthorTeam => {:author_team => text_value}.merge(a.details).merge(b.details)
867
+ { :authorship => text_value,
868
+ :basionymAuthorTeam => {:author_team => text_value}.merge(a.details).merge(b.details)
869
869
  }
870
870
  end
871
871
  }
@@ -874,11 +874,11 @@ grammar ScientificNameClean
874
874
  def value
875
875
  "(" + a.value + " " + b.value + ")"
876
876
  end
877
-
877
+
878
878
  def pos
879
879
  a.pos.merge(b.pos)
880
880
  end
881
-
881
+
882
882
  def details
883
883
  val = a.details
884
884
  val[:basionymAuthorTeam].merge!(b.details)
@@ -891,15 +891,15 @@ grammar ScientificNameClean
891
891
  def value
892
892
  "(" + a.value + ")"
893
893
  end
894
-
894
+
895
895
  def pos
896
896
  a.pos
897
897
  end
898
-
898
+
899
899
  def details
900
900
  val = a.details
901
901
  val[:authorship] = text_value
902
- val
902
+ val
903
903
  end
904
904
  }
905
905
  /
@@ -907,32 +907,32 @@ grammar ScientificNameClean
907
907
  def value
908
908
  "(?)"
909
909
  end
910
-
910
+
911
911
  def pos
912
912
  {a.interval.begin => ['unknown_author', a.interval.end]}
913
913
  end
914
-
914
+
915
915
  def details
916
916
  {:authorship => text_value, :basionymAuthorTeam => {:authorTeam => text_value, :author => ['?']}}
917
917
  end
918
918
  }
919
919
  end
920
-
920
+
921
921
  rule ex_authorship
922
922
  ex_sep space b:simple_authorship {
923
923
  def value
924
924
  " ex " + b.value
925
925
  end
926
-
926
+
927
927
  def pos
928
928
  b.pos
929
929
  end
930
-
930
+
931
931
  def details
932
932
  val = {:exAuthorTeam => {:authorTeam => b.text_value.strip}.merge(b.details[:basionymAuthorTeam])}
933
933
  val
934
934
  end
935
- }
935
+ }
936
936
  end
937
937
 
938
938
  rule simple_authorship
@@ -940,17 +940,17 @@ grammar ScientificNameClean
940
940
  def value
941
941
  a.value + " " + b.value
942
942
  end
943
-
943
+
944
944
  def pos
945
945
  a.pos.merge(b.pos)
946
946
  end
947
-
947
+
948
948
  def details
949
949
  details_with_arg(:basionymAuthorTeam)
950
950
  end
951
-
951
+
952
952
  def details_with_arg(authorTeamType = 'basionymAuthorTeam')
953
- { :authorship => text_value,
953
+ { :authorship => text_value,
954
954
  authorTeamType.to_sym => {
955
955
  :authorTeam => a.text_value.strip
956
956
  }.merge(a.details).merge(b.details)
@@ -962,17 +962,17 @@ grammar ScientificNameClean
962
962
  def value
963
963
  a.value + " " + b.value
964
964
  end
965
-
965
+
966
966
  def pos
967
967
  a.pos.merge(b.pos)
968
968
  end
969
-
969
+
970
970
  def details
971
971
  details_with_arg(:basionymAuthorTeam)
972
972
  end
973
-
973
+
974
974
  def details_with_arg(authorTeamType = 'basionymAuthorTeam')
975
- { :authorship => text_value,
975
+ { :authorship => text_value,
976
976
  authorTeamType.to_sym => {
977
977
  :authorTeam => a.text_value.strip
978
978
  }.merge(a.details).merge(b.details)
@@ -986,27 +986,27 @@ grammar ScientificNameClean
986
986
  details[:basionymAuthorTeam].merge!(super)
987
987
  details
988
988
  end
989
-
989
+
990
990
  def details_with_arg(authorTeamType = 'basionymAuthorTeam')
991
- { :authorship => text_value,
991
+ { :authorship => text_value,
992
992
  authorTeamType.to_sym => {
993
993
  :authorTeam => text_value,
994
994
  }
995
- }
995
+ }
996
996
  end
997
997
  }
998
998
  end
999
-
999
+
1000
1000
  rule authors_names
1001
1001
  a:author_name space sep:author_separator space b:authors_names {
1002
1002
  def value
1003
1003
  sep.apply(a,b)
1004
1004
  end
1005
-
1005
+
1006
1006
  def pos
1007
1007
  sep.pos(a,b)
1008
1008
  end
1009
-
1009
+
1010
1010
  def details
1011
1011
  sep.details(a,b)
1012
1012
  end
@@ -1016,28 +1016,28 @@ grammar ScientificNameClean
1016
1016
  /
1017
1017
  unknown_auth
1018
1018
  end
1019
-
1020
-
1019
+
1020
+
1021
1021
  rule unknown_auth
1022
1022
  ("auct."/"auct"/"hort."/"hort"/"anon."/"anon"/"ht."/"ht") !latin_word {
1023
1023
  def value
1024
1024
  text_value
1025
1025
  end
1026
-
1026
+
1027
1027
  def pos
1028
1028
  {interval.begin => ['unknown_author', interval.end]}
1029
1029
  end
1030
-
1030
+
1031
1031
  def details
1032
1032
  {:author => ["unknown"]}
1033
1033
  end
1034
1034
  }
1035
1035
  end
1036
-
1036
+
1037
1037
  rule ex_sep
1038
1038
  ("ex"/"in") &[\s]
1039
1039
  end
1040
-
1040
+
1041
1041
  rule author_separator
1042
1042
  ("&amp;"/"&"/","/"and"/"et") {
1043
1043
  def apply(a,b)
@@ -1045,11 +1045,11 @@ grammar ScientificNameClean
1045
1045
  sep = " &" if ["&amp;", "&","and","et"].include? sep
1046
1046
  a.value + sep + " " + b.value
1047
1047
  end
1048
-
1048
+
1049
1049
  def pos(a,b)
1050
1050
  a.pos.merge(b.pos)
1051
1051
  end
1052
-
1052
+
1053
1053
  def details(a,b)
1054
1054
  {:author => a.details[:author] + b.details[:author]}
1055
1055
  end
@@ -1061,8 +1061,8 @@ grammar ScientificNameClean
1061
1061
  def value
1062
1062
  a.value + ' ' + b.value
1063
1063
  end
1064
-
1065
- def pos
1064
+
1065
+ def pos
1066
1066
  a.pos.merge(b.pos)
1067
1067
  end
1068
1068
 
@@ -1073,17 +1073,17 @@ grammar ScientificNameClean
1073
1073
  /
1074
1074
  author_name_without_postfix
1075
1075
  end
1076
-
1076
+
1077
1077
  rule author_name_without_postfix
1078
1078
  space a:author_prefix_word space b:author_name {
1079
1079
  def value
1080
1080
  a.value + " " + b.value
1081
1081
  end
1082
-
1082
+
1083
1083
  def pos
1084
1084
  a.pos.merge(b.pos)
1085
1085
  end
1086
-
1086
+
1087
1087
  def details
1088
1088
  {:author => [value]}
1089
1089
  end
@@ -1093,11 +1093,11 @@ grammar ScientificNameClean
1093
1093
  def value
1094
1094
  a.value + " " + b.value
1095
1095
  end
1096
-
1096
+
1097
1097
  def pos
1098
1098
  a.pos.merge(b.pos)
1099
1099
  end
1100
-
1100
+
1101
1101
  def details
1102
1102
  {:author => [value]}
1103
1103
  end
@@ -1105,17 +1105,17 @@ grammar ScientificNameClean
1105
1105
  /
1106
1106
  author_word
1107
1107
  end
1108
-
1108
+
1109
1109
  rule author_word
1110
1110
  "A S. Xu" {
1111
1111
  def value
1112
1112
  text_value.strip
1113
1113
  end
1114
-
1114
+
1115
1115
  def pos
1116
1116
  {interval.begin => ['author_word', 1], (interval.begin + 2) => ['author_word', 2], (interval.begin + 5) => ['author_word', 2]}
1117
1117
  end
1118
-
1118
+
1119
1119
  def details
1120
1120
  {:author => [value]}
1121
1121
  end
@@ -1125,28 +1125,28 @@ grammar ScientificNameClean
1125
1125
  def value
1126
1126
  text_value.strip
1127
1127
  end
1128
-
1128
+
1129
1129
  def pos
1130
1130
  #cheating because there are several words in some of them
1131
1131
  {interval.begin => ['author_word', interval.end]}
1132
1132
  end
1133
-
1133
+
1134
1134
  def details
1135
1135
  {:author => [value]}
1136
1136
  end
1137
1137
  }
1138
- /
1138
+ /
1139
1139
  ("Å"/"Ö"/"Á"/"Ø"/"Ô"/"Š"/"Ś"/"Č"/"Ķ"/"Ł"/"É"/"Ž"/[A-W]/[Y-Z]) [^0-9\[\]\(\)\s&,]* {
1140
1140
  def value
1141
1141
  text_value.gsub(/([\p{Lu}]{3,})/) do |match|
1142
1142
  UnicodeUtils.titlecase(match)
1143
1143
  end
1144
1144
  end
1145
-
1145
+
1146
1146
  def pos
1147
1147
  {interval.begin => ['author_word', interval.end]}
1148
1148
  end
1149
-
1149
+
1150
1150
  def details
1151
1151
  {:author => [value]}
1152
1152
  end
@@ -1156,11 +1156,11 @@ grammar ScientificNameClean
1156
1156
  def value
1157
1157
  text_value
1158
1158
  end
1159
-
1159
+
1160
1160
  def pos
1161
1161
  {interval.begin => ['author_word', interval.end]}
1162
1162
  end
1163
-
1163
+
1164
1164
  def details
1165
1165
  {:author => [value]}
1166
1166
  end
@@ -1168,13 +1168,13 @@ grammar ScientificNameClean
1168
1168
  /
1169
1169
  author_prefix_word
1170
1170
  end
1171
-
1171
+
1172
1172
  rule author_prefix_word
1173
1173
  space ("ab"/"af"/"bis"/"da"/"der"/"des"/"den"/"della"/"dela"/"de"/"di"/"du"/"la"/"ter"/"van"/"von") &space_hard {
1174
1174
  def value
1175
1175
  text_value
1176
1176
  end
1177
-
1177
+
1178
1178
  def pos
1179
1179
  #cheating because there are several words in some of them
1180
1180
  {interval.begin => ['author_word', interval.end]}
@@ -1184,7 +1184,7 @@ grammar ScientificNameClean
1184
1184
 
1185
1185
  rule author_postfix_word
1186
1186
  ("f."/"filius") {
1187
- def value
1187
+ def value
1188
1188
  text_value.strip
1189
1189
  end
1190
1190
 
@@ -1193,7 +1193,7 @@ grammar ScientificNameClean
1193
1193
  end
1194
1194
  }
1195
1195
  end
1196
-
1196
+
1197
1197
  rule cap_latin_word_pair
1198
1198
  a:cap_latin_word "-" b:cap_latin_word {
1199
1199
  def value
@@ -1201,7 +1201,7 @@ grammar ScientificNameClean
1201
1201
  end
1202
1202
  }
1203
1203
  end
1204
-
1204
+
1205
1205
  rule cap_latin_word
1206
1206
  a:([A-Z]/cap_digraph) b:latin_word "?" {
1207
1207
  def value
@@ -1241,19 +1241,19 @@ grammar ScientificNameClean
1241
1241
  def value
1242
1242
  a.value + " " + b.value
1243
1243
  end
1244
-
1244
+
1245
1245
  def canonical
1246
1246
  b.value
1247
1247
  end
1248
-
1248
+
1249
1249
  def hybrid
1250
1250
  true
1251
1251
  end
1252
-
1252
+
1253
1253
  def pos
1254
1254
  {b.interval.begin => ['species', b.interval.end]}
1255
1255
  end
1256
-
1256
+
1257
1257
  def details
1258
1258
  {:species => {:string => b.value}}
1259
1259
  end
@@ -1263,19 +1263,19 @@ grammar ScientificNameClean
1263
1263
  def value
1264
1264
  "× " + b.value
1265
1265
  end
1266
-
1266
+
1267
1267
  def canonical
1268
1268
  b.value
1269
1269
  end
1270
-
1270
+
1271
1271
  def hybrid
1272
1272
  true
1273
1273
  end
1274
-
1274
+
1275
1275
  def pos
1276
1276
  {b.interval.begin => ['species', b.interval.end]}
1277
1277
  end
1278
-
1278
+
1279
1279
  def details
1280
1280
  {:species => {:string => b.value}}
1281
1281
  end
@@ -1285,19 +1285,19 @@ grammar ScientificNameClean
1285
1285
  def value
1286
1286
  "× " + b.value
1287
1287
  end
1288
-
1288
+
1289
1289
  def canonical
1290
1290
  b.value
1291
1291
  end
1292
-
1292
+
1293
1293
  def hybrid
1294
1294
  true
1295
1295
  end
1296
-
1296
+
1297
1297
  def pos
1298
1298
  {b.interval.begin => ['species', b.interval.end]}
1299
1299
  end
1300
-
1300
+
1301
1301
  def details
1302
1302
  {:species => {:string => b.value}}
1303
1303
  end
@@ -1305,7 +1305,7 @@ grammar ScientificNameClean
1305
1305
  end
1306
1306
 
1307
1307
  rule annotation_identification
1308
- ("sp.nr."/"sp. nr."/"nr."/"nr "/"sp.aff."/"sp. aff."/"sp."/"sp "/"spp."/"spp "/"aff."/"aff "/"monst."/"?") {
1308
+ ("sp.nr."/"sp. nr."/"nr."/"nr "/"sp.aff."/"sp. aff."/"sp."/"sp "/"species"/"spp."/"spp "/"aff."/"aff "/"monst."/"? ") {
1309
1309
 
1310
1310
  def value
1311
1311
  text_value.strip
@@ -1392,9 +1392,9 @@ grammar ScientificNameClean
1392
1392
  text_value.split('').each do |l|
1393
1393
  l = 'ae' if l == 'æ'
1394
1394
  l = 'oe' if l == 'œ'
1395
- # We normalize ë as well. It is legal in botanical code, but it
1395
+ # We normalize ë as well. It is legal in botanical code, but it
1396
1396
  # is beneficial to normalize it for the reconsiliation purposes
1397
- l = 'e' if l == 'ë'
1397
+ l = 'e' if l == 'ë'
1398
1398
  res << l
1399
1399
  end
1400
1400
  res
@@ -1408,7 +1408,7 @@ grammar ScientificNameClean
1408
1408
  res = text_value
1409
1409
  res = 'ae' if res == 'æ'
1410
1410
  res = 'oe' if res == 'œ'
1411
- res = 'e' if res == 'ë'
1411
+ res = 'e' if res == 'ë'
1412
1412
  res
1413
1413
  end
1414
1414
  }
@@ -1426,7 +1426,7 @@ grammar ScientificNameClean
1426
1426
  def value
1427
1427
  'Oe'
1428
1428
  end
1429
- }
1429
+ }
1430
1430
  end
1431
1431
 
1432
1432
  rule year
@@ -1434,14 +1434,14 @@ grammar ScientificNameClean
1434
1434
  def value
1435
1435
  a.value
1436
1436
  end
1437
-
1437
+
1438
1438
  def pos
1439
1439
  a.pos
1440
1440
  end
1441
-
1441
+
1442
1442
  def details
1443
1443
  a.details
1444
- end
1444
+ end
1445
1445
  }
1446
1446
  /
1447
1447
  year_number_with_character
@@ -1464,31 +1464,31 @@ grammar ScientificNameClean
1464
1464
  end
1465
1465
  }
1466
1466
  end
1467
-
1467
+
1468
1468
  rule year_number
1469
1469
  [12] [7890] [0-9] ([0-9] [\?]?/"?") {
1470
1470
  def value
1471
1471
  text_value
1472
1472
  end
1473
-
1473
+
1474
1474
  def pos
1475
1475
  {interval.begin => ['year', interval.end]}
1476
1476
  end
1477
-
1477
+
1478
1478
  def details
1479
1479
  {:year => value}
1480
1480
  end
1481
1481
  }
1482
1482
  end
1483
-
1483
+
1484
1484
  rule left_paren
1485
1485
  "("
1486
1486
  end
1487
-
1487
+
1488
1488
  rule right_paren
1489
1489
  ")"
1490
1490
  end
1491
-
1491
+
1492
1492
  rule hybrid_character
1493
1493
  ("x"/"X") {
1494
1494
  def value
@@ -1498,7 +1498,7 @@ grammar ScientificNameClean
1498
1498
  /
1499
1499
  multiplication_sign
1500
1500
  end
1501
-
1501
+
1502
1502
  rule multiplication_sign
1503
1503
  ("×"/"*") {
1504
1504
  def value
@@ -1506,7 +1506,7 @@ grammar ScientificNameClean
1506
1506
  end
1507
1507
  }
1508
1508
  end
1509
-
1509
+
1510
1510
  rule space
1511
1511
  [\s]*
1512
1512
  end
@@ -1514,5 +1514,5 @@ grammar ScientificNameClean
1514
1514
  rule space_hard
1515
1515
  [\s]+
1516
1516
  end
1517
-
1517
+
1518
1518
  end