dimus-biodiversity 0.0.18 → 0.5.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,31 +1,14 @@
1
1
  # encoding: UTF-8
2
2
  grammar ScientificNameClean
3
3
 
4
- rule composite_scientific_name
5
- a:scientific_name space hybrid_separator space b:scientific_name space {
4
+ rule root
5
+ space a:scientific_name_5 space {
6
6
  def value
7
- a.value + " × " + b.value
8
- end
9
- def canonical
10
- a.canonical + " × " + b.canonical
11
- end
12
-
13
- def pos
14
- a.pos.merge(b.pos)
15
- end
16
-
17
- def details
18
- {:hybrid => {:scientific_name1 => a.details, :scientific_name2 => b.details}}
19
- end
20
- }
21
- /
22
- a:scientific_name space hybrid_separator space [\?]? {
23
- def value
24
- a.value + " × ?"
7
+ a.value.gsub(/\s{2,}/, ' ').strip
25
8
  end
26
9
 
27
10
  def canonical
28
- a.canonical
11
+ a.canonical.gsub(/\s{2,}/, ' ').strip
29
12
  end
30
13
 
31
14
  def pos
@@ -33,19 +16,15 @@ grammar ScientificNameClean
33
16
  end
34
17
 
35
18
  def details
36
- {:hybrid => {:scientific_name1 => a.details, :scientific_name2 => "?"}}
19
+ a.details
37
20
  end
38
21
  }
39
- /
40
- scientific_name
41
22
  end
42
23
 
43
- rule scientific_name
44
- name_part_authors_mix
45
- /
46
- space a:name_part space b:authors_part space c:taxon_concept_rank space d:authors_part space {
24
+ rule scientific_name_5
25
+ a:scientific_name_1 space b:taxon_concept_rank space c:authorship {
47
26
  def value
48
- a.value + " " + b.value + " " + c.apply(d)
27
+ a.value + " " + b.apply(c)
49
28
  end
50
29
 
51
30
  def canonical
@@ -53,67 +32,81 @@ grammar ScientificNameClean
53
32
  end
54
33
 
55
34
  def pos
56
- a.pos.merge(b.pos).merge(d.pos)
35
+ a.pos.merge(c.pos)
57
36
  end
58
37
 
59
38
  def details
60
- a.details.merge(b.details).merge(c.details(d)).merge({:name_part_verbatim => a.text_value, :auth_part_verbatim => (b.text_value + " " + c.text_value + " " + d.text_value).gsub(/\s{2,}/, ' ').strip})
61
- end
39
+ a.details.merge(b.details(c))
40
+ end
62
41
  }
63
- /
64
- space a:name_part space b:taxon_concept_rank space c:authors_part space {
42
+ /
43
+ scientific_name_4
44
+ end
45
+
46
+ rule scientific_name_4
47
+ a:scientific_name_1 space hybrid_character space b:scientific_name_1 {
65
48
  def value
66
- a.value + " " + b.apply(c)
49
+ a.value + " × " + b.value
67
50
  end
68
-
51
+
69
52
  def canonical
70
- a.canonical
53
+ a.canonical + " " + b.canonical
71
54
  end
72
55
 
73
56
  def pos
74
- a.pos.merge(c.pos)
57
+ a.pos.merge(b.pos)
75
58
  end
76
-
59
+
77
60
  def details
78
- a.details.merge(b.details(c)).merge({:name_part_verbatim => a.text_value, :auth_part_verbatim => (b.text_value + " " + c.text_value).gsub(/\s{2,}/, ' ').strip})
79
- end
61
+ {:hybridFormula => [a.details, b.details]}
62
+ end
80
63
  }
81
- /
82
- space a:name_part space b:authors_part space c:status_part space {
64
+ /
65
+ a:scientific_name_1 space hybrid_character space [\?]? {
83
66
  def value
84
- a.value + " " + b.value + " " + c.value
67
+ a.value + " × ?"
85
68
  end
69
+
86
70
  def canonical
87
71
  a.canonical
88
72
  end
89
73
 
90
74
  def pos
91
- a.pos.merge(b.pos)
75
+ a.pos
92
76
  end
93
77
 
94
78
  def details
95
- a.details.merge(b.details).merge(c.details).merge({:name_part_verbatim => a.text_value, :auth_part_verbatim => (b.text_value + " " + c.text_value).gsub(/\s{2,}/, ' ').strip})
79
+ {:hybridFormula => [a.details, "?"]}
96
80
  end
97
81
  }
98
82
  /
99
- space a:name_part space b:authors_part space {
100
- def value
83
+ scientific_name_3
84
+ end
85
+
86
+ rule scientific_name_3
87
+ a:hybrid_character space b:scientific_name_2 {
88
+ def value
101
89
  a.value + " " + b.value
102
90
  end
91
+
103
92
  def canonical
104
- a.canonical
93
+ b.canonical
105
94
  end
106
95
 
107
96
  def pos
108
- a.pos.merge(b.pos)
97
+ b.pos
109
98
  end
110
99
 
111
100
  def details
112
- a.details.merge(b.details).merge({:name_part_verbatim => a.text_value, :auth_part_verbatim => b.text_value.gsub(/\s{2,}/, ' ')})
101
+ {:namedHybrid => b.details}
113
102
  end
114
103
  }
115
104
  /
116
- space a:name_part space b:year space {
105
+ scientific_name_2
106
+ end
107
+
108
+ rule scientific_name_2
109
+ a:scientific_name_1 space b:status_part {
117
110
  def value
118
111
  a.value + " " + b.value
119
112
  end
@@ -123,16 +116,23 @@ grammar ScientificNameClean
123
116
  end
124
117
 
125
118
  def pos
126
- a.pos.merge(b.pos)
119
+ a.pos
127
120
  end
128
121
 
129
122
  def details
130
- a.details.merge(b.details).merge({:is_valid => false}).merge({:name_part_verbatim => a.text_value, :auth_part_verbatim => b.text_value.gsub(/\s{2,}/, ' ')})
123
+ a.details.merge(b.details)
131
124
  end
132
125
  }
133
126
  /
134
- name_part
127
+ scientific_name_1
135
128
  end
129
+
130
+ rule scientific_name_1
131
+ multinomial_name
132
+ /
133
+ uninomial_name
134
+ end
135
+
136
136
 
137
137
  rule status_part
138
138
  a:status_word space b:status_part {
@@ -156,73 +156,73 @@ grammar ScientificNameClean
156
156
  {:status => value}
157
157
  end
158
158
  }
159
- /
160
- latin_word
159
+ #/
160
+ #latin_word
161
161
  end
162
162
 
163
- rule name_part_authors_mix
164
- a:species_name space b:authors_part space c:subspecies_name space d:authors_part {
163
+
164
+ rule multinomial_name
165
+ a:genus space b:subgenus space c:species space_hard d:infraspecies_mult {
165
166
  def value
166
- (a.value + " " + b.value + " " + c.value + " " + d.value).gsub(/\s+/,' ')
167
+ a.value + " " + b.value + " " + c.value + " " + d.value
167
168
  end
169
+
168
170
  def canonical
169
- (a.canonical + " " + c.canonical).gsub(/\s+/,' ')
171
+ a.canonical + " " + c.canonical + " " + d.canonical
170
172
  end
171
-
173
+
172
174
  def pos
173
175
  a.pos.merge(b.pos).merge(c.pos).merge(d.pos)
174
176
  end
175
-
177
+
176
178
  def details
177
- a.details.merge(c.details).merge({:species_authors=>b.details, :subspecies_authors => d.details}).merge({:name_part_verbatim => a.text_value, :auth_part_verbatim => (b.text_value + " " + c.text_value + " " + d.text_value).gsub(/\s{2,}/, ' ')})
179
+ a.details.merge(b.details).merge(c.details).merge(d.details)
178
180
  end
179
181
  }
180
- /
181
- a:species_name space b:authors_part space c:subspecies_name {
182
- def value
183
- (a.value + " " + b.value + " " + c.value).gsub(/\s+/,' ')
182
+ /
183
+ a:genus space b:subgenus space c:species {
184
+ def value
185
+ a.value + " " + b.value + " " + c.value
184
186
  end
187
+
185
188
  def canonical
186
- (a.canonical + " " + c.canonical).gsub(/\s+/,' ')
187
- end
188
- def details
189
- a.details.merge(c.details).merge({:species_authors=>b.details}).merge({:name_part_verbatim => a.text_value, :auth_part_verbatim => (b.text_value + " " + c.text_value).gsub(/\s{2,}/, ' ')})
190
- end
191
- }
192
- end
193
-
194
- rule authors_part
195
- a:original_authors_revised_name space b:authors_revised_name {
196
- def value
197
- a.value + " " + b.value
189
+ a.canonical + " " + c.canonical
198
190
  end
199
191
 
200
192
  def pos
201
- a.pos.merge(b.pos)
193
+ a.pos.merge(b.pos).merge(c.pos)
202
194
  end
203
195
 
204
196
  def details
205
- a.details.merge(b.details)
197
+ a.details.merge(b.details).merge(c.details)
206
198
  end
207
199
  }
208
200
  /
209
- a:simple_authors_part space ex_sep space b:simple_authors_part {
210
- def value
211
- a.value + " ex " + b.value
201
+ a:genus space_hard b:species space_hard c:infraspecies_mult {
202
+ def value
203
+ a.value + " " + b.value + " " + c.value
212
204
  end
213
-
205
+
206
+ def canonical
207
+ a.canonical + " " + b.canonical + " " + c.canonical
208
+ end
209
+
214
210
  def pos
215
- a.pos.merge(b.pos)
211
+ a.pos.merge(b.pos).merge(c.pos)
216
212
  end
217
-
213
+
218
214
  def details
219
- {:revised_name_authors => {:revised_authors => a.details[:authors], :authors => b.details[:authors]}}
215
+ a.details.merge(b.details).merge(c.details)
220
216
  end
221
217
  }
222
218
  /
223
- a:original_authors_revised_name space b:authors_names_full {
219
+ a:genus space_hard b:species {
224
220
  def value
225
- a.value + " " + b.value
221
+ a.value + " " + b.value
222
+ end
223
+
224
+ def canonical
225
+ a.canonical + " " + b.canonical
226
226
  end
227
227
 
228
228
  def pos
@@ -233,121 +233,156 @@ grammar ScientificNameClean
233
233
  a.details.merge(b.details)
234
234
  end
235
235
  }
236
- /
237
- authors_revised_name
238
- /
239
- original_authors_revised_name
240
- /
241
- simple_authors_part
242
236
  end
243
237
 
244
- rule simple_authors_part
245
- a:original_authors_names_full space b:authors_names_full {
238
+ rule infraspecies_mult
239
+ a:infraspecies space b:infraspecies_mult {
246
240
  def value
247
241
  a.value + " " + b.value
248
242
  end
249
243
 
244
+ def canonical
245
+ a.canonical + " " + b.canonical
246
+ end
247
+
250
248
  def pos
251
249
  a.pos.merge(b.pos)
252
250
  end
253
251
 
254
252
  def details
255
- a.details.merge(b.details)
253
+ #{:infraspecies => a.details[:infraspceies] << b.details[:infraspecies]}
254
+ a_array = a.details[:infraspecies].class == Array ? a.details[:infraspecies] : [a.details[:infraspecies]]
255
+ b_array = b.details[:infraspecies].class == Array ? b.details[:infraspecies] : [b.details[:infraspecies]]
256
+ a.details.merge({:infraspecies => a_array + b_array})
256
257
  end
257
258
  }
258
259
  /
259
- original_authors_names_full
260
- /
261
- authors_names_full
260
+ infraspecies
262
261
  end
263
262
 
264
- rule original_authors_names_full
265
- left_bracket space a:authors_names space right_bracket space [,]? space b:year {
263
+ rule infraspecies
264
+ a:infraspecies_epitheton space b:authorship {
266
265
  def value
267
- "(" + a.value + " " + b.value + ")"
266
+ a.value + " " + b.value
268
267
  end
269
-
268
+
269
+ def canonical
270
+ a.canonical
271
+ end
272
+
270
273
  def pos
271
274
  a.pos.merge(b.pos)
272
275
  end
273
-
276
+
274
277
  def details
275
- {:orig_authors => a.details[:authors], :year => b.details[:year]}
278
+ {:infraspecies => a.details[:infraspecies].merge(b.details)}
276
279
  end
277
280
  }
278
281
  /
279
- left_bracket space a:authors_names_full space right_bracket {
280
- def value
281
- "(" + a.value + ")"
282
+ infraspecies_epitheton
283
+ end
284
+
285
+ rule infraspecies_epitheton
286
+ sel:rank space_hard a:species_word {
287
+ def value
288
+ sel.apply(a)
289
+ end
290
+ def canonical
291
+ sel.canonical(a)
282
292
  end
283
293
 
284
294
  def pos
285
- a.pos
295
+ {a.interval.begin => ['infraspecies', a.interval.end]}
286
296
  end
287
-
297
+
288
298
  def details
289
- {:orig_authors => a.details[:authors]}
299
+ sel.details(a)
290
300
  end
291
301
  }
292
302
  /
293
- "[" space a:authors_names_full space "]" {
294
- def value
295
- "(" + a.value + ")"
296
- end
297
-
298
- def pos
299
- a.pos
300
- end
301
-
302
- def details
303
- {:orig_authors => a.details[:authors]}
304
- end
305
- }
306
- /
307
- left_bracket space a:unknown_auth space right_bracket {
303
+ species_word ![\.] {
308
304
  def value
309
- "(" + a.value + ")"
305
+ text_value
310
306
  end
311
307
 
308
+ def canonical
309
+ value
310
+ end
311
+
312
312
  def pos
313
- a.pos
313
+ {interval.begin => ['infraspecies', interval.end]}
314
314
  end
315
-
315
+
316
316
  def details
317
- {:orig_authors => a.details[:authors]}
317
+ {:infraspecies => {:epitheton => value, :rank => 'n/a'}}
318
318
  end
319
319
  }
320
- /
321
- left_bracket space "?" space right_bracket {
320
+ end
321
+
322
+ rule taxon_concept_rank
323
+ "sec." {
322
324
  def value
323
- "(?)"
325
+ "sec."
324
326
  end
325
- def details
326
- {:orig_authors => "unknown"}
327
+ def apply(a)
328
+ " " + value + " " + a.value
327
329
  end
330
+ def details(a = nil)
331
+ {:taxon_concept => a.details}
332
+ end
328
333
  }
329
334
  end
330
-
331
- rule original_authors_revised_name
332
- left_bracket space a:authors_revised_name space right_bracket {
335
+
336
+ rule rank
337
+ ("morph."/"f.sp."/"B"/"ssp."/"mut."/"nat"/"nothosubsp."/"pseudovar."/"sect."/"ser."/"var."/"subvar."/ "[var.]" /"subsp."/"subf."/"race"/"α"
338
+ /"ββ"/"β"/"γ"/"δ"/"ε"/"φ"/"θ"/"μ"/"a."/"b."/"c."/"d."/"e."/"g."/"k."/"****"/"**"/"*")
339
+ {
333
340
  def value
334
- "(" + a.value + ")"
341
+ text_value.strip
335
342
  end
336
-
337
- def pos
338
- a.pos
343
+
344
+ def apply(a)
345
+ " " + text_value + " " + a.value
346
+ end
347
+
348
+ def canonical(a)
349
+ " " + a.value
339
350
  end
340
351
 
341
- def details
342
- {:original_revised_name_authors => a.details[:revised_name_authors]}
352
+ def details(a = nil)
353
+ {:infraspecies => {:epitheton => (a.value rescue nil), :rank => text_value}}
354
+ end
355
+ }
356
+ /
357
+ rank_forma
358
+ end
359
+
360
+ rule rank_forma
361
+ ("forma"/"form."/"fo."/"f.")
362
+ {
363
+ def value
364
+ "f."
365
+ end
366
+ def apply(a)
367
+ " " + value + " " + a.value
368
+ end
369
+ def canonical(a)
370
+ " " + a.value
371
+ end
372
+ def details(a = nil)
373
+ {:infraspecies => {:epitheton => (a.value rescue nil), :rank => value}}
343
374
  end
344
375
  }
345
376
  end
346
377
 
347
- rule authors_revised_name
348
- a:authors_names_full space ex_sep space b:authors_names_full {
378
+ rule species
379
+ a:species_epitheton space b:authorship {
349
380
  def value
350
- a.value + " ex " + b.value
381
+ a.value + " " + b.value
382
+ end
383
+
384
+ def canonical
385
+ a.canonical
351
386
  end
352
387
 
353
388
  def pos
@@ -355,537 +390,553 @@ grammar ScientificNameClean
355
390
  end
356
391
 
357
392
  def details
358
- {:revised_name_authors =>{:revised_authors => a.details[:authors], :authors => b.details[:authors]}}
393
+ {:species => a.details[:species].merge(b.details)}
359
394
  end
360
- }
395
+ }
396
+ /
397
+ species_epitheton
361
398
  end
362
-
363
- rule authors_names_full
364
- a:authors_names space [,]? space b:year {
399
+
400
+ rule species_epitheton
401
+ a:species_word &(space_hard author_prefix_word space_hard) {
365
402
  def value
366
- a.value + " " + b.value
403
+ a.value
367
404
  end
368
405
 
406
+ def canonical
407
+ a.value
408
+ end
409
+
369
410
  def pos
370
- a.pos.merge(b.pos)
411
+ {a.interval.begin => ['species', a.interval.end]}
371
412
  end
372
-
413
+
373
414
  def details
374
- {:authors => {:names => a.details[:authors][:names]}.merge(b.details)}
415
+ {:species => {:epitheton => a.value}}
375
416
  end
376
417
  }
377
418
  /
378
- authors_names
379
- /
380
- unknown_auth
381
- end
382
-
383
- rule unknown_auth
384
- ("auct."/"hort."/"anon."/"ht.") {
385
- def value
386
- text_value
419
+ species_word {
420
+ def canonical
421
+ value
387
422
  end
388
423
 
389
424
  def pos
390
- {interval.begin => ['unknown_author', interval.end]}
425
+ {interval.begin => ['species', interval.end]}
391
426
  end
392
427
 
393
428
  def details
394
- {:authors => "unknown"}
429
+ {:species => {:epitheton => value}}
395
430
  end
396
431
  }
432
+ /
433
+ species_word_hybrid
397
434
  end
398
435
 
399
- rule ex_sep
400
- ("ex"/"in")
401
- end
402
-
403
- rule authors_names
404
- a:author_name space sep:author_name_separator space b:authors_names {
436
+ rule subgenus
437
+ left_paren space a:cap_latin_word space right_paren {
405
438
  def value
406
- sep.apply(a,b)
439
+ "(" + a.value + ")"
440
+ end
441
+
442
+ def canonical
443
+ a.value
407
444
  end
408
445
 
409
446
  def pos
410
- sep.pos(a,b)
447
+ {a.interval.begin => ['subgenus', a.interval.end]}
411
448
  end
412
449
 
413
450
  def details
414
- sep.details(a,b)
451
+ {:subgenus => {:epitheton => a.value}}
415
452
  end
416
453
  }
417
- /
418
- author_name
419
- end
454
+ end
420
455
 
421
- rule author_name_separator
422
- ("&"/","/"and"/"et") {
423
- def apply(a,b)
424
- sep = text_value.strip
425
- sep = " et" if ["&","and","et"].include? sep
426
- a.value + sep + " " + b.value
456
+ rule genus
457
+ cap_latin_word {
458
+ def pos
459
+ {interval.begin => ['genus', interval.end]}
427
460
  end
428
461
 
429
- def pos(a,b)
430
- a.pos.merge(b.pos)
462
+ def canonical
463
+ value
431
464
  end
432
-
433
- def details(a,b)
434
- {:authors => {:names => a.details[:authors][:names] + b.details[:authors][:names]}}
465
+
466
+ def details
467
+ {:genus => {:epitheton => value}}
435
468
  end
436
469
  }
437
470
  end
438
471
 
439
- rule author_name
440
- space a:author_word space b:author_name space {
472
+ rule uninomial_name
473
+ a:uninomial_epitheton space_hard b:authorship {
441
474
  def value
442
475
  a.value + " " + b.value
443
476
  end
444
477
 
478
+ def canonical
479
+ a.canonical
480
+ end
481
+
445
482
  def pos
446
483
  a.pos.merge(b.pos)
447
484
  end
448
485
 
449
486
  def details
450
- {:authors => {:names => [value]}}
487
+ {:uninomial => a.details[:uninomial].merge(b.details)}
451
488
  end
452
489
  }
453
490
  /
454
- author_word
491
+ uninomial_epitheton
492
+ end
493
+
494
+ rule uninomial_epitheton
495
+ cap_latin_word {
496
+ def canonical
497
+ value
498
+ end
499
+
500
+ def pos
501
+ {interval.begin => ['uninomial', interval.end]}
502
+ end
503
+
504
+ def details
505
+ {:uninomial => {:epitheton => value}}
506
+ end
507
+ }
455
508
  end
456
509
 
457
- rule author_word
458
- "A S. Xu" {
510
+ rule authorship
511
+ a:basionym_authorship_with_parenthesis space b:simple_authorship space c:ex_authorship {
459
512
  def value
460
- text_value.strip
513
+ a.value + " " + b.value + " " + c.value
461
514
  end
462
515
 
463
516
  def pos
464
- {interval.begin => ['author_word', 1], (interval.begin + 2) => ['author_word', 2], (interval.begin + 5) => ['author_word', 2]}
517
+ a.pos.merge(b.pos).merge(c.pos)
465
518
  end
466
519
 
467
520
  def details
468
- {:authors => {:names => [value]}}
521
+ val = {:authorship => text_value.strip, :combinationAuthorTeam => b.details[:basionymAuthorTeam], :basionymAuthorTeam => a.details[:basionymAuthorTeam]}
522
+ val[:combinationAuthorTeam].merge!(c.details)
523
+ val
469
524
  end
470
525
  }
471
526
  /
472
- ("anon."/"f."/"bis"/"arg."/author_prefix/"et al.\{\?\}"/"et al.") {
527
+ a:basionym_authorship_with_parenthesis space b:simple_authorship {
473
528
  def value
474
- text_value.strip
529
+ a.value + " " + b.value
475
530
  end
476
531
 
477
532
  def pos
478
- #cheating because there are several words in some of them
479
- {interval.begin => ['author_word', interval.end]}
533
+ a.pos.merge(b.pos)
480
534
  end
481
535
 
482
536
  def details
483
- {:authors => {:names => [value]}}
537
+ {:authorship => text_value.strip, :combinationAuthorTeam => b.details[:basionymAuthorTeam], :basionymAuthorTeam => a.details[:basionymAuthorTeam]}
484
538
  end
485
539
  }
486
- /
487
- ("Å"/"Ö"/"Á"/"Ø"/"Ô"/"Š"/"Ś"/"Č"/"Ķ"/"Ł"/"É"/"Ž"/[A-Z]) [^0-9()\s&,]+ {
540
+ /
541
+ basionym_authorship_with_parenthesis
542
+ /
543
+ a:simple_authorship space b:ex_authorship {
488
544
  def value
489
- text_value.gsub(/\s+/, " ").strip
545
+ a.value + " " + b.value
490
546
  end
491
547
 
492
548
  def pos
493
- {interval.begin => ['author_word', interval.end]}
549
+ a.pos.merge(b.pos)
494
550
  end
495
551
 
496
552
  def details
497
- {:authors => {:names => [value]}}
553
+ val = a.details
554
+ val[:authorship] = text_value.strip
555
+ val[:basionymAuthorTeam].merge!(b.details)
556
+ val
498
557
  end
499
558
  }
559
+ /
560
+ simple_authorship
500
561
  end
501
-
502
- rule author_prefix
503
- "da"/"der"/"den"/"de"/"du"/"la"/"ter"/"van"/"von"
504
- end
562
+
505
563
 
506
- rule name_part
507
- space a:species_name space b:rank space_hard c:editorials_full {
564
+ rule basionym_authorship_with_parenthesis
565
+ left_paren space a:authors_names space right_paren space [,]? space b:year {
508
566
  def value
509
- a.value + " " + b.value + " " + c.value
510
- end
511
- def canonical
512
- a.canonical
567
+ "(" + a.value + " " + b.value + ")"
513
568
  end
514
569
 
515
570
  def pos
516
- a.pos
517
- end
571
+ a.pos.merge(b.pos)
572
+ end
518
573
 
519
574
  def details
520
- a.details.merge(b.details).merge(c.details)
575
+ { :authorship => text_value,
576
+ :basionymAuthorTeam => {:author_team => text_value}.merge(a.details).merge(b.details)
577
+ }
521
578
  end
522
579
  }
523
580
  /
524
- space a:species_name &(space author_prefix) {
581
+ left_paren space a:simple_authorship space b:ex_authorship space right_paren {
525
582
  def value
526
- a.value
583
+ "(" + a.value + " " + b.value + ")"
527
584
  end
528
585
 
529
- def canonical
530
- a.canonical
531
- end
532
-
533
586
  def pos
534
- a.pos
587
+ a.pos.merge(b.pos)
535
588
  end
536
-
589
+
537
590
  def details
538
- a.details
591
+ val = a.details
592
+ val[:basionymAuthorTeam].merge!(b.details)
593
+ val[:authorship] = text_value.strip
594
+ val
539
595
  end
540
596
  }
541
597
  /
542
- space a:species_name space b:subspecies_names {
598
+ left_paren space a:simple_authorship space right_paren {
543
599
  def value
544
- a.value + b.value
545
- end
546
- def canonical
547
- a.canonical + b.canonical
600
+ "(" + a.value + ")"
548
601
  end
549
602
 
550
603
  def pos
551
- a.pos.merge(b.pos)
604
+ a.pos
552
605
  end
553
606
 
554
607
  def details
555
- a.details.merge(b.details)
608
+ val = a.details
609
+ val[:authorship] = text_value
610
+ val
556
611
  end
557
612
  }
558
613
  /
559
- space a:species_name space b:species_word ![\.] {
614
+ left_paren space a:"?" space right_paren {
560
615
  def value
561
- a.value + " " + b.value
562
- end
563
-
564
- def canonical
565
- a.canonical + " " + b.value
616
+ "(?)"
566
617
  end
567
618
 
568
619
  def pos
569
- a.pos.merge({b.interval.begin => ['subspecies', b.interval.end]})
620
+ {a.interval.begin => ['unknown_author', a.interval.end]}
570
621
  end
571
622
 
572
623
  def details
573
- a.details.merge({:subspecies => {:rank => "n/a", :value =>b.value}})
624
+ {:authorship => text_value, :basionymAuthorTeam => {:authorTeam => text_value, :author => ['?']}}
574
625
  end
575
626
  }
576
- /
577
- species_name
578
- /
579
- cap_latin_word
580
627
  end
581
628
 
582
- rule subspecies_names
583
- a:subspecies_name space b:subspecies_names {
629
+ rule ex_authorship
630
+ ex_sep space b:simple_authorship {
584
631
  def value
585
- a.value + b.value
586
- end
587
-
588
- def canonical
589
- a.canonical + b.canonical
632
+ " ex " + b.value
590
633
  end
591
634
 
592
635
  def pos
593
- a.pos.merge(b.pos)
636
+ b.pos
594
637
  end
595
638
 
596
639
  def details
597
- c = a.details[:subspecies] + b.details_subspecies
598
- a.details.merge({:subspecies => c, :is_valid => false})
640
+ val = {:exAuthorTeam => {:authorTeam => b.text_value.strip}.merge(b.details[:basionymAuthorTeam])}
641
+ val
599
642
  end
600
- }
601
- /
602
- subspecies_name
643
+ }
603
644
  end
604
645
 
605
- rule subspecies_name
606
- sel:rank space_hard a:species_word {
607
- def value
608
- sel.apply(a)
609
- end
610
- def canonical
611
- sel.canonical(a)
646
+
647
+ rule simple_authorship
648
+ a:authors_names space [,]? space b:year {
649
+ def value
650
+ a.value + " " + b.value
612
651
  end
613
652
 
614
653
  def pos
615
- {a.interval.begin => ['subspecies', a.interval.end]}
654
+ a.pos.merge(b.pos)
616
655
  end
656
+
617
657
  def details
618
- sel.details(a)
658
+ details_with_arg(:basionymAuthorTeam)
619
659
  end
620
- def details_subspecies
621
- details[:subspecies]
660
+
661
+ def details_with_arg(authorTeamType = 'basionymAuthorTeam')
662
+ { :authorship => text_value,
663
+ authorTeamType.to_sym => {
664
+ :authorTeam => a.text_value.strip
665
+ }.merge(a.details).merge(b.details)
666
+ }
622
667
  end
623
668
  }
624
- end
625
-
626
- rule editorials_full
627
- "(" space a:editorials space ")" {
628
- def value
629
- "(" + a.value + ")"
630
- end
669
+ /
670
+ authors_names {
631
671
  def details
632
- {:editorial_markup => value, :is_valid => false}
672
+ details = details_with_arg(:basionymAuthorTeam)
673
+ details[:basionymAuthorTeam].merge!(super)
674
+ details
675
+ end
676
+
677
+ def details_with_arg(authorTeamType = 'basionymAuthorTeam')
678
+ { :authorship => text_value,
679
+ authorTeamType.to_sym => {
680
+ :authorTeam => text_value,
681
+ }
682
+ }
633
683
  end
634
684
  }
635
685
  end
636
686
 
637
- rule editorials
638
- space a:rank space [&]? space b:editorials {
687
+ rule authors_names
688
+ a:author_name space sep:author_separator space b:authors_names {
639
689
  def value
640
- a.value + b.value
690
+ sep.apply(a,b)
691
+ end
692
+
693
+ def pos
694
+ sep.pos(a,b)
641
695
  end
696
+
642
697
  def details
643
- {:editorial_markup => value, :is_valid => false}
644
- end
698
+ sep.details(a,b)
699
+ end
645
700
  }
646
701
  /
647
- rank
702
+ author_name
703
+ /
704
+ unknown_auth
648
705
  end
649
706
 
650
- rule rank
651
- ("morph."/"f.sp."/"B"/"ssp."/"nat"/"mut."/"pseudovar."/"sect."/"ser."/"var."/"subvar."/ "[var.]" /"subsp."/"subf."/"race"/"α"
652
- /"ββ"/"β"/"γ"/"δ"/"ε"/"φ"/"θ"/"μ"/"a."/"b."/"c."/"d."/"e."/"g."/"k."/"****"/"**"/"*")
653
- {
707
+
708
+ rule unknown_auth
709
+ ("auct."/"hort."/"anon."/"ht.") {
654
710
  def value
655
- text_value.strip
656
- end
657
- def apply(a)
658
- " " + text_value + " " + a.value
711
+ text_value
659
712
  end
660
- def canonical(a)
661
- " " + a.value
713
+
714
+ def pos
715
+ {interval.begin => ['unknown_author', interval.end]}
662
716
  end
663
- def details(a = nil)
664
- {:subspecies => [{:rank => text_value, :value => (a.value rescue nil)}]}
717
+
718
+ def details
719
+ {:author => ["unknown"]}
665
720
  end
666
721
  }
667
- /
668
- rank_forma
669
722
  end
670
723
 
671
- rule rank_forma
672
- ("forma"/"form."/"fo."/"f.")
673
- {
674
- def value
675
- "f."
676
- end
677
- def apply(a)
678
- " " + value + " " + a.value
724
+ rule ex_sep
725
+ ("ex"/"in") &[\s]
726
+ end
727
+
728
+ rule author_separator
729
+ ("&"/","/"and"/"et") {
730
+ def apply(a,b)
731
+ sep = text_value.strip
732
+ sep = " et" if ["&","and","et"].include? sep
733
+ a.value + sep + " " + b.value
679
734
  end
680
- def canonical(a)
681
- " " + a.value
735
+
736
+ def pos(a,b)
737
+ a.pos.merge(b.pos)
682
738
  end
683
- def details(a = nil)
684
- {:subspecies => [{:rank => value, :value => (a.value rescue nil)}]}
739
+
740
+ def details(a,b)
741
+ {:author => a.details[:author] + b.details[:author]}
685
742
  end
686
743
  }
687
744
  end
688
745
 
689
- rule species_name
690
- hybrid_separator space_hard a:cap_latin_word space_hard b:species_word {
746
+ rule author_name
747
+ space a:author_prefix_word space b:author_name space {
691
748
  def value
692
- "× " + a.value + " " + b.value
693
- end
694
- def canonical
695
749
  a.value + " " + b.value
696
750
  end
697
751
 
698
752
  def pos
699
- {a.interval.begin => ['genus', a.interval.end], b.interval.begin => ['species', b.interval.end]}
753
+ a.pos.merge(b.pos)
700
754
  end
701
755
 
702
756
  def details
703
- {:genus => a.value, :species => b.value, :cross => 'before'}
757
+ {:author => [value]}
704
758
  end
705
759
  }
706
760
  /
707
- hybrid_separator space_hard a:cap_latin_word {
761
+ space a:author_word space b:author_name space {
708
762
  def value
709
- "× " + a.value
710
- end
711
- def canonical
712
- a.value
763
+ a.value + " " + b.value
713
764
  end
714
765
 
715
766
  def pos
716
- {a.interval.begin => ['uninomial', a.interval.end]}
767
+ a.pos.merge(b.pos)
717
768
  end
718
769
 
719
770
  def details
720
- {:uninomial => a.value, :cross => 'before'}
771
+ {:author => [value]}
721
772
  end
722
773
  }
723
774
  /
724
- a:cap_latin_word space_hard hybrid_separator space_hard b:species_word {
775
+ author_word
776
+ end
777
+
778
+ rule author_word
779
+ "A S. Xu" {
725
780
  def value
726
- a.value + " × " + b.value
727
- end
728
- def canonical
729
- a.value + " " + b.value
781
+ text_value.strip
730
782
  end
731
783
 
732
784
  def pos
733
- {a.interval.begin => ['genus', a.interval.end], b.interval.begin => ['species', b.interval.end]}
785
+ {interval.begin => ['author_word', 1], (interval.begin + 2) => ['author_word', 2], (interval.begin + 5) => ['author_word', 2]}
734
786
  end
735
787
 
736
788
  def details
737
- {:genus => a.value, :species => b.value, :cross => 'inside'}
789
+ {:author => [value]}
738
790
  end
739
791
  }
740
792
  /
741
- a:cap_latin_word space b:subgenus space c:species_word {
793
+ ("bis"/"arg."/"et al.\{\?\}"/"et al.") {
742
794
  def value
743
- a.value + " " + b.value + " " + c.value
744
- end
745
- def canonical
746
- a.value + " " + c.value
795
+ text_value.strip
747
796
  end
748
797
 
749
798
  def pos
750
- {a.interval.begin => ['genus', a.interval.end]}.merge(b.pos).merge({c.interval.begin => ['subspecies', c.interval.end]})
799
+ #cheating because there are several words in some of them
800
+ {interval.begin => ['author_word', interval.end]}
751
801
  end
752
802
 
753
803
  def details
754
- {:genus => a.value, :subgenus => b.details, :species => c.value}
804
+ {:author => [value]}
755
805
  end
756
- }
757
- /
758
- a:cap_latin_word space_hard b:species_word {
806
+ }
807
+ /
808
+ ("Å"/"Ö"/"Á"/"Ø"/"Ô"/"Š"/"Ś"/"Č"/"Ķ"/"Ł"/"É"/"Ž"/[A-W]/[Y-Z]) [^0-9\[\]\(\)\s&,]* {
759
809
  def value
760
- a.value + " " + b.value
761
- end
762
- def canonical
763
- value
810
+ text_value
764
811
  end
765
812
 
766
813
  def pos
767
- {a.interval.begin => ['genus', a.interval.end], b.interval.begin => ['species', b.interval.end]}
814
+ {interval.begin => ['author_word', interval.end]}
768
815
  end
769
816
 
770
817
  def details
771
- {:genus => a.value, :species => b.value}
818
+ {:author => [value]}
772
819
  end
773
820
  }
774
- end
775
-
776
- rule subgenus
777
- "(" space a:cap_latin_word space ")" {
821
+ /
822
+ "X" [^0-9\[\]\(\)\s&,]+ {
778
823
  def value
779
- "(" + a.value + ")"
780
- end
781
-
782
- def canonical
783
- ''
824
+ text_value
784
825
  end
785
826
 
786
827
  def pos
787
- {a.interval.begin => ['subgenus', a.interval.end]}
828
+ {interval.begin => ['author_word', interval.end]}
788
829
  end
789
830
 
790
831
  def details
791
- a.value
832
+ {:author => [value]}
792
833
  end
793
834
  }
835
+ /
836
+ author_prefix_word
794
837
  end
795
838
 
796
- rule taxon_concept_rank
797
- "sec." {
839
+ rule author_prefix_word
840
+ space ("da"/"der"/"den"/"de"/"du"/"la"/"ter"/"van"/"von") &space_hard {
798
841
  def value
799
- "sec."
842
+ text_value
800
843
  end
801
- def apply(a)
802
- " " + value + " " + a.value
844
+
845
+ def pos
846
+ #cheating because there are several words in some of them
847
+ {interval.begin => ['author_word', interval.end]}
803
848
  end
804
- def details(a = nil)
805
- {:taxon_concept => a.details}
806
- end
807
849
  }
808
850
  end
809
-
810
- # "subsect."/"subtrib."/"subgen."/"trib."/
811
- rule genus_rank
812
- ("subsect."/"subtrib."/"subgen."/"trib.")
813
- {
851
+
852
+ rule cap_latin_word
853
+ a:([A-Z]/cap_digraph) b:latin_word "?" {
814
854
  def value
815
- text_value.strip
816
- end
817
- def apply(a)
818
- " " + text_value + " " + a.value
855
+ (a.value rescue a.text_value) + b.value
819
856
  end
820
- def canonical(a)
821
- ""
857
+ }
858
+ /
859
+ a:([A-Z]/cap_digraph) b:latin_word {
860
+ def value
861
+ (a.value rescue a.text_value) + b.value
822
862
  end
823
- def details(a = nil)
824
- {:subgenus => [{:rank => text_value, :value => (a.value rescue nil)}]}
863
+ }
864
+ /
865
+ ("Ca"/"Ea"/"Ge"/"Ia"/"Io"/"Io"/"Ix"/"Lo"/"Oa"/"Ra"/"Ty"/"Ua"/"Aa"/"Ja"/"Zu"/"La"/"Qu"/"As"/"Ba") {
866
+ def value
867
+ text_value
825
868
  end
826
869
  }
827
870
  end
828
-
829
- rule cap_latin_word
830
- a:([A-Z]/cap_digraph) b:latin_word "?" {
871
+
872
+ rule species_word_hybrid
873
+ a:multiplication_sign space b:species_word {
831
874
  def value
832
- (a.value rescue a.text_value) + b.value
875
+ a.value + " " + b.value
833
876
  end
834
877
 
835
- def canonical
836
- value
878
+ def canonical
879
+ b.value
837
880
  end
838
881
 
839
882
  def pos
840
- {a.interval.begin => ['uninomial', a.interval.end]}
883
+ {b.interval.begin => ['species', b.interval.end]}
841
884
  end
842
885
 
843
- def details
844
- {:uninomial => value}
886
+ def details
887
+ {:species => {:epitheton => b.value, :namedHybrid => true}}
845
888
  end
846
889
  }
847
890
  /
848
- a:([A-Z]/cap_digraph) b:latin_word {
891
+ a:"X" space b:species_word {
849
892
  def value
850
- (a.value rescue a.text_value) + b.value
893
+ "× " + b.value
851
894
  end
852
895
 
853
- def canonical
854
- value
896
+ def canonical
897
+ b.value
855
898
  end
856
899
 
857
900
  def pos
858
- {a.interval.begin => ['uninomial',b.interval.end]}
901
+ {b.interval.begin => ['species', b.interval.end]}
859
902
  end
860
903
 
861
- def details
862
- {:uninomial => value}
904
+ def details
905
+ {:species => {:epitheton => b.value, :namedHybrid => true}}
863
906
  end
864
907
  }
865
908
  /
866
- ("Ca"/"Ea"/"Ge"/"Ia"/"Io"/"Io"/"Ix"/"Lo"/"Oa"/"Ra"/"Ty"/"Ua"/"Aa"/"Ja"/"Zu"/"La"/"Qu"/"As"/"Ba") {
909
+ a:"x" space_hard b:species_word {
867
910
  def value
868
- text_value
911
+ "× " + b.value
869
912
  end
870
913
 
871
914
  def canonical
872
- value
915
+ b.value
873
916
  end
874
917
 
875
918
  def pos
876
- {interval.begin => ['uninomial', interval.end]}
919
+ {b.interval.begin => ['species', b.interval.end]}
877
920
  end
878
921
 
879
922
  def details
880
- {:uninomial => value}
923
+ {:species => {:epitheton => b.value, :namedHybrid => true}}
881
924
  end
882
925
  }
883
926
  end
884
927
 
928
+ #rule species_word
929
+ ## (!"sensu" a:species_word_0) {
930
+ # def value
931
+ # a.value
932
+ # end
933
+ # }
934
+ #end
935
+
885
936
  rule species_word
886
937
  a:[0-9]+ "-"? b:latin_word {
887
938
  def value
888
- a.text_value + "-"+ b.value
939
+ a.text_value + "-" + b.value
889
940
  end
890
941
  }
891
942
  /
@@ -897,115 +948,107 @@ grammar ScientificNameClean
897
948
  def value
898
949
  a.text_value + b.value
899
950
  end
900
- def details
901
- {}
902
- end
903
951
  }
904
952
  /
905
953
  a:digraph b:full_name_letters {
906
954
  def value
907
955
  a.value + b.value
908
956
  end
909
- def details
910
- {}
911
- end
912
957
  }
913
958
  end
914
-
959
+
915
960
  rule full_name_letters
916
961
  a:digraph b:full_name_letters {
917
962
  def value
918
963
  a.value + b.value
919
964
  end
920
- def details
921
- {}
922
- end
923
965
  }
924
966
  /
925
967
  a:valid_name_letters b:digraph c:full_name_letters {
926
968
  def value
927
969
  a.value + b.value + c.value
928
970
  end
929
- def details
930
- {}
931
- end
932
971
  }
933
972
  /
934
973
  valid_name_letters
935
974
  end
936
-
975
+
937
976
  rule valid_name_letters
938
977
  [a-z\-ëüäöïé]+ {
939
978
  def value
940
979
  text_value
941
980
  end
942
- def details
943
- {}
944
- end
945
981
  }
946
982
  end
947
-
948
- rule cap_digraph
949
- "Æ" {
950
- def value
951
- 'Ae'
952
- end
953
- }
954
- /
955
- "Œ" {
956
- def value
957
- 'Oe'
958
- end
959
- }
960
- end
961
-
983
+
984
+ rule cap_digraph
985
+ "Æ" {
986
+ def value
987
+ 'Ae'
988
+ end
989
+ }
990
+ /
991
+ "Œ" {
992
+ def value
993
+ 'Oe'
994
+ end
995
+ }
996
+ end
997
+
962
998
  rule digraph
963
- "æ" {
964
- def value
965
- 'ae'
966
- end
967
- }
968
- /
969
- "œ" {
970
- def value
971
- 'oe'
972
- end
973
- }
974
- end
975
-
976
- rule hybrid_separator
977
- ("x"/"X"/"×") {
999
+ "æ" {
978
1000
  def value
979
- "x"
1001
+ 'ae'
980
1002
  end
981
- def details
982
- {}
1003
+ }
1004
+ /
1005
+ "œ" {
1006
+ def value
1007
+ 'oe'
983
1008
  end
984
1009
  }
985
1010
  end
986
-
1011
+
987
1012
  rule year
988
- year_with_character
989
- /
990
- [0-9\?]+ {
1013
+ b:left_paren space a:(year_number_with_character/year_number) space c:right_paren {
991
1014
  def value
992
- text_value.strip
1015
+ a.value
993
1016
  end
994
1017
 
995
1018
  def pos
996
- {interval.begin => ['year', interval.end]}
1019
+ a.pos
997
1020
  end
998
1021
 
1022
+ def details
1023
+ a.details
1024
+ end
1025
+ }
1026
+ /
1027
+ year_number_with_character
1028
+ /
1029
+ year_number
1030
+ end
1031
+
1032
+ rule year_number_with_character
1033
+ a:year_number [a-zA-Z] {
1034
+ def value
1035
+ a.text_value
1036
+ end
1037
+
1038
+ def pos
1039
+ {interval.begin => ['year', interval.end]}
1040
+ end
1041
+
999
1042
  def details
1000
1043
  {:year => value}
1001
1044
  end
1002
1045
  }
1003
1046
  end
1004
1047
 
1005
- rule year_with_character
1006
- a:[0-9\?]+ [a-zA-Z] {
1048
+ rule year_number
1049
+ [12] [7890] [0-9] [0-9]? [\?]? {
1007
1050
  def value
1008
- a.text_value
1051
+ text_value
1009
1052
  end
1010
1053
 
1011
1054
  def pos
@@ -1017,43 +1060,39 @@ grammar ScientificNameClean
1017
1060
  end
1018
1061
  }
1019
1062
  end
1020
-
1021
- # Next two rles only for ( (author) )
1022
- # doesn't touch parenthesis inside another one like (bla-bla-bla1 (bla-bla-bla2))
1023
-
1024
- rule left_bracket
1025
- "( ("/"("
1026
- {
1027
- def value
1028
- "("
1029
- end
1030
- }
1031
- end
1032
-
1033
- rule right_bracket
1034
- ") )"/")"
1035
- {
1036
- def value
1037
- ")"
1038
- end
1039
- }
1040
- end
1041
1063
 
1064
+ rule left_paren
1065
+ "("
1066
+ end
1042
1067
 
1043
- rule space
1044
- [\s]* {
1045
- def details
1046
- {
1047
- }
1068
+ rule right_paren
1069
+ ")"
1070
+ end
1071
+
1072
+ rule hybrid_character
1073
+ ("x"/"X") {
1074
+ def value
1075
+ "×"
1048
1076
  end
1049
1077
  }
1078
+ /
1079
+ multiplication_sign
1050
1080
  end
1051
-
1052
- rule space_hard
1053
- [\s]+ {
1054
- def details
1055
- {}
1081
+
1082
+ rule multiplication_sign
1083
+ "×" {
1084
+ def value
1085
+ text_value
1056
1086
  end
1057
1087
  }
1058
1088
  end
1059
- end
1089
+
1090
+ rule space
1091
+ [\s]*
1092
+ end
1093
+
1094
+ rule space_hard
1095
+ [\s]+
1096
+ end
1097
+
1098
+ end