biodiversity 3.5.1 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. checksums.yaml +5 -5
  2. data/.gitignore +1 -0
  3. data/.rubocop.yml +9 -6
  4. data/.ruby-version +1 -1
  5. data/.travis.yml +1 -6
  6. data/CHANGELOG +3 -0
  7. data/Gemfile +2 -0
  8. data/README.md +37 -178
  9. data/Rakefile +15 -48
  10. data/biodiversity.gemspec +18 -21
  11. data/clib/linux/libgnparser.h +93 -0
  12. data/clib/linux/libgnparser.so +0 -0
  13. data/clib/mac/libgnparser.h +93 -0
  14. data/clib/mac/libgnparser.so +0 -0
  15. data/lib/biodiversity.rb +4 -9
  16. data/lib/biodiversity/parser.rb +65 -281
  17. data/lib/biodiversity/version.rb +8 -1
  18. data/spec/lib/biodiversity_spec.rb +9 -0
  19. data/spec/lib/parser_spec.rb +38 -0
  20. data/spec/spec_helper.rb +4 -81
  21. metadata +27 -102
  22. data/.byebug_history +0 -18
  23. data/.document +0 -5
  24. data/examples/socket_client.rb +0 -25
  25. data/lib/biodiversity/guid.rb +0 -1
  26. data/lib/biodiversity/guid/lsid.rb +0 -16
  27. data/lib/biodiversity/parser/scientific_name_canonical.rb +0 -528
  28. data/lib/biodiversity/parser/scientific_name_canonical.treetop +0 -120
  29. data/lib/biodiversity/parser/scientific_name_clean.rb +0 -8991
  30. data/lib/biodiversity/parser/scientific_name_clean.treetop +0 -1632
  31. data/lib/biodiversity/parser/scientific_name_dirty.rb +0 -1298
  32. data/lib/biodiversity/parser/scientific_name_dirty.treetop +0 -264
  33. data/spec/biodiversity_spec.rb +0 -11
  34. data/spec/files/test_data.txt +0 -490
  35. data/spec/files/todo.txt +0 -55
  36. data/spec/guid/lsid.spec.rb +0 -15
  37. data/spec/parser/scientific_name_canonical_spec.rb +0 -36
  38. data/spec/parser/scientific_name_clean_spec.rb +0 -1137
  39. data/spec/parser/scientific_name_dirty_spec.rb +0 -165
  40. data/spec/parser/scientific_name_spec.rb +0 -193
@@ -1,1632 +0,0 @@
1
- # encoding: UTF-8
2
- require "unicode_utils"
3
-
4
- grammar ScientificNameClean
5
-
6
- rule root
7
- space a:scientific_name_5 space {
8
- # Root node: every accessor delegates to the parsed name labelled `a`.
9
- def parser_run
10
- 1
11
- end
12
- def hybrid
13
- a.hybrid
14
- end
15
- def pos
16
- a.pos
17
- end
18
- # Always hands back an Array, wrapping a lone details value when needed.
19
- def details
20
- parsed = a.details
21
- parsed.instance_of?(Array) ? parsed : [parsed]
22
- end
23
- def canonical
24
- squeezed = a.canonical.gsub(/\s{2,}/, " ")
25
- squeezed.strip
26
- end
27
- def value
28
- squeezed = a.value.gsub(/\s{2,}/, " ")
29
- squeezed.strip
30
- end
31
- }
32
- end
33
-
34
- rule scientific_name_5
35
- a:multinomial_name space_hard hybrid_character space_hard b:species {
36
- def value
37
- a.value + " × " + b.value
38
- end
39
-
40
- def canonical
41
- a.canonical + " × " + b.canonical
42
- end
43
-
44
- def pos
45
- a.pos.merge(b.pos)
46
- end
47
-
48
- def hybrid
49
- true
50
- end
51
-
52
- def details
53
- [a.details, b.details.merge({:genus => a.details[:genus]})]
54
- end
55
- }
56
- /
57
- a:scientific_name_1 space b:taxon_concept_rank space c:authorship {
58
- def value
59
- a.value + " " + b.apply(c)
60
- end
61
-
62
- def canonical
63
- a.canonical
64
- end
65
-
66
- def pos
67
- a.pos.merge(c.pos)
68
- end
69
-
70
- def hybrid
71
- a.hybrid
72
- end
73
-
74
- def details
75
- a.details.merge(b.details(c))
76
- end
77
- }
78
- /
79
- scientific_name_4
80
- end
81
-
82
- rule scientific_name_4
83
- a:scientific_name_1 space hybrid_character space b:scientific_name_1 {
84
- def value
85
- a.value + " × " + b.value
86
- end
87
-
88
- def canonical
89
- a.canonical + " × " + b.canonical
90
- end
91
-
92
- def pos
93
- a.pos.merge(b.pos)
94
- end
95
-
96
- def hybrid
97
- true
98
- end
99
-
100
- def details
101
- [a.details, b.details]
102
- end
103
- }
104
- /
105
- a:scientific_name_1 space hybrid_character space [\?]? {
106
- def value
107
- a.value + " × ?"
108
- end
109
-
110
- def canonical
111
- a.canonical
112
- end
113
-
114
- def pos
115
- a.pos
116
- end
117
-
118
- def hybrid
119
- true
120
- end
121
-
122
- def details
123
- [a.details, "?"]
124
- end
125
- }
126
- /
127
- scientific_name_3
128
- end
129
-
130
- rule scientific_name_3
131
- a:hybrid_character space b:scientific_name_2 {
132
- def value
133
- a.value + " " + b.value
134
- end
135
-
136
- def canonical
137
- b.canonical
138
- end
139
-
140
- def pos
141
- b.pos
142
- end
143
-
144
- def hybrid
145
- true
146
- end
147
-
148
- def details
149
- b.details
150
- end
151
- }
152
- /
153
- scientific_name_2
154
- end
155
-
156
- rule scientific_name_2
157
- a:scientific_name_1 space b:status_part {
158
- def value
159
- a.value + " " + b.value
160
- end
161
-
162
- def canonical
163
- a.canonical
164
- end
165
-
166
- def pos
167
- a.pos
168
- end
169
-
170
- def hybrid
171
- a.hybrid rescue false
172
- end
173
-
174
- def details
175
- a.details.merge(b.details)
176
- end
177
- }
178
- /
179
- scientific_name_1
180
- end
181
-
182
- rule scientific_name_1
183
- multiuninomial_name
184
- /
185
- multinomial_name
186
- /
187
- uninomial_name
188
- end
189
-
190
-
191
- rule status_part
192
- a:status_word space b:status_part {
193
- def value
194
- a.value + " " + b.value
195
- end
196
- def details
197
- {:status => value}
198
- end
199
- }
200
- /
201
- status_word
202
- end
203
-
204
- rule status_word
205
- latin_word [\.] {
206
- def value
207
- text_value.strip
208
- end
209
- def details
210
- {:status => value}
211
- end
212
- }
213
- #/
214
- #latin_word
215
- end
216
-
217
- rule unparsed
218
- .+ space {
219
-
220
- def value
221
- ""
222
- end
223
-
224
- def hybrid
225
- false
226
- end
227
-
228
- def canonical
229
- ""
230
- end
231
-
232
- def pos
233
- {interval.begin => ["unparsed", interval.end]}
234
- end
235
-
236
- def details
237
- {:unparsed => text_value}
238
- end
239
- }
240
- end
241
-
242
- rule multinomial_name
243
- a:genus space b:infragenus space aid:annotation_identification? space c:species space_hard d:infraspecies_mult {
244
- def value
245
- a.value + " " + b.value + " " + c.value + " " + d.value
246
- end
247
-
248
- def canonical
249
- a.canonical + " " + c.canonical + " " + d.canonical
250
- end
251
-
252
- def pos
253
- a.pos.merge(b.pos).merge(c.pos).merge(d.pos)
254
- end
255
-
256
- def hybrid
257
- c.hybrid rescue false
258
- end
259
-
260
- def details
261
- a.details.merge(b.details).merge(c.details).merge(d.details)
262
- end
263
- }
264
- /
265
- a:genus space b:infragenus space aid:annotation_identification? space c:species space aid:annotation_identification space d:infraspecies_mult {
266
- def value
267
- a.value + " " + b.value + " " + c.value + " " + d.value
268
- end
269
-
270
- def canonical
271
- a.canonical + " " + c.canonical + " " + d.canonical
272
- end
273
-
274
- def pos
275
- a.pos.merge(b.pos).merge(c.pos).merge(d.pos)
276
- end
277
-
278
- def hybrid
279
- c.hybrid rescue false
280
- end
281
-
282
- def details
283
- a.details.merge(b.details).merge(c.details).merge(d.details)
284
- end
285
- }
286
- /
287
- a:genus space b:infragenus space aid:annotation_identification? space c:species {
288
- def value
289
- if defined? aid.apply
290
- a.value + " " + b.value + aid.apply(c)
291
- else
292
- a.value + " " + b.value + " " + c.value
293
- end
294
- end
295
-
296
- def canonical
297
- if defined? aid.apply
298
- a.canonical + aid.canonical(c)
299
- else
300
- a.canonical + " " + c.canonical
301
- end
302
- end
303
-
304
- def pos
305
- if defined? aid.apply
306
- a.pos.merge(b.pos).merge(aid.pos(c))
307
- else
308
- a.pos.merge(b.pos).merge(c.pos)
309
- end
310
- end
311
-
312
- def hybrid
313
- c.hybrid rescue false
314
- end
315
-
316
- def details
317
- if defined? aid.apply
318
- a.details.merge(b.details).merge(aid.apply(c))
319
- else
320
- a.details.merge(b.details).merge(c.details)
321
- end
322
- end
323
- }
324
- /
325
- a:genus space aid:annotation_identification? space b:species space_hard c:infraspecies_mult {
326
- def value
327
- a.value + " " + b.value + " " + c.value
328
- end
329
-
330
- def canonical
331
- a.canonical + " " + b.canonical + " " + c.canonical
332
- end
333
-
334
- def pos
335
- a.pos.merge(b.pos).merge(c.pos)
336
- end
337
-
338
- def hybrid
339
- b.hybrid rescue false
340
- end
341
-
342
- def details
343
- a.details.merge(b.details).merge(c.details)
344
- end
345
- }
346
- /
347
- a:genus space aid:annotation_identification? space b:species {
348
- def value
349
- if defined? aid.apply
350
- a.value + aid.apply(b)
351
- else
352
- a.value + " " + b.value
353
- end
354
- end
355
-
356
- def canonical
357
- if defined? aid.apply
358
- a.canonical + aid.canonical(b)
359
- else
360
- a.canonical + " " + b.canonical
361
- end
362
- end
363
-
364
- def pos
365
- if defined? aid.apply
366
- a.pos.merge(aid.pos(b))
367
- else
368
- a.pos.merge(b.pos)
369
- end
370
- end
371
-
372
- def hybrid
373
- b.hybrid rescue false
374
- end
375
-
376
- def details
377
- if defined? aid.apply
378
- a.details.merge(aid.details(b))
379
- else
380
- a.details.merge(b.details)
381
- end
382
- end
383
- }
384
- /
385
- a:genus space aid:annotation_identification space b:unparsed {
386
- def value
387
- a.value + aid.apply(b)
388
- end
389
-
390
- def canonical
391
- a.canonical + aid.canonical(b)
392
- end
393
-
394
- def pos
395
- a.pos.merge(aid.pos(b))
396
- end
397
-
398
- def hybrid
399
- false
400
- end
401
-
402
- def details
403
- a.details.merge(aid.details(b))
404
- end
405
- }
406
- end
407
-
408
- rule multiuninomial_name
409
- a:uninomial_name space b:rank_uninomial space c:uninomial_name {
410
-
411
- def value
412
- a.value + " " + b.value + " " + c.value
413
- end
414
-
415
- def canonical
416
- a.canonical
417
- end
418
-
419
- def hybrid
420
- false
421
- end
422
-
423
- def pos
424
- a.pos.merge(b.pos(c))
425
- end
426
-
427
- def details
428
- a.details.merge(b.details(c))
429
- end
430
- }
431
- end
432
-
433
- rule infraspecies_mult
434
- a:infraspecies space b:infraspecies_mult {
435
- def value
436
- a.value + " " + b.value
437
- end
438
-
439
- def canonical
440
- a.canonical + " " + b.canonical
441
- end
442
-
443
- def pos
444
- a.pos.merge(b.pos)
445
- end
446
-
447
- def details
448
- a_array = a.details[:infraspecies].class == Array ? a.details[:infraspecies] : [a.details[:infraspecies]]
449
- b_array = b.details[:infraspecies].class == Array ? b.details[:infraspecies] : [b.details[:infraspecies]]
450
- a.details.merge({:infraspecies => a_array + b_array})
451
- end
452
- }
453
- /
454
- infraspecies {
455
- def details
456
- if super[:annotation_identification]
457
- {:infraspecies => [{:annotation_identification => super[:annotation_identification], :ignored => super[:ignored]}]}
458
- else
459
- {:infraspecies => [super[:infraspecies]]}
460
- end
461
- end
462
- }
463
- end
464
-
465
- rule infraspecies
466
- a:infraspecies_string space b:authorship {
467
- def value
468
- a.value + " " + b.value
469
- end
470
-
471
- def canonical
472
- a.canonical
473
- end
474
-
475
- def pos
476
- a.pos.merge(b.pos)
477
- end
478
-
479
- def details
480
- {:infraspecies => a.details[:infraspecies].merge(b.details)}
481
- end
482
- }
483
- /
484
- infraspecies_string
485
- end
486
-
487
- rule infraspecies_string
488
- sel:rank space a:species_word {
489
- def value
490
- sel.apply(a)
491
- end
492
- def canonical
493
- sel.canonical(a)
494
- end
495
-
496
- def pos
497
- sel.pos(a)
498
- end
499
-
500
- def details
501
- sel.details(a)
502
- end
503
- }
504
- /
505
- aid:annotation_identification space a:species_word ![\.] {
506
- def value
507
- aid.apply(a)
508
- end
509
-
510
- def canonical
511
- aid.canonical(a)
512
- end
513
-
514
- def pos
515
- def a.pos
516
- {interval.begin => ["infraspecies", a.interval.end]}
517
- end
518
- aid.pos(a)
519
- end
520
-
521
- def details
522
- def a.details
523
- {:infraspecies => {:string => value, :rank => "n/a"}}
524
- end
525
- aid.details(a)
526
- end
527
- }
528
- /
529
- a:species_word ![\.] {
530
- def value
531
- a.value
532
- end
533
-
534
- def canonical
535
- value
536
- end
537
-
538
- def pos
539
- {interval.begin => ["infraspecies", interval.end]}
540
- end
541
-
542
- def details
543
- {:infraspecies => {:string => value, :rank => "n/a"}}
544
- end
545
- }
546
- end
547
-
548
- rule taxon_concept_rank
549
- ("sec."/"sensu.") {
550
- def value
551
- "sec."
552
- end
553
- def apply(a)
554
- " " + value + " " + a.value
555
- end
556
- def details(a = nil)
557
- {:taxon_concept => a.details}
558
- end
559
- }
560
- end
561
-
562
- rule rank
563
- ("morph."/"f.sp."/"B "/"ssp."/"ssp "/"mut."/"nat "/"nothosubsp."/"convar."/"nvar."/"pseudovar."/"sect."/"ser."/"var."/"subvar."/ "[var.]" /"var "/"subsp."/"subsp "/"subf."/"race "/"forma."/"forma "/"fma."/"fma "/"form."/"form "/"fo."/"fo "/"f."/"α"/"ββ"/"β"/"γ"/"δ"/"ε"/"φ"/"θ"/"μ"/"a."/"b."/"c."/"d."/"e."/"g."/"k."/"****"/"**"/"*")
564
- {
565
- def value
566
- text_value.strip
567
- end
568
-
569
- def apply(a)
570
- " " + text_value.strip + " " + a.value
571
- end
572
-
573
- def canonical(a)
574
- " " + a.value
575
- end
576
-
577
- def pos(a)
578
- interval_end = text_value[-1] == " " ? interval.end - 1 : interval.end
579
- {interval.begin => ["infraspecific_type", interval_end], a.interval.begin => ["infraspecies", a.interval.end]}
580
- end
581
-
582
- def details(a = nil)
583
- {:infraspecies => {:string => (a.value rescue nil), :rank => text_value.strip}}
584
- end
585
- }
586
- end
587
-
588
- rule rank_uninomial
589
- ("sect."/"sect "/"subsect."/"subsect "/"trib."/"trib "/"subtrib."/"subtrib "/"ser."/"ser "/"subgen."/"subgen "/"fam."/"fam "/"subfam."/"subfam "/"supertrib."/"supertrib ") {
590
- def value
591
- text_value.strip
592
- end
593
-
594
- def pos(uni)
595
- {interval.begin => ["rank_uninomial", interval.end], uni.interval.begin => ["uninomial", uni.interval.end]}
596
- end
597
-
598
- def details(uni)
599
- {:rank_uninomials => value, :uninomial2 => uni.details[:uninomial]}
600
- end
601
- }
602
- end
603
-
604
- rule species
605
- a:species_string space b:authorship {
606
- def value
607
- a.value + " " + b.value
608
- end
609
-
610
- def canonical
611
- a.canonical
612
- end
613
-
614
- def hybrid
615
- a.hybrid rescue false
616
- end
617
-
618
- def pos
619
- a.pos.merge(b.pos)
620
- end
621
-
622
- def details
623
- {:species => a.details[:species].merge(b.details)}
624
- end
625
- }
626
- /
627
- species_string
628
- end
629
-
630
- rule species_string
631
- species_word {
632
- def canonical
633
- value
634
- end
635
-
636
- def pos
637
- {interval.begin => ["species", interval.end]}
638
- end
639
-
640
- def hybrid
641
- false
642
- end
643
-
644
- def details
645
- {:species => {:string => value}}
646
- end
647
- }
648
- /
649
- species_word_hybrid
650
- end
651
-
652
- rule infragenus
653
- left_paren space a:(cap_latin_word/capped_dotted_char) space right_paren {
654
- def value
655
- "(" + a.value + ")"
656
- end
657
-
658
- def canonical
659
- a.value
660
- end
661
-
662
- def pos
663
- {a.interval.begin => ["infragenus", a.interval.end]}
664
- end
665
-
666
- def details
667
- {:infragenus => {:string => a.value}}
668
- end
669
- }
670
- end
671
-
672
- rule genus
673
- a:(abbreviated_genus/uninomial_string) !(space_hard author_prefix_word space_hard author_word) {
674
- def value
675
- a.value
676
- end
677
-
678
- def pos
679
- {a.interval.begin => ["genus", a.interval.end]}
680
- end
681
-
682
- def canonical
683
- a.value
684
- end
685
-
686
- def details
687
- {:genus => {:string => a.value}}
688
- end
689
- }
690
- end
691
-
692
- rule abbreviated_genus
693
- [A-Z] [a-z]? [a-z]? [\\.] space {
694
- def value
695
- text_value.strip
696
- end
697
-
698
- def canonical
699
- value
700
- end
701
-
702
- def pos
703
- {interval.begin => ["abbreviated_genus", interval.end]}
704
- end
705
-
706
- def details
707
- {:abbreviated_genus => {:string => value}}
708
- end
709
- }
710
- end
711
-
712
- rule uninomial_name
713
- a:uninomial_string space b:infragenus space c:simple_authorship {
714
- def value
715
- a.value + " " + b.value + " " + c.value
716
- end
717
-
718
- def canonical
719
- a.canonical
720
- end
721
-
722
- def pos
723
- a.pos.merge(b.pos).merge(c.pos)
724
- end
725
-
726
- def hybrid
727
- false
728
- end
729
-
730
- def details
731
- {:uninomial => a.details[:uninomial].merge(b.details).merge(c.details)}
732
- end
733
- }
734
- /
735
- a:uninomial_string space b:infragenus {
736
- def value
737
- a.value + " " + b.value
738
- end
739
-
740
- def canonical
741
- a.canonical
742
- end
743
-
744
- def pos
745
- a.pos.merge(b.pos)
746
- end
747
-
748
- def hybrid
749
- false
750
- end
751
-
752
- def details
753
- {:uninomial => a.details[:uninomial].merge(b.details)}
754
- end
755
- }
756
- /
757
- a:uninomial_string space_hard b:authorship {
758
- def value
759
- a.value + " " + b.value
760
- end
761
-
762
- def canonical
763
- a.canonical
764
- end
765
-
766
- def pos
767
- a.pos.merge(b.pos)
768
- end
769
-
770
- def hybrid
771
- false
772
- end
773
-
774
- def details
775
- {:uninomial => a.details[:uninomial].merge(b.details)}
776
- end
777
- }
778
- /
779
- uninomial_string
780
- end
781
-
782
- rule uninomial_string
783
- (cap_latin_word_pair/cap_latin_word) {
784
- def canonical
785
- value
786
- end
787
-
788
- def pos
789
- {interval.begin => ["uninomial", interval.end]}
790
- end
791
-
792
- def hybrid
793
- false
794
- end
795
-
796
- def details
797
- {:uninomial => {:string => value}}
798
- end
799
- }
800
- end
801
-
802
- rule authorship
803
- a:basionym_authorship_with_parenthesis space b:simple_authorship ","? space c:ex_authorship {
804
- def value
805
- a.value + " " + b.value + " " + c.value
806
- end
807
-
808
- def pos
809
- a.pos.merge(b.pos).merge(c.pos)
810
- end
811
-
812
- def details
813
- val = {:authorship => text_value.strip, :combinationAuthorTeam => b.details[:basionymAuthorTeam], :basionymAuthorTeam => a.details[:basionymAuthorTeam]}
814
- val[:combinationAuthorTeam].merge!(c.details)
815
- val
816
- end
817
- }
818
- /
819
- a:basionym_authorship_with_parenthesis space b:simple_authorship space c:emend_authorship {
820
- def value
821
- a.value + " " + b.value + " " + c.value
822
- end
823
-
824
- def pos
825
- a.pos.merge(b.pos).merge(c.pos)
826
- end
827
-
828
- def details
829
- val = {:authorship => text_value.strip, :combinationAuthorTeam => b.details[:basionymAuthorTeam], :basionymAuthorTeam => a.details[:basionymAuthorTeam]}
830
- val[:combinationAuthorTeam].merge!(c.details)
831
- val
832
- end
833
- }
834
- /
835
- a:basionym_authorship_with_parenthesis space b:simple_authorship {
836
- def value
837
- a.value + " " + b.value
838
- end
839
-
840
- def pos
841
- a.pos.merge(b.pos)
842
- end
843
-
844
- def details
845
- {:authorship => text_value.strip, :combinationAuthorTeam => b.details[:basionymAuthorTeam], :basionymAuthorTeam => a.details[:basionymAuthorTeam]}
846
- end
847
- }
848
- /
849
- basionym_authorship_with_parenthesis
850
- /
851
- a:simple_authorship ","? space b:ex_authorship {
852
- def value
853
- a.value + " " + b.value
854
- end
855
-
856
- def pos
857
- a.pos.merge(b.pos)
858
- end
859
-
860
- def details
861
- val = a.details
862
- val[:authorship] = text_value.strip
863
- val[:basionymAuthorTeam].merge!(b.details)
864
- val
865
- end
866
- }
867
- /
868
- a:simple_authorship space b:emend_authorship {
869
- def value
870
- a.value + " " + b.value
871
- end
872
-
873
- def pos
874
- a.pos.merge(b.pos)
875
- end
876
-
877
- def details
878
- val = a.details
879
- val[:authorship] = text_value.strip
880
- val[:basionymAuthorTeam].merge!(b.details)
881
- val
882
- end
883
- }
884
- /
885
- simple_authorship
886
- end
887
-
888
-
889
- rule basionym_authorship_with_parenthesis
890
- left_paren space a:authors_names space right_paren space [,]? space b:year {
891
- def value
892
- "(" + a.value + " " + b.value + ")"
893
- end
894
-
895
- def pos
896
- a.pos.merge(b.pos)
897
- end
898
-
899
- def details
900
- { :authorship => text_value,
901
- :basionymAuthorTeam => {:author_team => text_value}.merge(a.details).merge(b.details)
902
- }
903
- end
904
- }
905
- /
906
- left_paren space a:simple_authorship ","? space b:ex_authorship space right_paren {
907
- def value
908
- "(" + a.value + " " + b.value + ")"
909
- end
910
-
911
- def pos
912
- a.pos.merge(b.pos)
913
- end
914
-
915
- def details
916
- val = a.details
917
- val[:basionymAuthorTeam].merge!(b.details)
918
- val[:authorship] = text_value.strip
919
- val
920
- end
921
- }
922
- /
923
- left_paren space a:simple_authorship space b:emend_authorship space right_paren {
924
- def value
925
- "(" + a.value + " " + b.value + ")"
926
- end
927
-
928
- def pos
929
- a.pos.merge(b.pos)
930
- end
931
-
932
- def details
933
- val = a.details
934
- val[:basionymAuthorTeam].merge!(b.details)
935
- val[:authorship] = text_value.strip
936
- val
937
- end
938
- }
939
- /
940
- left_paren space a:simple_authorship space right_paren {
941
- def value
942
- "(" + a.value + ")"
943
- end
944
-
945
- def pos
946
- a.pos
947
- end
948
-
949
- def details
950
- val = a.details
951
- val[:authorship] = text_value
952
- val
953
- end
954
- }
955
- /
956
- left_paren space a:"?" space right_paren {
957
- def value
958
- "(?)"
959
- end
960
-
961
- def pos
962
- {a.interval.begin => ["unknown_author", a.interval.end]}
963
- end
964
-
965
- def details
966
- {:authorship => text_value, :basionymAuthorTeam => {:authorTeam => text_value, :author => ["?"]}}
967
- end
968
- }
969
- end
970
-
971
- rule ex_authorship
972
- ex_sep space b:simple_authorship space ex_sep space c:simple_authorship {
973
- def value
974
- " ex " + b.value + " ex " + c.value
975
- end
976
-
977
- def pos
978
- b.pos
979
- end
980
-
981
- def details
982
- val = {:exAuthorTeam => {:authorTeam => b.text_value.strip}.merge(b.details[:basionymAuthorTeam])}
983
- val
984
- end
985
- }
986
- /
987
- ex_sep space b:simple_authorship {
988
- def value
989
- " ex " + b.value
990
- end
991
-
992
- def pos
993
- b.pos
994
- end
995
-
996
- def details
997
- val = {:exAuthorTeam => {:authorTeam => b.text_value.strip}.merge(b.details[:basionymAuthorTeam])}
998
- val
999
- end
1000
- }
1001
- end
1002
-
1003
- rule emend_authorship
1004
- emend space b:simple_authorship {
1005
- def value
1006
- " emend. " + b.value
1007
- end
1008
-
1009
- def pos
1010
- b.pos
1011
- end
1012
-
1013
- def details
1014
- val = {:emendAuthorTeam => {:authorTeam => b.text_value.strip}.merge(b.details[:basionymAuthorTeam])}
1015
- val
1016
- end
1017
- }
1018
- end
1019
-
1020
- rule simple_authorship
1021
- a:authors_names space [,]? space b:year? [,]? space "non" space authors_names space [,]? space year {
1022
- def value
1023
- a.value + " " + b.value
1024
- end
1025
-
1026
- def pos
1027
- a.pos.merge(b.pos)
1028
- end
1029
-
1030
- def details
1031
- details_with_arg(:basionymAuthorTeam)
1032
- end
1033
-
1034
- def details_with_arg(authorTeamType = "basionymAuthorTeam")
1035
- { :authorship => text_value,
1036
- authorTeamType.to_sym => {
1037
- :authorTeam => a.text_value.strip
1038
- }.merge(a.details).merge(b.details)
1039
- }
1040
- end
1041
- }
1042
- /
1043
- a:authors_names space [,]? space b:year {
1044
- def value
1045
- a.value + " " + b.value
1046
- end
1047
-
1048
- def pos
1049
- a.pos.merge(b.pos)
1050
- end
1051
-
1052
- def details
1053
- details_with_arg(:basionymAuthorTeam)
1054
- end
1055
-
1056
- def details_with_arg(authorTeamType = "basionymAuthorTeam")
1057
- { :authorship => text_value,
1058
- authorTeamType.to_sym => {
1059
- :authorTeam => a.text_value.strip
1060
- }.merge(a.details).merge(b.details)
1061
- }
1062
- end
1063
- }
1064
- /
1065
- authors_names {
1066
- def details
1067
- details = details_with_arg(:basionymAuthorTeam)
1068
- details[:basionymAuthorTeam].merge!(super)
1069
- details
1070
- end
1071
-
1072
- def details_with_arg(authorTeamType = "basionymAuthorTeam")
1073
- { :authorship => text_value,
1074
- authorTeamType.to_sym => {
1075
- :authorTeam => text_value,
1076
- }
1077
- }
1078
- end
1079
- }
1080
- end
1081
-
1082
- rule authors_names
1083
- a:author_name space sep:author_separator space b:authors_names {
1084
- def value
1085
- sep.apply(a,b)
1086
- end
1087
-
1088
- def pos
1089
- sep.pos(a,b)
1090
- end
1091
-
1092
- def details
1093
- sep.details(a,b)
1094
- end
1095
- }
1096
- /
1097
- author_name
1098
- /
1099
- unknown_auth
1100
- end
1101
-
1102
-
1103
- rule unknown_auth
1104
- ("auct."/"auct"/"hort."/"hort"/"anon."/"anon"/"ht."/"ht") !latin_word {
1105
- def value
1106
- text_value
1107
- end
1108
-
1109
- def pos
1110
- {interval.begin => ["unknown_author", interval.end]}
1111
- end
1112
-
1113
- def details
1114
- {:author => ["unknown"]}
1115
- end
1116
- }
1117
- end
1118
-
1119
- rule ex_sep
1120
- ("ex"/"in") &[\s]
1121
- end
1122
-
1123
- rule emend
1124
- ("emend."/"emend") &[\s]
1125
- end
1126
-
1127
- rule author_separator
1128
- ("&"/"&"/",&"/", &"/",and"/", and"/"apud"/"and"/"et"/",") {
1129
- def apply(a,b)
1130
- sep = text_value.strip
1131
- sep = " &" if ["&", "&", "and", "et",
1132
- ",&", ", &", ",and", ", and"].include? sep
1133
- sep = " apud" if sep == "apud"
1134
- a.value + sep + " " + b.value
1135
- end
1136
-
1137
- def pos(a,b)
1138
- a.pos.merge(b.pos)
1139
- end
1140
-
1141
- def details(a,b)
1142
- {:author => a.details[:author] + b.details[:author]}
1143
- end
1144
- }
1145
- end
1146
-
1147
- rule author_name
1148
- space a:author_name_without_postfix space b:author_maybe_filius space !latin_word {
1149
- def value
1150
- a.value + " " + b.value
1151
- end
1152
-
1153
- def pos
1154
- a.pos.merge(b.pos)
1155
- end
1156
-
1157
- def details
1158
- {:author => [value]}
1159
- end
1160
- }
1161
- /
1162
- space a:author_name_without_postfix space b:author_filius {
1163
- def value
1164
- a.value + " " + b.value
1165
- end
1166
-
1167
- def pos
1168
- a.pos.merge(b.pos)
1169
- end
1170
-
1171
- def details
1172
- {:author => [value]}
1173
- end
1174
- }
1175
- /
1176
- author_name_without_postfix
1177
- end
1178
-
1179
- rule author_name_without_postfix
1180
- space a:author_prefix_word space b:author_name {
1181
- def value
1182
- a.value + " " + b.value
1183
- end
1184
-
1185
- def pos
1186
- a.pos.merge(b.pos)
1187
- end
1188
-
1189
- def details
1190
- {:author => [value]}
1191
- end
1192
- }
1193
- /
1194
- a:author_word space b:author_name {
1195
- def value
1196
- a.value + " " + b.value
1197
- end
1198
-
1199
- def pos
1200
- a.pos.merge(b.pos)
1201
- end
1202
-
1203
- def details
1204
- {:author => [value]}
1205
- end
1206
- }
1207
- /
1208
- author_word
1209
- end
1210
-
1211
- rule author_word
1212
- "A S. Xu" {
1213
- def value
1214
- text_value.strip
1215
- end
1216
-
1217
- def pos
1218
- {interval.begin => ["author_word", 1], (interval.begin + 2) => ["author_word", 2], (interval.begin + 5) => ["author_word", 2]}
1219
- end
1220
-
1221
- def details
1222
- {:author => [value]}
1223
- end
1224
- }
1225
- /
1226
- ("arg."/"et al.\{\?\}"/"et al."/"et al"/"& al."/"& al") {
1227
- def value
1228
- text_value.strip
1229
- end
1230
-
1231
- def pos
1232
- #cheating because there are several words in some of them
1233
- {interval.begin => ["author_word", interval.end]}
1234
- end
1235
-
1236
- def details
1237
- {:author => [value]}
1238
- end
1239
- }
1240
- /
1241
- ("d" ['’])? ("Å"/"Ö"/"Á"/"Ø"/"Ô"/"Š"/"Ś"/"Č"/"Ķ"/"Ł"/"É"/"Ž"/[A-W]/[Y-Z]) [^0-9\[\]\(\)\s&,]* {
1242
- def value
1243
- text_value.gsub(/([\p{Lu}]{3,})/) do |match|
1244
- UnicodeUtils.titlecase(match)
1245
- end
1246
- end
1247
-
1248
- def pos
1249
- {interval.begin => ["author_word", interval.end]}
1250
- end
1251
-
1252
- def details
1253
- {:author => [value]}
1254
- end
1255
- }
1256
- /
1257
- "X" [^0-9\[\]\(\)\s&,]+ {
1258
- def value
1259
- text_value
1260
- end
1261
-
1262
- def pos
1263
- {interval.begin => ["author_word", interval.end]}
1264
- end
1265
-
1266
- def details
1267
- {:author => [value]}
1268
- end
1269
- }
1270
- /
1271
- author_prefix_word
1272
- end
1273
-
1274
- rule author_prefix_word
1275
- space ("ab"/"af"/"bis"/"da"/"der"/"del"/"des"/"den"/"della"/"dela"/"de"/"di"/"du"/"do"/"la"/"'t"/"ter"/"van"/"von") &space_hard {
1276
- def value
1277
- text_value
1278
- end
1279
-
1280
- def pos
1281
- #cheating because there are several words in some of them
1282
- {interval.begin => ["author_word", interval.end]}
1283
- end
1284
- }
1285
- end
1286
-
1287
- rule author_filius
1288
- ("fil."/"filius") {
1289
- def value
1290
- text_value.strip
1291
- end
1292
-
1293
- def pos
1294
- {interval.begin => ["author_word", interval.end]}
1295
- end
1296
- }
1297
- end
1298
-
1299
- rule author_maybe_filius
1300
- "f." {
1301
- def value
1302
- text_value.strip
1303
- end
1304
-
1305
- def pos
1306
- {interval.begin => ["author_word", interval.end]}
1307
- end
1308
- }
1309
- end
1310
-
1311
- rule cap_latin_word_pair
1312
- a:cap_latin_word "-" b:cap_latin_word {
1313
- def value
1314
- a.value + b.value.downcase
1315
- end
1316
- }
1317
- end
1318
-
1319
- rule cap_latin_word
1320
- a:([A-Z]/cap_digraph) b:latin_word "?" {
1321
- def value
1322
- (a.value rescue a.text_value) + b.value
1323
- end
1324
- }
1325
- /
1326
- a:([A-Z]/cap_digraph) b:latin_word {
1327
- def value
1328
- (a.value rescue a.text_value) + b.value
1329
- end
1330
- }
1331
- /
1332
- a:("AE"/"OE") b:latin_word {
1333
- def value
1334
- a.text_value[0..0] + "e" + b.value
1335
- end
1336
- }
1337
- /
1338
- ("Ca"/"Ea"/"Ge"/"Ia"/"Io"/"Io"/"Ix"/"Lo"/"Oa"/"Ra"/"Ty"/"Ua"/"Aa"/"Ja"/"Zu"/"La"/"Qu"/"As"/"Ba") {
1339
- def value
1340
- text_value
1341
- end
1342
- }
1343
- end
1344
-
1345
- rule capped_dotted_char
1346
- [A-Z] "." {
1347
- def value
1348
- text_value
1349
- end
1350
- }
1351
- end
1352
-
1353
- rule species_word_hybrid
1354
- a:multiplication_sign space b:species_word {
1355
- def value
1356
- a.value + " " + b.value
1357
- end
1358
-
1359
- def canonical
1360
- b.value
1361
- end
1362
-
1363
- def hybrid
1364
- true
1365
- end
1366
-
1367
- def pos
1368
- {b.interval.begin => ["species", b.interval.end]}
1369
- end
1370
-
1371
- def details
1372
- {:species => {:string => b.value}}
1373
- end
1374
- }
1375
- /
1376
- a:"X" space b:species_word {
1377
- def value
1378
- "× " + b.value
1379
- end
1380
-
1381
- def canonical
1382
- b.value
1383
- end
1384
-
1385
- def hybrid
1386
- true
1387
- end
1388
-
1389
- def pos
1390
- {b.interval.begin => ["species", b.interval.end]}
1391
- end
1392
-
1393
- def details
1394
- {:species => {:string => b.value}}
1395
- end
1396
- }
1397
- /
1398
- a:"x" space_hard b:species_word {
1399
- def value
1400
- "× " + b.value
1401
- end
1402
-
1403
- def canonical
1404
- b.value
1405
- end
1406
-
1407
- def hybrid
1408
- true
1409
- end
1410
-
1411
- def pos
1412
- {b.interval.begin => ["species", b.interval.end]}
1413
- end
1414
-
1415
- def details
1416
- {:species => {:string => b.value}}
1417
- end
1418
- }
1419
- end
1420
-
1421
- rule annotation_identification
1422
- ("sp.nr."/"sp. nr."/"nr."/"nr "/"sp.aff."/"sp. aff."/"sp."/"sp "/"species"/"spp."/"spp "/"aff."/"aff "/"monst."/"? ") {
1423
-
1424
- def value
1425
- text_value.strip
1426
- end
1427
-
1428
- def apply(sp)
1429
- ""
1430
- end
1431
-
1432
- def canonical(sp)
1433
- ""
1434
- end
1435
-
1436
- def pos(sp)
1437
- interval_end = text_value[-1] == " " ? interval.end - 1 : interval.end
1438
- {interval.begin => ["annotation_identification", interval.end]}
1439
- end
1440
-
1441
- def details(sp)
1442
- {:annotation_identification => value, :ignored => sp.details}
1443
- end
1444
- }
1445
- /
1446
- ("cf."/"cf ") {
1447
- def value
1448
- text_value.strip
1449
- end
1450
-
1451
- def apply(sp)
1452
- " " + value + " " + sp.value
1453
- end
1454
-
1455
- def canonical(sp)
1456
- " " + sp.canonical
1457
- end
1458
-
1459
- def pos(sp)
1460
- interval_end = text_value[-1] == " " ? interval.end - 1 : interval.end
1461
- {interval.begin => ["annotation_identification", interval.end]}.merge(sp.pos)
1462
- end
1463
-
1464
- def details(sp)
1465
- {:annotation_identification => value, :species => sp.details}
1466
- end
1467
- }
1468
- end
1469
-
1470
- rule species_word
1471
- a:[0-9]+ "-"? b:latin_word {
1472
- def value
1473
- num = {"1" => "uni", "2" => "du", "3" => "tri", "4" => "quadri", "5" => "quinque", "6" => "hexa", "7" => "septem", "8" => "octo", "9" => "novem", "10" => "decem", "11" => "undecim", "12" => "duodec", "13" => "tredec", "14" => "quattuordec", "15" => "quinquadec", "16" => "hexadec", "17" => "septendec", "18" => "octodec", "19" => "novemdec", "20" => "viginti", "21" => "unviginti", "22" => "duodeviginti", "23" => "triviginti", "24" => "quattuorviginti", "25" => "quinquatviginti", "26" => "hexaviginti", "27" => "septenviginti", "28" => "octoviginti", "29" => "novemviginti", "30" => "triginta", "38" => "trigintaocto", "100" => "centi"}
1474
- a_value = num[a.text_value] ? num[a.text_value] : a.text_value + "-"
1475
- a_value + b.value
1476
- end
1477
- }
1478
- /
1479
- latin_word
1480
- end
1481
-
1482
- rule latin_word
1483
- a:valid_name_letters "-" b:latin_word {
1484
- def value
1485
- a.value + "-" + b.value
1486
- end
1487
- }
1488
- /
1489
- a:valid_name_letter "'" b:latin_word {
1490
- def value
1491
- a.value + b.value
1492
- end
1493
- }
1494
- /
1495
- a:valid_name_letter b:valid_name_letters {
1496
- def value
1497
- a.value + b.value
1498
- end
1499
- }
1500
- end
1501
-
1502
- rule valid_name_letters
1503
- [a-zëæœ]+ {
1504
- def value
1505
- res = ""
1506
- text_value.split("").each do |l|
1507
- l = "ae" if l == "æ"
1508
- l = "oe" if l == "œ"
1509
- # We normalize ë as well. It is legal in botanical code, but it
1510
- # is beneficial to normalize it for the reconsiliation purposes
1511
- l = "e" if l == "ë"
1512
- res << l
1513
- end
1514
- res
1515
- end
1516
- }
1517
- end
1518
-
1519
- rule valid_name_letter
1520
- [a-zëæœ] {
1521
- def value
1522
- res = text_value
1523
- res = "ae" if res == "æ"
1524
- res = "oe" if res == "œ"
1525
- res = "e" if res == "ë"
1526
- res
1527
- end
1528
- }
1529
- end
1530
-
1531
-
1532
- rule cap_digraph
1533
- "Æ" {
1534
- def value
1535
- "Ae"
1536
- end
1537
- }
1538
- /
1539
- "Œ" {
1540
- def value
1541
- "Oe"
1542
- end
1543
- }
1544
- end
1545
-
1546
- rule year
1547
- b:left_paren space a:(year_number_with_character/year_number) space c:right_paren {
1548
- def value
1549
- a.value
1550
- end
1551
-
1552
- def pos
1553
- a.pos
1554
- end
1555
-
1556
- def details
1557
- a.details
1558
- end
1559
- }
1560
- /
1561
- year_number_with_character
1562
- /
1563
- year_number
1564
- end
1565
-
1566
- rule year_number_with_character
1567
- a:year_number [a-zA-Z] {
1568
- def value
1569
- a.text_value
1570
- end
1571
-
1572
- def pos
1573
- {interval.begin => ["year", interval.end]}
1574
- end
1575
-
1576
- def details
1577
- {:year => value}
1578
- end
1579
- }
1580
- end
1581
-
1582
- rule year_number
1583
- [12] [7890] [0-9] ([0-9] [\?]?/"?") {
1584
- def value
1585
- text_value
1586
- end
1587
-
1588
- def pos
1589
- {interval.begin => ["year", interval.end]}
1590
- end
1591
-
1592
- def details
1593
- {:year => value}
1594
- end
1595
- }
1596
- end
1597
-
1598
- rule left_paren
1599
- "("
1600
- end
1601
-
1602
- rule right_paren
1603
- ")"
1604
- end
1605
-
1606
- rule hybrid_character
1607
- ("x"/"X") {
1608
- def value
1609
- "×"
1610
- end
1611
- }
1612
- /
1613
- multiplication_sign
1614
- end
1615
-
1616
- rule multiplication_sign
1617
- ("×"/"*") {
1618
- def value
1619
- "×"
1620
- end
1621
- }
1622
- end
1623
-
1624
- rule space
1625
- [\s]*
1626
- end
1627
-
1628
- rule space_hard
1629
- [\s]+
1630
- end
1631
-
1632
- end