biodiversity 3.5.1 → 4.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (40) hide show
  1. checksums.yaml +5 -5
  2. data/.gitignore +1 -0
  3. data/.rubocop.yml +9 -6
  4. data/.ruby-version +1 -1
  5. data/.travis.yml +1 -6
  6. data/CHANGELOG +3 -0
  7. data/Gemfile +2 -0
  8. data/README.md +37 -178
  9. data/Rakefile +15 -48
  10. data/biodiversity.gemspec +18 -21
  11. data/clib/linux/libgnparser.h +93 -0
  12. data/clib/linux/libgnparser.so +0 -0
  13. data/clib/mac/libgnparser.h +93 -0
  14. data/clib/mac/libgnparser.so +0 -0
  15. data/lib/biodiversity.rb +4 -9
  16. data/lib/biodiversity/parser.rb +65 -281
  17. data/lib/biodiversity/version.rb +8 -1
  18. data/spec/lib/biodiversity_spec.rb +9 -0
  19. data/spec/lib/parser_spec.rb +38 -0
  20. data/spec/spec_helper.rb +4 -81
  21. metadata +27 -102
  22. data/.byebug_history +0 -18
  23. data/.document +0 -5
  24. data/examples/socket_client.rb +0 -25
  25. data/lib/biodiversity/guid.rb +0 -1
  26. data/lib/biodiversity/guid/lsid.rb +0 -16
  27. data/lib/biodiversity/parser/scientific_name_canonical.rb +0 -528
  28. data/lib/biodiversity/parser/scientific_name_canonical.treetop +0 -120
  29. data/lib/biodiversity/parser/scientific_name_clean.rb +0 -8991
  30. data/lib/biodiversity/parser/scientific_name_clean.treetop +0 -1632
  31. data/lib/biodiversity/parser/scientific_name_dirty.rb +0 -1298
  32. data/lib/biodiversity/parser/scientific_name_dirty.treetop +0 -264
  33. data/spec/biodiversity_spec.rb +0 -11
  34. data/spec/files/test_data.txt +0 -490
  35. data/spec/files/todo.txt +0 -55
  36. data/spec/guid/lsid.spec.rb +0 -15
  37. data/spec/parser/scientific_name_canonical_spec.rb +0 -36
  38. data/spec/parser/scientific_name_clean_spec.rb +0 -1137
  39. data/spec/parser/scientific_name_dirty_spec.rb +0 -165
  40. data/spec/parser/scientific_name_spec.rb +0 -193
@@ -1,1632 +0,0 @@
1
- # encoding: UTF-8
2
- require "unicode_utils"
3
-
4
- grammar ScientificNameClean
5
-
6
- rule root
7
- space a:scientific_name_5 space {
8
- def value
9
- a.value.gsub(/\s{2,}/, " ").strip
10
- end
11
-
12
- def canonical
13
- a.canonical.gsub(/\s{2,}/, " ").strip
14
- end
15
-
16
- def pos
17
- a.pos
18
- end
19
-
20
- def hybrid
21
- a.hybrid
22
- end
23
-
24
- def details
25
- a.details.class == Array ? a.details : [a.details]
26
- end
27
-
28
- def parser_run
29
- 1
30
- end
31
- }
32
- end
33
-
34
- rule scientific_name_5
35
- a:multinomial_name space_hard hybrid_character space_hard b:species {
36
- def value
37
- a.value + " × " + b.value
38
- end
39
-
40
- def canonical
41
- a.canonical + " × " + b.canonical
42
- end
43
-
44
- def pos
45
- a.pos.merge(b.pos)
46
- end
47
-
48
- def hybrid
49
- true
50
- end
51
-
52
- def details
53
- [a.details, b.details.merge({:genus => a.details[:genus]})]
54
- end
55
- }
56
- /
57
- a:scientific_name_1 space b:taxon_concept_rank space c:authorship {
58
- def value
59
- a.value + " " + b.apply(c)
60
- end
61
-
62
- def canonical
63
- a.canonical
64
- end
65
-
66
- def pos
67
- a.pos.merge(c.pos)
68
- end
69
-
70
- def hybrid
71
- a.hybrid
72
- end
73
-
74
- def details
75
- a.details.merge(b.details(c))
76
- end
77
- }
78
- /
79
- scientific_name_4
80
- end
81
-
82
- rule scientific_name_4
83
- a:scientific_name_1 space hybrid_character space b:scientific_name_1 {
84
- def value
85
- a.value + " × " + b.value
86
- end
87
-
88
- def canonical
89
- a.canonical + " × " + b.canonical
90
- end
91
-
92
- def pos
93
- a.pos.merge(b.pos)
94
- end
95
-
96
- def hybrid
97
- true
98
- end
99
-
100
- def details
101
- [a.details, b.details]
102
- end
103
- }
104
- /
105
- a:scientific_name_1 space hybrid_character space [\?]? {
106
- def value
107
- a.value + " × ?"
108
- end
109
-
110
- def canonical
111
- a.canonical
112
- end
113
-
114
- def pos
115
- a.pos
116
- end
117
-
118
- def hybrid
119
- true
120
- end
121
-
122
- def details
123
- [a.details, "?"]
124
- end
125
- }
126
- /
127
- scientific_name_3
128
- end
129
-
130
- rule scientific_name_3
131
- a:hybrid_character space b:scientific_name_2 {
132
- def value
133
- a.value + " " + b.value
134
- end
135
-
136
- def canonical
137
- b.canonical
138
- end
139
-
140
- def pos
141
- b.pos
142
- end
143
-
144
- def hybrid
145
- true
146
- end
147
-
148
- def details
149
- b.details
150
- end
151
- }
152
- /
153
- scientific_name_2
154
- end
155
-
156
- rule scientific_name_2
157
- a:scientific_name_1 space b:status_part {
158
- def value
159
- a.value + " " + b.value
160
- end
161
-
162
- def canonical
163
- a.canonical
164
- end
165
-
166
- def pos
167
- a.pos
168
- end
169
-
170
- def hybrid
171
- a.hybrid rescue false
172
- end
173
-
174
- def details
175
- a.details.merge(b.details)
176
- end
177
- }
178
- /
179
- scientific_name_1
180
- end
181
-
182
- rule scientific_name_1
183
- multiuninomial_name
184
- /
185
- multinomial_name
186
- /
187
- uninomial_name
188
- end
189
-
190
-
191
- rule status_part
192
- a:status_word space b:status_part {
193
- def value
194
- a.value + " " + b.value
195
- end
196
- def details
197
- {:status => value}
198
- end
199
- }
200
- /
201
- status_word
202
- end
203
-
204
- rule status_word
205
- latin_word [\.] {
206
- def value
207
- text_value.strip
208
- end
209
- def details
210
- {:status => value}
211
- end
212
- }
213
- #/
214
- #latin_word
215
- end
216
-
217
- rule unparsed
218
- .+ space {
219
-
220
- def value
221
- ""
222
- end
223
-
224
- def hybrid
225
- false
226
- end
227
-
228
- def canonical
229
- ""
230
- end
231
-
232
- def pos
233
- {interval.begin => ["unparsed", interval.end]}
234
- end
235
-
236
- def details
237
- {:unparsed => text_value}
238
- end
239
- }
240
- end
241
-
242
- rule multinomial_name
243
- a:genus space b:infragenus space aid:annotation_identification? space c:species space_hard d:infraspecies_mult {
244
- def value
245
- a.value + " " + b.value + " " + c.value + " " + d.value
246
- end
247
-
248
- def canonical
249
- a.canonical + " " + c.canonical + " " + d.canonical
250
- end
251
-
252
- def pos
253
- a.pos.merge(b.pos).merge(c.pos).merge(d.pos)
254
- end
255
-
256
- def hybrid
257
- c.hybrid rescue false
258
- end
259
-
260
- def details
261
- a.details.merge(b.details).merge(c.details).merge(d.details)
262
- end
263
- }
264
- /
265
- a:genus space b:infragenus space aid:annotation_identification? space c:species space aid:annotation_identification space d:infraspecies_mult {
266
- def value
267
- a.value + " " + b.value + " " + c.value + " " + d.value
268
- end
269
-
270
- def canonical
271
- a.canonical + " " + c.canonical + " " + d.canonical
272
- end
273
-
274
- def pos
275
- a.pos.merge(b.pos).merge(c.pos).merge(d.pos)
276
- end
277
-
278
- def hybrid
279
- c.hybrid rescue false
280
- end
281
-
282
- def details
283
- a.details.merge(b.details).merge(c.details).merge(d.details)
284
- end
285
- }
286
- /
287
- a:genus space b:infragenus space aid:annotation_identification? space c:species {
288
- def value
289
- if defined? aid.apply
290
- a.value + " " + b.value + aid.apply(c)
291
- else
292
- a.value + " " + b.value + " " + c.value
293
- end
294
- end
295
-
296
- def canonical
297
- if defined? aid.apply
298
- a.canonical + aid.canonical(c)
299
- else
300
- a.canonical + " " + c.canonical
301
- end
302
- end
303
-
304
- def pos
305
- if defined? aid.apply
306
- a.pos.merge(b.pos).merge(aid.pos(c))
307
- else
308
- a.pos.merge(b.pos).merge(c.pos)
309
- end
310
- end
311
-
312
- def hybrid
313
- c.hybrid rescue false
314
- end
315
-
316
- def details
317
- if defined? aid.apply
318
- a.details.merge(b.details).merge(aid.apply(c))
319
- else
320
- a.details.merge(b.details).merge(c.details)
321
- end
322
- end
323
- }
324
- /
325
- a:genus space aid:annotation_identification? space b:species space_hard c:infraspecies_mult {
326
- def value
327
- a.value + " " + b.value + " " + c.value
328
- end
329
-
330
- def canonical
331
- a.canonical + " " + b.canonical + " " + c.canonical
332
- end
333
-
334
- def pos
335
- a.pos.merge(b.pos).merge(c.pos)
336
- end
337
-
338
- def hybrid
339
- b.hybrid rescue false
340
- end
341
-
342
- def details
343
- a.details.merge(b.details).merge(c.details)
344
- end
345
- }
346
- /
347
- a:genus space aid:annotation_identification? space b:species {
348
- def value
349
- if defined? aid.apply
350
- a.value + aid.apply(b)
351
- else
352
- a.value + " " + b.value
353
- end
354
- end
355
-
356
- def canonical
357
- if defined? aid.apply
358
- a.canonical + aid.canonical(b)
359
- else
360
- a.canonical + " " + b.canonical
361
- end
362
- end
363
-
364
- def pos
365
- if defined? aid.apply
366
- a.pos.merge(aid.pos(b))
367
- else
368
- a.pos.merge(b.pos)
369
- end
370
- end
371
-
372
- def hybrid
373
- b.hybrid rescue false
374
- end
375
-
376
- def details
377
- if defined? aid.apply
378
- a.details.merge(aid.details(b))
379
- else
380
- a.details.merge(b.details)
381
- end
382
- end
383
- }
384
- /
385
- a:genus space aid:annotation_identification space b:unparsed {
386
- def value
387
- a.value + aid.apply(b)
388
- end
389
-
390
- def canonical
391
- a.canonical + aid.canonical(b)
392
- end
393
-
394
- def pos
395
- a.pos.merge(aid.pos(b))
396
- end
397
-
398
- def hybrid
399
- false
400
- end
401
-
402
- def details
403
- a.details.merge(aid.details(b))
404
- end
405
- }
406
- end
407
-
408
- rule multiuninomial_name
409
- a:uninomial_name space b:rank_uninomial space c:uninomial_name {
410
-
411
- def value
412
- a.value + " " + b.value + " " + c.value
413
- end
414
-
415
- def canonical
416
- a.canonical
417
- end
418
-
419
- def hybrid
420
- false
421
- end
422
-
423
- def pos
424
- a.pos.merge(b.pos(c))
425
- end
426
-
427
- def details
428
- a.details.merge(b.details(c))
429
- end
430
- }
431
- end
432
-
433
- rule infraspecies_mult
434
- a:infraspecies space b:infraspecies_mult {
435
- def value
436
- a.value + " " + b.value
437
- end
438
-
439
- def canonical
440
- a.canonical + " " + b.canonical
441
- end
442
-
443
- def pos
444
- a.pos.merge(b.pos)
445
- end
446
-
447
- def details
448
- a_array = a.details[:infraspecies].class == Array ? a.details[:infraspecies] : [a.details[:infraspecies]]
449
- b_array = b.details[:infraspecies].class == Array ? b.details[:infraspecies] : [b.details[:infraspecies]]
450
- a.details.merge({:infraspecies => a_array + b_array})
451
- end
452
- }
453
- /
454
- infraspecies {
455
- def details
456
- if super[:annotation_identification]
457
- {:infraspecies => [{:annotation_identification => super[:annotation_identification], :ignored => super[:ignored]}]}
458
- else
459
- {:infraspecies => [super[:infraspecies]]}
460
- end
461
- end
462
- }
463
- end
464
-
465
- rule infraspecies
466
- a:infraspecies_string space b:authorship {
467
- def value
468
- a.value + " " + b.value
469
- end
470
-
471
- def canonical
472
- a.canonical
473
- end
474
-
475
- def pos
476
- a.pos.merge(b.pos)
477
- end
478
-
479
- def details
480
- {:infraspecies => a.details[:infraspecies].merge(b.details)}
481
- end
482
- }
483
- /
484
- infraspecies_string
485
- end
486
-
487
# An infraspecific epithet in one of three shapes: preceded by a rank marker,
# preceded by an identification annotation, or bare.
rule infraspecies_string
  sel:rank space a:species_word {
    # Every accessor is delegated to the rank node, which receives the epithet.
    def value
      sel.apply(a)
    end

    def canonical
      sel.canonical(a)
    end

    def pos
      sel.pos(a)
    end

    def details
      sel.details(a)
    end
  }
  /
  aid:annotation_identification space a:species_word ![\.] {
    def value
      aid.apply(a)
    end

    def canonical
      aid.canonical(a)
    end

    def pos
      # species_word defines no `pos`, so one is attached to the epithet node
      # before it is handed to the annotation. Inside the singleton method
      # `self` IS the epithet node, so the span must come from its own
      # `interval`; `a.interval` there would resolve to the node's first
      # labelled child and report a truncated end offset.
      def a.pos
        {interval.begin => ["infraspecies", interval.end]}
      end
      aid.pos(a)
    end

    def details
      # Same trick as in `pos`: give the epithet node a `details` method.
      def a.details
        {:infraspecies => {:string => value, :rank => "n/a"}}
      end
      aid.details(a)
    end
  }
  /
  a:species_word ![\.] {
    # Bare epithet with no rank marker; the rank is reported as "n/a".
    def value
      a.value
    end

    def canonical
      value
    end

    def pos
      {interval.begin => ["infraspecies", interval.end]}
    end

    def details
      {:infraspecies => {:string => value, :rank => "n/a"}}
    end
  }
end
547
-
548
- rule taxon_concept_rank
549
- ("sec."/"sensu.") {
550
- def value
551
- "sec."
552
- end
553
- def apply(a)
554
- " " + value + " " + a.value
555
- end
556
- def details(a = nil)
557
- {:taxon_concept => a.details}
558
- end
559
- }
560
- end
561
-
562
- rule rank
563
- ("morph."/"f.sp."/"B "/"ssp."/"ssp "/"mut."/"nat "/"nothosubsp."/"convar."/"nvar."/"pseudovar."/"sect."/"ser."/"var."/"subvar."/ "[var.]" /"var "/"subsp."/"subsp "/"subf."/"race "/"forma."/"forma "/"fma."/"fma "/"form."/"form "/"fo."/"fo "/"f."/"α"/"ββ"/"β"/"γ"/"δ"/"ε"/"φ"/"θ"/"μ"/"a."/"b."/"c."/"d."/"e."/"g."/"k."/"****"/"**"/"*")
564
- {
565
- def value
566
- text_value.strip
567
- end
568
-
569
- def apply(a)
570
- " " + text_value.strip + " " + a.value
571
- end
572
-
573
- def canonical(a)
574
- " " + a.value
575
- end
576
-
577
- def pos(a)
578
- interval_end = text_value[-1] == " " ? interval.end - 1 : interval.end
579
- {interval.begin => ["infraspecific_type", interval_end], a.interval.begin => ["infraspecies", a.interval.end]}
580
- end
581
-
582
- def details(a = nil)
583
- {:infraspecies => {:string => (a.value rescue nil), :rank => text_value.strip}}
584
- end
585
- }
586
- end
587
-
588
- rule rank_uninomial
589
- ("sect."/"sect "/"subsect."/"subsect "/"trib."/"trib "/"subtrib."/"subtrib "/"ser."/"ser "/"subgen."/"subgen "/"fam."/"fam "/"subfam."/"subfam "/"supertrib."/"supertrib ") {
590
- def value
591
- text_value.strip
592
- end
593
-
594
- def pos(uni)
595
- {interval.begin => ["rank_uninomial", interval.end], uni.interval.begin => ["uninomial", uni.interval.end]}
596
- end
597
-
598
- def details(uni)
599
- {:rank_uninomials => value, :uninomial2 => uni.details[:uninomial]}
600
- end
601
- }
602
- end
603
-
604
- rule species
605
- a:species_string space b:authorship {
606
- def value
607
- a.value + " " + b.value
608
- end
609
-
610
- def canonical
611
- a.canonical
612
- end
613
-
614
- def hybrid
615
- a.hybrid rescue false
616
- end
617
-
618
- def pos
619
- a.pos.merge(b.pos)
620
- end
621
-
622
- def details
623
- {:species => a.details[:species].merge(b.details)}
624
- end
625
- }
626
- /
627
- species_string
628
- end
629
-
630
- rule species_string
631
- species_word {
632
- def canonical
633
- value
634
- end
635
-
636
- def pos
637
- {interval.begin => ["species", interval.end]}
638
- end
639
-
640
- def hybrid
641
- false
642
- end
643
-
644
- def details
645
- {:species => {:string => value}}
646
- end
647
- }
648
- /
649
- species_word_hybrid
650
- end
651
-
652
- rule infragenus
653
- left_paren space a:(cap_latin_word/capped_dotted_char) space right_paren {
654
- def value
655
- "(" + a.value + ")"
656
- end
657
-
658
- def canonical
659
- a.value
660
- end
661
-
662
- def pos
663
- {a.interval.begin => ["infragenus", a.interval.end]}
664
- end
665
-
666
- def details
667
- {:infragenus => {:string => a.value}}
668
- end
669
- }
670
- end
671
-
672
- rule genus
673
- a:(abbreviated_genus/uninomial_string) !(space_hard author_prefix_word space_hard author_word) {
674
- def value
675
- a.value
676
- end
677
-
678
- def pos
679
- {a.interval.begin => ["genus", a.interval.end]}
680
- end
681
-
682
- def canonical
683
- a.value
684
- end
685
-
686
- def details
687
- {:genus => {:string => a.value}}
688
- end
689
- }
690
- end
691
-
692
# Genus abbreviated to one capital letter, up to two lowercase letters and a
# terminating period (e.g. "A." or "Ph."), followed by optional whitespace.
rule abbreviated_genus
  [A-Z] [a-z]? [a-z]? [\.] space {
    # The terminator class is `[\.]` (a literal period only). The previous
    # `[\\.]` also accepted a backslash as the terminator.

    # Abbreviation text without the whitespace consumed by `space`.
    def value
      text_value.strip
    end

    def canonical
      value
    end

    # The interval is that of the whole match, trailing whitespace included.
    def pos
      {interval.begin => ["abbreviated_genus", interval.end]}
    end

    def details
      {:abbreviated_genus => {:string => value}}
    end
  }
end
711
-
712
- rule uninomial_name
713
- a:uninomial_string space b:infragenus space c:simple_authorship {
714
- def value
715
- a.value + " " + b.value + " " + c.value
716
- end
717
-
718
- def canonical
719
- a.canonical
720
- end
721
-
722
- def pos
723
- a.pos.merge(b.pos).merge(c.pos)
724
- end
725
-
726
- def hybrid
727
- false
728
- end
729
-
730
- def details
731
- {:uninomial => a.details[:uninomial].merge(b.details).merge(c.details)}
732
- end
733
- }
734
- /
735
- a:uninomial_string space b:infragenus {
736
- def value
737
- a.value + " " + b.value
738
- end
739
-
740
- def canonical
741
- a.canonical
742
- end
743
-
744
- def pos
745
- a.pos.merge(b.pos)
746
- end
747
-
748
- def hybrid
749
- false
750
- end
751
-
752
- def details
753
- {:uninomial => a.details[:uninomial].merge(b.details)}
754
- end
755
- }
756
- /
757
- a:uninomial_string space_hard b:authorship {
758
- def value
759
- a.value + " " + b.value
760
- end
761
-
762
- def canonical
763
- a.canonical
764
- end
765
-
766
- def pos
767
- a.pos.merge(b.pos)
768
- end
769
-
770
- def hybrid
771
- false
772
- end
773
-
774
- def details
775
- {:uninomial => a.details[:uninomial].merge(b.details)}
776
- end
777
- }
778
- /
779
- uninomial_string
780
- end
781
-
782
- rule uninomial_string
783
- (cap_latin_word_pair/cap_latin_word) {
784
- def canonical
785
- value
786
- end
787
-
788
- def pos
789
- {interval.begin => ["uninomial", interval.end]}
790
- end
791
-
792
- def hybrid
793
- false
794
- end
795
-
796
- def details
797
- {:uninomial => {:string => value}}
798
- end
799
- }
800
- end
801
-
802
- rule authorship
803
- a:basionym_authorship_with_parenthesis space b:simple_authorship ","? space c:ex_authorship {
804
- def value
805
- a.value + " " + b.value + " " + c.value
806
- end
807
-
808
- def pos
809
- a.pos.merge(b.pos).merge(c.pos)
810
- end
811
-
812
- def details
813
- val = {:authorship => text_value.strip, :combinationAuthorTeam => b.details[:basionymAuthorTeam], :basionymAuthorTeam => a.details[:basionymAuthorTeam]}
814
- val[:combinationAuthorTeam].merge!(c.details)
815
- val
816
- end
817
- }
818
- /
819
- a:basionym_authorship_with_parenthesis space b:simple_authorship space c:emend_authorship {
820
- def value
821
- a.value + " " + b.value + " " + c.value
822
- end
823
-
824
- def pos
825
- a.pos.merge(b.pos).merge(c.pos)
826
- end
827
-
828
- def details
829
- val = {:authorship => text_value.strip, :combinationAuthorTeam => b.details[:basionymAuthorTeam], :basionymAuthorTeam => a.details[:basionymAuthorTeam]}
830
- val[:combinationAuthorTeam].merge!(c.details)
831
- val
832
- end
833
- }
834
- /
835
- a:basionym_authorship_with_parenthesis space b:simple_authorship {
836
- def value
837
- a.value + " " + b.value
838
- end
839
-
840
- def pos
841
- a.pos.merge(b.pos)
842
- end
843
-
844
- def details
845
- {:authorship => text_value.strip, :combinationAuthorTeam => b.details[:basionymAuthorTeam], :basionymAuthorTeam => a.details[:basionymAuthorTeam]}
846
- end
847
- }
848
- /
849
- basionym_authorship_with_parenthesis
850
- /
851
- a:simple_authorship ","? space b:ex_authorship {
852
- def value
853
- a.value + " " + b.value
854
- end
855
-
856
- def pos
857
- a.pos.merge(b.pos)
858
- end
859
-
860
- def details
861
- val = a.details
862
- val[:authorship] = text_value.strip
863
- val[:basionymAuthorTeam].merge!(b.details)
864
- val
865
- end
866
- }
867
- /
868
- a:simple_authorship space b:emend_authorship {
869
- def value
870
- a.value + " " + b.value
871
- end
872
-
873
- def pos
874
- a.pos.merge(b.pos)
875
- end
876
-
877
- def details
878
- val = a.details
879
- val[:authorship] = text_value.strip
880
- val[:basionymAuthorTeam].merge!(b.details)
881
- val
882
- end
883
- }
884
- /
885
- simple_authorship
886
- end
887
-
888
-
889
- rule basionym_authorship_with_parenthesis
890
- left_paren space a:authors_names space right_paren space [,]? space b:year {
891
- def value
892
- "(" + a.value + " " + b.value + ")"
893
- end
894
-
895
- def pos
896
- a.pos.merge(b.pos)
897
- end
898
-
899
- def details
900
- { :authorship => text_value,
901
- :basionymAuthorTeam => {:author_team => text_value}.merge(a.details).merge(b.details)
902
- }
903
- end
904
- }
905
- /
906
- left_paren space a:simple_authorship ","? space b:ex_authorship space right_paren {
907
- def value
908
- "(" + a.value + " " + b.value + ")"
909
- end
910
-
911
- def pos
912
- a.pos.merge(b.pos)
913
- end
914
-
915
- def details
916
- val = a.details
917
- val[:basionymAuthorTeam].merge!(b.details)
918
- val[:authorship] = text_value.strip
919
- val
920
- end
921
- }
922
- /
923
- left_paren space a:simple_authorship space b:emend_authorship space right_paren {
924
- def value
925
- "(" + a.value + " " + b.value + ")"
926
- end
927
-
928
- def pos
929
- a.pos.merge(b.pos)
930
- end
931
-
932
- def details
933
- val = a.details
934
- val[:basionymAuthorTeam].merge!(b.details)
935
- val[:authorship] = text_value.strip
936
- val
937
- end
938
- }
939
- /
940
- left_paren space a:simple_authorship space right_paren {
941
- def value
942
- "(" + a.value + ")"
943
- end
944
-
945
- def pos
946
- a.pos
947
- end
948
-
949
- def details
950
- val = a.details
951
- val[:authorship] = text_value
952
- val
953
- end
954
- }
955
- /
956
- left_paren space a:"?" space right_paren {
957
- def value
958
- "(?)"
959
- end
960
-
961
- def pos
962
- {a.interval.begin => ["unknown_author", a.interval.end]}
963
- end
964
-
965
- def details
966
- {:authorship => text_value, :basionymAuthorTeam => {:authorTeam => text_value, :author => ["?"]}}
967
- end
968
- }
969
- end
970
-
971
- rule ex_authorship
972
- ex_sep space b:simple_authorship space ex_sep space c:simple_authorship {
973
- def value
974
- " ex " + b.value + " ex " + c.value
975
- end
976
-
977
- def pos
978
- b.pos
979
- end
980
-
981
- def details
982
- val = {:exAuthorTeam => {:authorTeam => b.text_value.strip}.merge(b.details[:basionymAuthorTeam])}
983
- val
984
- end
985
- }
986
- /
987
- ex_sep space b:simple_authorship {
988
- def value
989
- " ex " + b.value
990
- end
991
-
992
- def pos
993
- b.pos
994
- end
995
-
996
- def details
997
- val = {:exAuthorTeam => {:authorTeam => b.text_value.strip}.merge(b.details[:basionymAuthorTeam])}
998
- val
999
- end
1000
- }
1001
- end
1002
-
1003
- rule emend_authorship
1004
- emend space b:simple_authorship {
1005
- def value
1006
- " emend. " + b.value
1007
- end
1008
-
1009
- def pos
1010
- b.pos
1011
- end
1012
-
1013
- def details
1014
- val = {:emendAuthorTeam => {:authorTeam => b.text_value.strip}.merge(b.details[:basionymAuthorTeam])}
1015
- val
1016
- end
1017
- }
1018
- end
1019
-
1020
# One author team with an optional year. Three shapes are recognised:
#   1. authors [,] [year] [,] "non" authors [,] year
#   2. authors [,] year
#   3. authors only
rule simple_authorship
  a:authors_names space [,]? space b:year? [,]? space "non" space authors_names space [,]? space year {
    # `b` is optional here. When no year precedes "non", `b` is an empty
    # syntax node without value/pos/details, so each accessor checks
    # `b.empty?` before touching it.
    def value
      b.empty? ? a.value : a.value + " " + b.value
    end

    def pos
      b.empty? ? a.pos : a.pos.merge(b.pos)
    end

    def details
      details_with_arg(:basionymAuthorTeam)
    end

    # Builds the details hash under the requested team key.
    # authorTeamType: String or Symbol naming the team key.
    def details_with_arg(authorTeamType = "basionymAuthorTeam")
      year_details = b.empty? ? {} : b.details
      { :authorship => text_value,
        authorTeamType.to_sym => {
          :authorTeam => a.text_value.strip
        }.merge(a.details).merge(year_details)
      }
    end
  }
  /
  a:authors_names space [,]? space b:year {
    def value
      a.value + " " + b.value
    end

    def pos
      a.pos.merge(b.pos)
    end

    def details
      details_with_arg(:basionymAuthorTeam)
    end

    # Builds the details hash under the requested team key.
    # authorTeamType: String or Symbol naming the team key.
    def details_with_arg(authorTeamType = "basionymAuthorTeam")
      { :authorship => text_value,
        authorTeamType.to_sym => {
          :authorTeam => a.text_value.strip
        }.merge(a.details).merge(b.details)
      }
    end
  }
  /
  authors_names {
    # Wraps the details of the underlying authors_names node (reached via
    # `super`) inside the :basionymAuthorTeam entry.
    def details
      wrapped = details_with_arg(:basionymAuthorTeam)
      wrapped[:basionymAuthorTeam].merge!(super)
      wrapped
    end

    # Builds the details hash under the requested team key.
    # authorTeamType: String or Symbol naming the team key.
    def details_with_arg(authorTeamType = "basionymAuthorTeam")
      { :authorship => text_value,
        authorTeamType.to_sym => {
          :authorTeam => text_value,
        }
      }
    end
  }
end
1081
-
1082
- rule authors_names
1083
- a:author_name space sep:author_separator space b:authors_names {
1084
- def value
1085
- sep.apply(a,b)
1086
- end
1087
-
1088
- def pos
1089
- sep.pos(a,b)
1090
- end
1091
-
1092
- def details
1093
- sep.details(a,b)
1094
- end
1095
- }
1096
- /
1097
- author_name
1098
- /
1099
- unknown_auth
1100
- end
1101
-
1102
-
1103
- rule unknown_auth
1104
- ("auct."/"auct"/"hort."/"hort"/"anon."/"anon"/"ht."/"ht") !latin_word {
1105
- def value
1106
- text_value
1107
- end
1108
-
1109
- def pos
1110
- {interval.begin => ["unknown_author", interval.end]}
1111
- end
1112
-
1113
- def details
1114
- {:author => ["unknown"]}
1115
- end
1116
- }
1117
- end
1118
-
1119
- rule ex_sep
1120
- ("ex"/"in") &[\s]
1121
- end
1122
-
1123
- rule emend
1124
- ("emend."/"emend") &[\s]
1125
- end
1126
-
1127
# Token that joins two author names inside an author team.
rule author_separator
  ("&amp;"/"&"/",&"/", &"/",and"/", and"/"apud"/"and"/"et"/",") {
    # NOTE(review): the listing this grammar was recovered from showed "&"
    # twice, which makes the first alternative dead. It is restored here as
    # the HTML-escaped ampersand "&amp;" on the assumption that the entity was
    # decoded by the page rendering; confirm against the upstream grammar.
    # "&amp;" must stay ahead of "&" because the choice is ordered.

    # Joins two author nodes. Every "and"-like separator is normalised to
    # " &", "apud" gets a leading space, and a plain "," is kept as is.
    def apply(a,b)
      sep = text_value.strip
      sep = " &" if ["&amp;", "&", "and", "et",
        ",&", ", &", ",and", ", and"].include? sep
      sep = " apud" if sep == "apud"
      a.value + sep + " " + b.value
    end

    def pos(a,b)
      a.pos.merge(b.pos)
    end

    # Concatenates the :author arrays of both sides.
    def details(a,b)
      {:author => a.details[:author] + b.details[:author]}
    end
  }
end
1146
-
1147
- rule author_name
1148
- space a:author_name_without_postfix space b:author_maybe_filius space !latin_word {
1149
- def value
1150
- a.value + " " + b.value
1151
- end
1152
-
1153
- def pos
1154
- a.pos.merge(b.pos)
1155
- end
1156
-
1157
- def details
1158
- {:author => [value]}
1159
- end
1160
- }
1161
- /
1162
- space a:author_name_without_postfix space b:author_filius {
1163
- def value
1164
- a.value + " " + b.value
1165
- end
1166
-
1167
- def pos
1168
- a.pos.merge(b.pos)
1169
- end
1170
-
1171
- def details
1172
- {:author => [value]}
1173
- end
1174
- }
1175
- /
1176
- author_name_without_postfix
1177
- end
1178
-
1179
- rule author_name_without_postfix
1180
- space a:author_prefix_word space b:author_name {
1181
- def value
1182
- a.value + " " + b.value
1183
- end
1184
-
1185
- def pos
1186
- a.pos.merge(b.pos)
1187
- end
1188
-
1189
- def details
1190
- {:author => [value]}
1191
- end
1192
- }
1193
- /
1194
- a:author_word space b:author_name {
1195
- def value
1196
- a.value + " " + b.value
1197
- end
1198
-
1199
- def pos
1200
- a.pos.merge(b.pos)
1201
- end
1202
-
1203
- def details
1204
- {:author => [value]}
1205
- end
1206
- }
1207
- /
1208
- author_word
1209
- end
1210
-
1211
# A single word of an author name, plus a few literal multi-word tokens.
rule author_word
  "A S. Xu" {
    def value
      text_value.strip
    end

    # The literal holds three words: "A" at offsets 0...1, "S." at 2...4 and
    # "Xu" at 5...7 relative to the match start. Ends are stored as absolute
    # offsets, as in every other `pos` of this grammar; the earlier values
    # (1, 2, 2) were word lengths rather than end positions.
    def pos
      start = interval.begin
      {
        start => ["author_word", start + 1],
        (start + 2) => ["author_word", start + 4],
        (start + 5) => ["author_word", start + 7]
      }
    end

    def details
      {:author => [value]}
    end
  }
  /
  ("arg."/"et al.\{\?\}"/"et al."/"et al"/"& al."/"& al") {
    def value
      text_value.strip
    end

    def pos
      # Simplification: some of these tokens contain several words but are
      # reported as one author_word span.
      {interval.begin => ["author_word", interval.end]}
    end

    def details
      {:author => [value]}
    end
  }
  /
  ("d" ['’])? ("Å"/"Ö"/"Á"/"Ø"/"Ô"/"Š"/"Ś"/"Č"/"Ķ"/"Ł"/"É"/"Ž"/[A-W]/[Y-Z]) [^0-9\[\]\(\)\s&,]* {
    # Runs of three or more uppercase letters are converted to title case.
    def value
      text_value.gsub(/([\p{Lu}]{3,})/) do |match|
        UnicodeUtils.titlecase(match)
      end
    end

    def pos
      {interval.begin => ["author_word", interval.end]}
    end

    def details
      {:author => [value]}
    end
  }
  /
  "X" [^0-9\[\]\(\)\s&,]+ {
    # A word starting with "X" needs at least one more character.
    def value
      text_value
    end

    def pos
      {interval.begin => ["author_word", interval.end]}
    end

    def details
      {:author => [value]}
    end
  }
  /
  author_prefix_word
end
1273
-
1274
- rule author_prefix_word
1275
- space ("ab"/"af"/"bis"/"da"/"der"/"del"/"des"/"den"/"della"/"dela"/"de"/"di"/"du"/"do"/"la"/"'t"/"ter"/"van"/"von") &space_hard {
1276
- def value
1277
- text_value
1278
- end
1279
-
1280
- def pos
1281
- #cheating because there are several words in some of them
1282
- {interval.begin => ["author_word", interval.end]}
1283
- end
1284
- }
1285
- end
1286
-
1287
- rule author_filius
1288
- ("fil."/"filius") {
1289
- def value
1290
- text_value.strip
1291
- end
1292
-
1293
- def pos
1294
- {interval.begin => ["author_word", interval.end]}
1295
- end
1296
- }
1297
- end
1298
-
1299
- rule author_maybe_filius
1300
- "f." {
1301
- def value
1302
- text_value.strip
1303
- end
1304
-
1305
- def pos
1306
- {interval.begin => ["author_word", interval.end]}
1307
- end
1308
- }
1309
- end
1310
-
1311
- rule cap_latin_word_pair
1312
- a:cap_latin_word "-" b:cap_latin_word {
1313
- def value
1314
- a.value + b.value.downcase
1315
- end
1316
- }
1317
- end
1318
-
1319
- rule cap_latin_word
1320
- a:([A-Z]/cap_digraph) b:latin_word "?" {
1321
- def value
1322
- (a.value rescue a.text_value) + b.value
1323
- end
1324
- }
1325
- /
1326
- a:([A-Z]/cap_digraph) b:latin_word {
1327
- def value
1328
- (a.value rescue a.text_value) + b.value
1329
- end
1330
- }
1331
- /
1332
- a:("AE"/"OE") b:latin_word {
1333
- def value
1334
- a.text_value[0..0] + "e" + b.value
1335
- end
1336
- }
1337
- /
1338
- ("Ca"/"Ea"/"Ge"/"Ia"/"Io"/"Io"/"Ix"/"Lo"/"Oa"/"Ra"/"Ty"/"Ua"/"Aa"/"Ja"/"Zu"/"La"/"Qu"/"As"/"Ba") {
1339
- def value
1340
- text_value
1341
- end
1342
- }
1343
- end
1344
-
1345
- rule capped_dotted_char
1346
- [A-Z] "." {
1347
- def value
1348
- text_value
1349
- end
1350
- }
1351
- end
1352
-
1353
- rule species_word_hybrid
1354
- a:multiplication_sign space b:species_word {
1355
- def value
1356
- a.value + " " + b.value
1357
- end
1358
-
1359
- def canonical
1360
- b.value
1361
- end
1362
-
1363
- def hybrid
1364
- true
1365
- end
1366
-
1367
- def pos
1368
- {b.interval.begin => ["species", b.interval.end]}
1369
- end
1370
-
1371
- def details
1372
- {:species => {:string => b.value}}
1373
- end
1374
- }
1375
- /
1376
- a:"X" space b:species_word {
1377
- def value
1378
- "× " + b.value
1379
- end
1380
-
1381
- def canonical
1382
- b.value
1383
- end
1384
-
1385
- def hybrid
1386
- true
1387
- end
1388
-
1389
- def pos
1390
- {b.interval.begin => ["species", b.interval.end]}
1391
- end
1392
-
1393
- def details
1394
- {:species => {:string => b.value}}
1395
- end
1396
- }
1397
- /
1398
- a:"x" space_hard b:species_word {
1399
- def value
1400
- "× " + b.value
1401
- end
1402
-
1403
- def canonical
1404
- b.value
1405
- end
1406
-
1407
- def hybrid
1408
- true
1409
- end
1410
-
1411
- def pos
1412
- {b.interval.begin => ["species", b.interval.end]}
1413
- end
1414
-
1415
- def details
1416
- {:species => {:string => b.value}}
1417
- end
1418
- }
1419
- end
1420
-
1421
# Identification qualifiers. The first group ("sp.", "aff.", "nr.", ...)
# drops whatever follows it from the rendered name; "cf." keeps it.
rule annotation_identification
  ("sp.nr."/"sp. nr."/"nr."/"nr "/"sp.aff."/"sp. aff."/"sp."/"sp "/"species"/"spp."/"spp "/"aff."/"aff "/"monst."/"? ") {

    def value
      text_value.strip
    end

    # The following element contributes nothing to the rendered value.
    def apply(sp)
      ""
    end

    def canonical(sp)
      ""
    end

    # Span of the qualifier itself. Some alternatives end in a space that is
    # not part of the qualifier, so the end offset is pulled back by one for
    # those (the value was computed before but never used).
    def pos(sp)
      interval_end = text_value[-1] == " " ? interval.end - 1 : interval.end
      {interval.begin => ["annotation_identification", interval_end]}
    end

    # The details of the following element are kept under :ignored.
    def details(sp)
      {:annotation_identification => value, :ignored => sp.details}
    end
  }
  /
  ("cf."/"cf ") {
    def value
      text_value.strip
    end

    def apply(sp)
      " " + value + " " + sp.value
    end

    def canonical(sp)
      " " + sp.canonical
    end

    # Same trailing-space trimming as above, merged with the positions of
    # the following element.
    def pos(sp)
      interval_end = text_value[-1] == " " ? interval.end - 1 : interval.end
      {interval.begin => ["annotation_identification", interval_end]}.merge(sp.pos)
    end

    def details(sp)
      {:annotation_identification => value, :species => sp.details}
    end
  }
end
1469
-
1470
- rule species_word
1471
- a:[0-9]+ "-"? b:latin_word {
1472
- def value
1473
- num = {"1" => "uni", "2" => "du", "3" => "tri", "4" => "quadri", "5" => "quinque", "6" => "hexa", "7" => "septem", "8" => "octo", "9" => "novem", "10" => "decem", "11" => "undecim", "12" => "duodec", "13" => "tredec", "14" => "quattuordec", "15" => "quinquadec", "16" => "hexadec", "17" => "septendec", "18" => "octodec", "19" => "novemdec", "20" => "viginti", "21" => "unviginti", "22" => "duodeviginti", "23" => "triviginti", "24" => "quattuorviginti", "25" => "quinquatviginti", "26" => "hexaviginti", "27" => "septenviginti", "28" => "octoviginti", "29" => "novemviginti", "30" => "triginta", "38" => "trigintaocto", "100" => "centi"}
1474
- a_value = num[a.text_value] ? num[a.text_value] : a.text_value + "-"
1475
- a_value + b.value
1476
- end
1477
- }
1478
- /
1479
- latin_word
1480
- end
1481
-
1482
- rule latin_word
1483
- a:valid_name_letters "-" b:latin_word {
1484
- def value
1485
- a.value + "-" + b.value
1486
- end
1487
- }
1488
- /
1489
- a:valid_name_letter "'" b:latin_word {
1490
- def value
1491
- a.value + b.value
1492
- end
1493
- }
1494
- /
1495
- a:valid_name_letter b:valid_name_letters {
1496
- def value
1497
- a.value + b.value
1498
- end
1499
- }
1500
- end
1501
-
1502
# One or more lowercase letters allowed inside a name word.
rule valid_name_letters
  [a-zëæœ]+ {
    # Returns the matched text with the ligatures expanded ("æ" -> "ae",
    # "œ" -> "oe") and "ë" reduced to "e". The diaeresis is legal under the
    # botanical code, but normalising it helps name reconciliation.
    def value
      text_value.each_char.map do |letter|
        case letter
        when "æ" then "ae"
        when "œ" then "oe"
        when "ë" then "e"
        else letter
        end
      end.join
    end
  }
end
1518
-
1519
- rule valid_name_letter
1520
- [a-zëæœ] {
1521
- def value
1522
- res = text_value
1523
- res = "ae" if res == "æ"
1524
- res = "oe" if res == "œ"
1525
- res = "e" if res == "ë"
1526
- res
1527
- end
1528
- }
1529
- end
1530
-
1531
-
1532
- rule cap_digraph
1533
- "Æ" {
1534
- def value
1535
- "Ae"
1536
- end
1537
- }
1538
- /
1539
- "Œ" {
1540
- def value
1541
- "Oe"
1542
- end
1543
- }
1544
- end
1545
-
1546
- rule year
1547
- b:left_paren space a:(year_number_with_character/year_number) space c:right_paren {
1548
- def value
1549
- a.value
1550
- end
1551
-
1552
- def pos
1553
- a.pos
1554
- end
1555
-
1556
- def details
1557
- a.details
1558
- end
1559
- }
1560
- /
1561
- year_number_with_character
1562
- /
1563
- year_number
1564
- end
1565
-
1566
- rule year_number_with_character
1567
- a:year_number [a-zA-Z] {
1568
- def value
1569
- a.text_value
1570
- end
1571
-
1572
- def pos
1573
- {interval.begin => ["year", interval.end]}
1574
- end
1575
-
1576
- def details
1577
- {:year => value}
1578
- end
1579
- }
1580
- end
1581
-
1582
- rule year_number
1583
- [12] [7890] [0-9] ([0-9] [\?]?/"?") {
1584
- def value
1585
- text_value
1586
- end
1587
-
1588
- def pos
1589
- {interval.begin => ["year", interval.end]}
1590
- end
1591
-
1592
- def details
1593
- {:year => value}
1594
- end
1595
- }
1596
- end
1597
-
1598
- rule left_paren
1599
- "("
1600
- end
1601
-
1602
- rule right_paren
1603
- ")"
1604
- end
1605
-
1606
- rule hybrid_character
1607
- ("x"/"X") {
1608
- def value
1609
- "×"
1610
- end
1611
- }
1612
- /
1613
- multiplication_sign
1614
- end
1615
-
1616
- rule multiplication_sign
1617
- ("×"/"*") {
1618
- def value
1619
- "×"
1620
- end
1621
- }
1622
- end
1623
-
1624
- rule space
1625
- [\s]*
1626
- end
1627
-
1628
- rule space_hard
1629
- [\s]+
1630
- end
1631
-
1632
- end