biodiversity 3.5.1 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.gitignore +1 -0
- data/.rubocop.yml +9 -6
- data/.ruby-version +1 -1
- data/.travis.yml +1 -6
- data/CHANGELOG +3 -0
- data/Gemfile +2 -0
- data/README.md +37 -178
- data/Rakefile +15 -48
- data/biodiversity.gemspec +18 -21
- data/clib/linux/libgnparser.h +93 -0
- data/clib/linux/libgnparser.so +0 -0
- data/clib/mac/libgnparser.h +93 -0
- data/clib/mac/libgnparser.so +0 -0
- data/lib/biodiversity.rb +4 -9
- data/lib/biodiversity/parser.rb +65 -281
- data/lib/biodiversity/version.rb +8 -1
- data/spec/lib/biodiversity_spec.rb +9 -0
- data/spec/lib/parser_spec.rb +38 -0
- data/spec/spec_helper.rb +4 -81
- metadata +27 -102
- data/.byebug_history +0 -18
- data/.document +0 -5
- data/examples/socket_client.rb +0 -25
- data/lib/biodiversity/guid.rb +0 -1
- data/lib/biodiversity/guid/lsid.rb +0 -16
- data/lib/biodiversity/parser/scientific_name_canonical.rb +0 -528
- data/lib/biodiversity/parser/scientific_name_canonical.treetop +0 -120
- data/lib/biodiversity/parser/scientific_name_clean.rb +0 -8991
- data/lib/biodiversity/parser/scientific_name_clean.treetop +0 -1632
- data/lib/biodiversity/parser/scientific_name_dirty.rb +0 -1298
- data/lib/biodiversity/parser/scientific_name_dirty.treetop +0 -264
- data/spec/biodiversity_spec.rb +0 -11
- data/spec/files/test_data.txt +0 -490
- data/spec/files/todo.txt +0 -55
- data/spec/guid/lsid.spec.rb +0 -15
- data/spec/parser/scientific_name_canonical_spec.rb +0 -36
- data/spec/parser/scientific_name_clean_spec.rb +0 -1137
- data/spec/parser/scientific_name_dirty_spec.rb +0 -165
- data/spec/parser/scientific_name_spec.rb +0 -193
@@ -1,1632 +0,0 @@
|
|
1
|
-
# encoding: UTF-8
|
2
|
-
require "unicode_utils"
|
3
|
-
|
4
|
-
grammar ScientificNameClean
|
5
|
-
|
6
|
-
rule root
|
7
|
-
space a:scientific_name_5 space {
|
8
|
-
def value
|
9
|
-
a.value.gsub(/\s{2,}/, " ").strip
|
10
|
-
end
|
11
|
-
|
12
|
-
def canonical
|
13
|
-
a.canonical.gsub(/\s{2,}/, " ").strip
|
14
|
-
end
|
15
|
-
|
16
|
-
def pos
|
17
|
-
a.pos
|
18
|
-
end
|
19
|
-
|
20
|
-
def hybrid
|
21
|
-
a.hybrid
|
22
|
-
end
|
23
|
-
|
24
|
-
# Returns the parsed details of the whole name as an Array.
# Hybrid formulas already produce an Array of detail entries; any other
# result is wrapped so callers always receive an Array.
def details
  # Evaluate a.details only once: every call rebuilds the nested hashes.
  parsed = a.details
  parsed.class == Array ? parsed : [parsed]
end
|
27
|
-
|
28
|
-
def parser_run
|
29
|
-
1
|
30
|
-
end
|
31
|
-
}
|
32
|
-
end
|
33
|
-
|
34
|
-
rule scientific_name_5
|
35
|
-
a:multinomial_name space_hard hybrid_character space_hard b:species {
|
36
|
-
def value
|
37
|
-
a.value + " × " + b.value
|
38
|
-
end
|
39
|
-
|
40
|
-
def canonical
|
41
|
-
a.canonical + " × " + b.canonical
|
42
|
-
end
|
43
|
-
|
44
|
-
def pos
|
45
|
-
a.pos.merge(b.pos)
|
46
|
-
end
|
47
|
-
|
48
|
-
def hybrid
|
49
|
-
true
|
50
|
-
end
|
51
|
-
|
52
|
-
def details
|
53
|
-
[a.details, b.details.merge({:genus => a.details[:genus]})]
|
54
|
-
end
|
55
|
-
}
|
56
|
-
/
|
57
|
-
a:scientific_name_1 space b:taxon_concept_rank space c:authorship {
|
58
|
-
def value
|
59
|
-
a.value + " " + b.apply(c)
|
60
|
-
end
|
61
|
-
|
62
|
-
def canonical
|
63
|
-
a.canonical
|
64
|
-
end
|
65
|
-
|
66
|
-
def pos
|
67
|
-
a.pos.merge(c.pos)
|
68
|
-
end
|
69
|
-
|
70
|
-
def hybrid
|
71
|
-
a.hybrid
|
72
|
-
end
|
73
|
-
|
74
|
-
def details
|
75
|
-
a.details.merge(b.details(c))
|
76
|
-
end
|
77
|
-
}
|
78
|
-
/
|
79
|
-
scientific_name_4
|
80
|
-
end
|
81
|
-
|
82
|
-
rule scientific_name_4
|
83
|
-
a:scientific_name_1 space hybrid_character space b:scientific_name_1 {
|
84
|
-
def value
|
85
|
-
a.value + " × " + b.value
|
86
|
-
end
|
87
|
-
|
88
|
-
def canonical
|
89
|
-
a.canonical + " × " + b.canonical
|
90
|
-
end
|
91
|
-
|
92
|
-
def pos
|
93
|
-
a.pos.merge(b.pos)
|
94
|
-
end
|
95
|
-
|
96
|
-
def hybrid
|
97
|
-
true
|
98
|
-
end
|
99
|
-
|
100
|
-
def details
|
101
|
-
[a.details, b.details]
|
102
|
-
end
|
103
|
-
}
|
104
|
-
/
|
105
|
-
a:scientific_name_1 space hybrid_character space [\?]? {
|
106
|
-
def value
|
107
|
-
a.value + " × ?"
|
108
|
-
end
|
109
|
-
|
110
|
-
def canonical
|
111
|
-
a.canonical
|
112
|
-
end
|
113
|
-
|
114
|
-
def pos
|
115
|
-
a.pos
|
116
|
-
end
|
117
|
-
|
118
|
-
def hybrid
|
119
|
-
true
|
120
|
-
end
|
121
|
-
|
122
|
-
def details
|
123
|
-
[a.details, "?"]
|
124
|
-
end
|
125
|
-
}
|
126
|
-
/
|
127
|
-
scientific_name_3
|
128
|
-
end
|
129
|
-
|
130
|
-
rule scientific_name_3
|
131
|
-
a:hybrid_character space b:scientific_name_2 {
|
132
|
-
def value
|
133
|
-
a.value + " " + b.value
|
134
|
-
end
|
135
|
-
|
136
|
-
def canonical
|
137
|
-
b.canonical
|
138
|
-
end
|
139
|
-
|
140
|
-
def pos
|
141
|
-
b.pos
|
142
|
-
end
|
143
|
-
|
144
|
-
def hybrid
|
145
|
-
true
|
146
|
-
end
|
147
|
-
|
148
|
-
def details
|
149
|
-
b.details
|
150
|
-
end
|
151
|
-
}
|
152
|
-
/
|
153
|
-
scientific_name_2
|
154
|
-
end
|
155
|
-
|
156
|
-
rule scientific_name_2
|
157
|
-
a:scientific_name_1 space b:status_part {
|
158
|
-
def value
|
159
|
-
a.value + " " + b.value
|
160
|
-
end
|
161
|
-
|
162
|
-
def canonical
|
163
|
-
a.canonical
|
164
|
-
end
|
165
|
-
|
166
|
-
def pos
|
167
|
-
a.pos
|
168
|
-
end
|
169
|
-
|
170
|
-
def hybrid
|
171
|
-
a.hybrid rescue false
|
172
|
-
end
|
173
|
-
|
174
|
-
def details
|
175
|
-
a.details.merge(b.details)
|
176
|
-
end
|
177
|
-
}
|
178
|
-
/
|
179
|
-
scientific_name_1
|
180
|
-
end
|
181
|
-
|
182
|
-
rule scientific_name_1
|
183
|
-
multiuninomial_name
|
184
|
-
/
|
185
|
-
multinomial_name
|
186
|
-
/
|
187
|
-
uninomial_name
|
188
|
-
end
|
189
|
-
|
190
|
-
|
191
|
-
rule status_part
|
192
|
-
a:status_word space b:status_part {
|
193
|
-
def value
|
194
|
-
a.value + " " + b.value
|
195
|
-
end
|
196
|
-
def details
|
197
|
-
{:status => value}
|
198
|
-
end
|
199
|
-
}
|
200
|
-
/
|
201
|
-
status_word
|
202
|
-
end
|
203
|
-
|
204
|
-
rule status_word
|
205
|
-
latin_word [\.] {
|
206
|
-
def value
|
207
|
-
text_value.strip
|
208
|
-
end
|
209
|
-
def details
|
210
|
-
{:status => value}
|
211
|
-
end
|
212
|
-
}
|
213
|
-
#/
|
214
|
-
#latin_word
|
215
|
-
end
|
216
|
-
|
217
|
-
rule unparsed
|
218
|
-
.+ space {
|
219
|
-
|
220
|
-
def value
|
221
|
-
""
|
222
|
-
end
|
223
|
-
|
224
|
-
def hybrid
|
225
|
-
false
|
226
|
-
end
|
227
|
-
|
228
|
-
def canonical
|
229
|
-
""
|
230
|
-
end
|
231
|
-
|
232
|
-
def pos
|
233
|
-
{interval.begin => ["unparsed", interval.end]}
|
234
|
-
end
|
235
|
-
|
236
|
-
def details
|
237
|
-
{:unparsed => text_value}
|
238
|
-
end
|
239
|
-
}
|
240
|
-
end
|
241
|
-
|
242
|
-
rule multinomial_name
|
243
|
-
a:genus space b:infragenus space aid:annotation_identification? space c:species space_hard d:infraspecies_mult {
|
244
|
-
def value
|
245
|
-
a.value + " " + b.value + " " + c.value + " " + d.value
|
246
|
-
end
|
247
|
-
|
248
|
-
def canonical
|
249
|
-
a.canonical + " " + c.canonical + " " + d.canonical
|
250
|
-
end
|
251
|
-
|
252
|
-
def pos
|
253
|
-
a.pos.merge(b.pos).merge(c.pos).merge(d.pos)
|
254
|
-
end
|
255
|
-
|
256
|
-
def hybrid
|
257
|
-
c.hybrid rescue false
|
258
|
-
end
|
259
|
-
|
260
|
-
def details
|
261
|
-
a.details.merge(b.details).merge(c.details).merge(d.details)
|
262
|
-
end
|
263
|
-
}
|
264
|
-
/
|
265
|
-
a:genus space b:infragenus space aid:annotation_identification? space c:species space aid:annotation_identification space d:infraspecies_mult {
|
266
|
-
def value
|
267
|
-
a.value + " " + b.value + " " + c.value + " " + d.value
|
268
|
-
end
|
269
|
-
|
270
|
-
def canonical
|
271
|
-
a.canonical + " " + c.canonical + " " + d.canonical
|
272
|
-
end
|
273
|
-
|
274
|
-
def pos
|
275
|
-
a.pos.merge(b.pos).merge(c.pos).merge(d.pos)
|
276
|
-
end
|
277
|
-
|
278
|
-
def hybrid
|
279
|
-
c.hybrid rescue false
|
280
|
-
end
|
281
|
-
|
282
|
-
def details
|
283
|
-
a.details.merge(b.details).merge(c.details).merge(d.details)
|
284
|
-
end
|
285
|
-
}
|
286
|
-
/
|
287
|
-
a:genus space b:infragenus space aid:annotation_identification? space c:species {
|
288
|
-
def value
|
289
|
-
if defined? aid.apply
|
290
|
-
a.value + " " + b.value + aid.apply(c)
|
291
|
-
else
|
292
|
-
a.value + " " + b.value + " " + c.value
|
293
|
-
end
|
294
|
-
end
|
295
|
-
|
296
|
-
def canonical
|
297
|
-
if defined? aid.apply
|
298
|
-
a.canonical + aid.canonical(c)
|
299
|
-
else
|
300
|
-
a.canonical + " " + c.canonical
|
301
|
-
end
|
302
|
-
end
|
303
|
-
|
304
|
-
def pos
|
305
|
-
if defined? aid.apply
|
306
|
-
a.pos.merge(b.pos).merge(aid.pos(c))
|
307
|
-
else
|
308
|
-
a.pos.merge(b.pos).merge(c.pos)
|
309
|
-
end
|
310
|
-
end
|
311
|
-
|
312
|
-
def hybrid
|
313
|
-
c.hybrid rescue false
|
314
|
-
end
|
315
|
-
|
316
|
-
# Merges the detail hashes of genus (a), infragenus (b) and species (c).
# When an annotation_identification node (aid) was matched, the species
# details are obtained through aid.details(c), as the sibling alternatives
# of this rule do. aid.apply(c) yields the String used by #value, and
# passing a String to Hash#merge raises TypeError.
def details
  if defined? aid.apply
    a.details.merge(b.details).merge(aid.details(c))
  else
    a.details.merge(b.details).merge(c.details)
  end
end
|
323
|
-
}
|
324
|
-
/
|
325
|
-
a:genus space aid:annotation_identification? space b:species space_hard c:infraspecies_mult {
|
326
|
-
def value
|
327
|
-
a.value + " " + b.value + " " + c.value
|
328
|
-
end
|
329
|
-
|
330
|
-
def canonical
|
331
|
-
a.canonical + " " + b.canonical + " " + c.canonical
|
332
|
-
end
|
333
|
-
|
334
|
-
def pos
|
335
|
-
a.pos.merge(b.pos).merge(c.pos)
|
336
|
-
end
|
337
|
-
|
338
|
-
def hybrid
|
339
|
-
b.hybrid rescue false
|
340
|
-
end
|
341
|
-
|
342
|
-
def details
|
343
|
-
a.details.merge(b.details).merge(c.details)
|
344
|
-
end
|
345
|
-
}
|
346
|
-
/
|
347
|
-
a:genus space aid:annotation_identification? space b:species {
|
348
|
-
def value
|
349
|
-
if defined? aid.apply
|
350
|
-
a.value + aid.apply(b)
|
351
|
-
else
|
352
|
-
a.value + " " + b.value
|
353
|
-
end
|
354
|
-
end
|
355
|
-
|
356
|
-
def canonical
|
357
|
-
if defined? aid.apply
|
358
|
-
a.canonical + aid.canonical(b)
|
359
|
-
else
|
360
|
-
a.canonical + " " + b.canonical
|
361
|
-
end
|
362
|
-
end
|
363
|
-
|
364
|
-
def pos
|
365
|
-
if defined? aid.apply
|
366
|
-
a.pos.merge(aid.pos(b))
|
367
|
-
else
|
368
|
-
a.pos.merge(b.pos)
|
369
|
-
end
|
370
|
-
end
|
371
|
-
|
372
|
-
def hybrid
|
373
|
-
b.hybrid rescue false
|
374
|
-
end
|
375
|
-
|
376
|
-
def details
|
377
|
-
if defined? aid.apply
|
378
|
-
a.details.merge(aid.details(b))
|
379
|
-
else
|
380
|
-
a.details.merge(b.details)
|
381
|
-
end
|
382
|
-
end
|
383
|
-
}
|
384
|
-
/
|
385
|
-
a:genus space aid:annotation_identification space b:unparsed {
|
386
|
-
def value
|
387
|
-
a.value + aid.apply(b)
|
388
|
-
end
|
389
|
-
|
390
|
-
def canonical
|
391
|
-
a.canonical + aid.canonical(b)
|
392
|
-
end
|
393
|
-
|
394
|
-
def pos
|
395
|
-
a.pos.merge(aid.pos(b))
|
396
|
-
end
|
397
|
-
|
398
|
-
def hybrid
|
399
|
-
false
|
400
|
-
end
|
401
|
-
|
402
|
-
def details
|
403
|
-
a.details.merge(aid.details(b))
|
404
|
-
end
|
405
|
-
}
|
406
|
-
end
|
407
|
-
|
408
|
-
rule multiuninomial_name
|
409
|
-
a:uninomial_name space b:rank_uninomial space c:uninomial_name {
|
410
|
-
|
411
|
-
def value
|
412
|
-
a.value + " " + b.value + " " + c.value
|
413
|
-
end
|
414
|
-
|
415
|
-
def canonical
|
416
|
-
a.canonical
|
417
|
-
end
|
418
|
-
|
419
|
-
def hybrid
|
420
|
-
false
|
421
|
-
end
|
422
|
-
|
423
|
-
def pos
|
424
|
-
a.pos.merge(b.pos(c))
|
425
|
-
end
|
426
|
-
|
427
|
-
def details
|
428
|
-
a.details.merge(b.details(c))
|
429
|
-
end
|
430
|
-
}
|
431
|
-
end
|
432
|
-
|
433
|
-
rule infraspecies_mult
|
434
|
-
a:infraspecies space b:infraspecies_mult {
|
435
|
-
def value
|
436
|
-
a.value + " " + b.value
|
437
|
-
end
|
438
|
-
|
439
|
-
def canonical
|
440
|
-
a.canonical + " " + b.canonical
|
441
|
-
end
|
442
|
-
|
443
|
-
def pos
|
444
|
-
a.pos.merge(b.pos)
|
445
|
-
end
|
446
|
-
|
447
|
-
# Combines the infraspecies entry of the first epithet (a) with the entries
# of the remaining epithets (b) into one Array under :infraspecies, keeping
# any other keys of a.details.
def details
  # Evaluate each child once; b is itself an infraspecies_mult node, so
  # repeated calls would redo the whole recursive chain at every level.
  head = a.details
  first = head[:infraspecies]
  rest = b.details[:infraspecies]
  first = [first] unless first.class == Array
  rest = [rest] unless rest.class == Array
  head.merge({:infraspecies => first + rest})
end
|
452
|
-
}
|
453
|
-
/
|
454
|
-
infraspecies {
|
455
|
-
def details
|
456
|
-
if super[:annotation_identification]
|
457
|
-
{:infraspecies => [{:annotation_identification => super[:annotation_identification], :ignored => super[:ignored]}]}
|
458
|
-
else
|
459
|
-
{:infraspecies => [super[:infraspecies]]}
|
460
|
-
end
|
461
|
-
end
|
462
|
-
}
|
463
|
-
end
|
464
|
-
|
465
|
-
rule infraspecies
|
466
|
-
a:infraspecies_string space b:authorship {
|
467
|
-
def value
|
468
|
-
a.value + " " + b.value
|
469
|
-
end
|
470
|
-
|
471
|
-
def canonical
|
472
|
-
a.canonical
|
473
|
-
end
|
474
|
-
|
475
|
-
def pos
|
476
|
-
a.pos.merge(b.pos)
|
477
|
-
end
|
478
|
-
|
479
|
-
def details
|
480
|
-
{:infraspecies => a.details[:infraspecies].merge(b.details)}
|
481
|
-
end
|
482
|
-
}
|
483
|
-
/
|
484
|
-
infraspecies_string
|
485
|
-
end
|
486
|
-
|
487
|
-
rule infraspecies_string
|
488
|
-
sel:rank space a:species_word {
|
489
|
-
def value
|
490
|
-
sel.apply(a)
|
491
|
-
end
|
492
|
-
def canonical
|
493
|
-
sel.canonical(a)
|
494
|
-
end
|
495
|
-
|
496
|
-
def pos
|
497
|
-
sel.pos(a)
|
498
|
-
end
|
499
|
-
|
500
|
-
def details
|
501
|
-
sel.details(a)
|
502
|
-
end
|
503
|
-
}
|
504
|
-
/
|
505
|
-
aid:annotation_identification space a:species_word ![\.] {
|
506
|
-
def value
|
507
|
-
aid.apply(a)
|
508
|
-
end
|
509
|
-
|
510
|
-
def canonical
|
511
|
-
aid.canonical(a)
|
512
|
-
end
|
513
|
-
|
514
|
-
def pos
|
515
|
-
def a.pos
|
516
|
-
{interval.begin => ["infraspecies", a.interval.end]}
|
517
|
-
end
|
518
|
-
aid.pos(a)
|
519
|
-
end
|
520
|
-
|
521
|
-
def details
|
522
|
-
def a.details
|
523
|
-
{:infraspecies => {:string => value, :rank => "n/a"}}
|
524
|
-
end
|
525
|
-
aid.details(a)
|
526
|
-
end
|
527
|
-
}
|
528
|
-
/
|
529
|
-
a:species_word ![\.] {
|
530
|
-
def value
|
531
|
-
a.value
|
532
|
-
end
|
533
|
-
|
534
|
-
def canonical
|
535
|
-
value
|
536
|
-
end
|
537
|
-
|
538
|
-
def pos
|
539
|
-
{interval.begin => ["infraspecies", interval.end]}
|
540
|
-
end
|
541
|
-
|
542
|
-
def details
|
543
|
-
{:infraspecies => {:string => value, :rank => "n/a"}}
|
544
|
-
end
|
545
|
-
}
|
546
|
-
end
|
547
|
-
|
548
|
-
rule taxon_concept_rank
|
549
|
-
("sec."/"sensu.") {
|
550
|
-
def value
|
551
|
-
"sec."
|
552
|
-
end
|
553
|
-
def apply(a)
|
554
|
-
" " + value + " " + a.value
|
555
|
-
end
|
556
|
-
def details(a = nil)
|
557
|
-
{:taxon_concept => a.details}
|
558
|
-
end
|
559
|
-
}
|
560
|
-
end
|
561
|
-
|
562
|
-
rule rank
|
563
|
-
("morph."/"f.sp."/"B "/"ssp."/"ssp "/"mut."/"nat "/"nothosubsp."/"convar."/"nvar."/"pseudovar."/"sect."/"ser."/"var."/"subvar."/ "[var.]" /"var "/"subsp."/"subsp "/"subf."/"race "/"forma."/"forma "/"fma."/"fma "/"form."/"form "/"fo."/"fo "/"f."/"α"/"ββ"/"β"/"γ"/"δ"/"ε"/"φ"/"θ"/"μ"/"a."/"b."/"c."/"d."/"e."/"g."/"k."/"****"/"**"/"*")
|
564
|
-
{
|
565
|
-
def value
|
566
|
-
text_value.strip
|
567
|
-
end
|
568
|
-
|
569
|
-
def apply(a)
|
570
|
-
" " + text_value.strip + " " + a.value
|
571
|
-
end
|
572
|
-
|
573
|
-
def canonical(a)
|
574
|
-
" " + a.value
|
575
|
-
end
|
576
|
-
|
577
|
-
def pos(a)
|
578
|
-
interval_end = text_value[-1] == " " ? interval.end - 1 : interval.end
|
579
|
-
{interval.begin => ["infraspecific_type", interval_end], a.interval.begin => ["infraspecies", a.interval.end]}
|
580
|
-
end
|
581
|
-
|
582
|
-
def details(a = nil)
|
583
|
-
{:infraspecies => {:string => (a.value rescue nil), :rank => text_value.strip}}
|
584
|
-
end
|
585
|
-
}
|
586
|
-
end
|
587
|
-
|
588
|
-
rule rank_uninomial
|
589
|
-
("sect."/"sect "/"subsect."/"subsect "/"trib."/"trib "/"subtrib."/"subtrib "/"ser."/"ser "/"subgen."/"subgen "/"fam."/"fam "/"subfam."/"subfam "/"supertrib."/"supertrib ") {
|
590
|
-
def value
|
591
|
-
text_value.strip
|
592
|
-
end
|
593
|
-
|
594
|
-
def pos(uni)
|
595
|
-
{interval.begin => ["rank_uninomial", interval.end], uni.interval.begin => ["uninomial", uni.interval.end]}
|
596
|
-
end
|
597
|
-
|
598
|
-
def details(uni)
|
599
|
-
{:rank_uninomials => value, :uninomial2 => uni.details[:uninomial]}
|
600
|
-
end
|
601
|
-
}
|
602
|
-
end
|
603
|
-
|
604
|
-
rule species
|
605
|
-
a:species_string space b:authorship {
|
606
|
-
def value
|
607
|
-
a.value + " " + b.value
|
608
|
-
end
|
609
|
-
|
610
|
-
def canonical
|
611
|
-
a.canonical
|
612
|
-
end
|
613
|
-
|
614
|
-
def hybrid
|
615
|
-
a.hybrid rescue false
|
616
|
-
end
|
617
|
-
|
618
|
-
def pos
|
619
|
-
a.pos.merge(b.pos)
|
620
|
-
end
|
621
|
-
|
622
|
-
def details
|
623
|
-
{:species => a.details[:species].merge(b.details)}
|
624
|
-
end
|
625
|
-
}
|
626
|
-
/
|
627
|
-
species_string
|
628
|
-
end
|
629
|
-
|
630
|
-
rule species_string
|
631
|
-
species_word {
|
632
|
-
def canonical
|
633
|
-
value
|
634
|
-
end
|
635
|
-
|
636
|
-
def pos
|
637
|
-
{interval.begin => ["species", interval.end]}
|
638
|
-
end
|
639
|
-
|
640
|
-
def hybrid
|
641
|
-
false
|
642
|
-
end
|
643
|
-
|
644
|
-
def details
|
645
|
-
{:species => {:string => value}}
|
646
|
-
end
|
647
|
-
}
|
648
|
-
/
|
649
|
-
species_word_hybrid
|
650
|
-
end
|
651
|
-
|
652
|
-
rule infragenus
|
653
|
-
left_paren space a:(cap_latin_word/capped_dotted_char) space right_paren {
|
654
|
-
def value
|
655
|
-
"(" + a.value + ")"
|
656
|
-
end
|
657
|
-
|
658
|
-
def canonical
|
659
|
-
a.value
|
660
|
-
end
|
661
|
-
|
662
|
-
def pos
|
663
|
-
{a.interval.begin => ["infragenus", a.interval.end]}
|
664
|
-
end
|
665
|
-
|
666
|
-
def details
|
667
|
-
{:infragenus => {:string => a.value}}
|
668
|
-
end
|
669
|
-
}
|
670
|
-
end
|
671
|
-
|
672
|
-
rule genus
|
673
|
-
a:(abbreviated_genus/uninomial_string) !(space_hard author_prefix_word space_hard author_word) {
|
674
|
-
def value
|
675
|
-
a.value
|
676
|
-
end
|
677
|
-
|
678
|
-
def pos
|
679
|
-
{a.interval.begin => ["genus", a.interval.end]}
|
680
|
-
end
|
681
|
-
|
682
|
-
def canonical
|
683
|
-
a.value
|
684
|
-
end
|
685
|
-
|
686
|
-
def details
|
687
|
-
{:genus => {:string => a.value}}
|
688
|
-
end
|
689
|
-
}
|
690
|
-
end
|
691
|
-
|
692
|
-
rule abbreviated_genus
|
693
|
-
[A-Z] [a-z]? [a-z]? [\\.] space {
|
694
|
-
def value
|
695
|
-
text_value.strip
|
696
|
-
end
|
697
|
-
|
698
|
-
def canonical
|
699
|
-
value
|
700
|
-
end
|
701
|
-
|
702
|
-
def pos
|
703
|
-
{interval.begin => ["abbreviated_genus", interval.end]}
|
704
|
-
end
|
705
|
-
|
706
|
-
def details
|
707
|
-
{:abbreviated_genus => {:string => value}}
|
708
|
-
end
|
709
|
-
}
|
710
|
-
end
|
711
|
-
|
712
|
-
rule uninomial_name
|
713
|
-
a:uninomial_string space b:infragenus space c:simple_authorship {
|
714
|
-
def value
|
715
|
-
a.value + " " + b.value + " " + c.value
|
716
|
-
end
|
717
|
-
|
718
|
-
def canonical
|
719
|
-
a.canonical
|
720
|
-
end
|
721
|
-
|
722
|
-
def pos
|
723
|
-
a.pos.merge(b.pos).merge(c.pos)
|
724
|
-
end
|
725
|
-
|
726
|
-
def hybrid
|
727
|
-
false
|
728
|
-
end
|
729
|
-
|
730
|
-
def details
|
731
|
-
{:uninomial => a.details[:uninomial].merge(b.details).merge(c.details)}
|
732
|
-
end
|
733
|
-
}
|
734
|
-
/
|
735
|
-
a:uninomial_string space b:infragenus {
|
736
|
-
def value
|
737
|
-
a.value + " " + b.value
|
738
|
-
end
|
739
|
-
|
740
|
-
def canonical
|
741
|
-
a.canonical
|
742
|
-
end
|
743
|
-
|
744
|
-
def pos
|
745
|
-
a.pos.merge(b.pos)
|
746
|
-
end
|
747
|
-
|
748
|
-
def hybrid
|
749
|
-
false
|
750
|
-
end
|
751
|
-
|
752
|
-
def details
|
753
|
-
{:uninomial => a.details[:uninomial].merge(b.details)}
|
754
|
-
end
|
755
|
-
}
|
756
|
-
/
|
757
|
-
a:uninomial_string space_hard b:authorship {
|
758
|
-
def value
|
759
|
-
a.value + " " + b.value
|
760
|
-
end
|
761
|
-
|
762
|
-
def canonical
|
763
|
-
a.canonical
|
764
|
-
end
|
765
|
-
|
766
|
-
def pos
|
767
|
-
a.pos.merge(b.pos)
|
768
|
-
end
|
769
|
-
|
770
|
-
def hybrid
|
771
|
-
false
|
772
|
-
end
|
773
|
-
|
774
|
-
def details
|
775
|
-
{:uninomial => a.details[:uninomial].merge(b.details)}
|
776
|
-
end
|
777
|
-
}
|
778
|
-
/
|
779
|
-
uninomial_string
|
780
|
-
end
|
781
|
-
|
782
|
-
rule uninomial_string
|
783
|
-
(cap_latin_word_pair/cap_latin_word) {
|
784
|
-
def canonical
|
785
|
-
value
|
786
|
-
end
|
787
|
-
|
788
|
-
def pos
|
789
|
-
{interval.begin => ["uninomial", interval.end]}
|
790
|
-
end
|
791
|
-
|
792
|
-
def hybrid
|
793
|
-
false
|
794
|
-
end
|
795
|
-
|
796
|
-
def details
|
797
|
-
{:uninomial => {:string => value}}
|
798
|
-
end
|
799
|
-
}
|
800
|
-
end
|
801
|
-
|
802
|
-
rule authorship
|
803
|
-
a:basionym_authorship_with_parenthesis space b:simple_authorship ","? space c:ex_authorship {
|
804
|
-
def value
|
805
|
-
a.value + " " + b.value + " " + c.value
|
806
|
-
end
|
807
|
-
|
808
|
-
def pos
|
809
|
-
a.pos.merge(b.pos).merge(c.pos)
|
810
|
-
end
|
811
|
-
|
812
|
-
def details
|
813
|
-
val = {:authorship => text_value.strip, :combinationAuthorTeam => b.details[:basionymAuthorTeam], :basionymAuthorTeam => a.details[:basionymAuthorTeam]}
|
814
|
-
val[:combinationAuthorTeam].merge!(c.details)
|
815
|
-
val
|
816
|
-
end
|
817
|
-
}
|
818
|
-
/
|
819
|
-
a:basionym_authorship_with_parenthesis space b:simple_authorship space c:emend_authorship {
|
820
|
-
def value
|
821
|
-
a.value + " " + b.value + " " + c.value
|
822
|
-
end
|
823
|
-
|
824
|
-
def pos
|
825
|
-
a.pos.merge(b.pos).merge(c.pos)
|
826
|
-
end
|
827
|
-
|
828
|
-
def details
|
829
|
-
val = {:authorship => text_value.strip, :combinationAuthorTeam => b.details[:basionymAuthorTeam], :basionymAuthorTeam => a.details[:basionymAuthorTeam]}
|
830
|
-
val[:combinationAuthorTeam].merge!(c.details)
|
831
|
-
val
|
832
|
-
end
|
833
|
-
}
|
834
|
-
/
|
835
|
-
a:basionym_authorship_with_parenthesis space b:simple_authorship {
|
836
|
-
def value
|
837
|
-
a.value + " " + b.value
|
838
|
-
end
|
839
|
-
|
840
|
-
def pos
|
841
|
-
a.pos.merge(b.pos)
|
842
|
-
end
|
843
|
-
|
844
|
-
def details
|
845
|
-
{:authorship => text_value.strip, :combinationAuthorTeam => b.details[:basionymAuthorTeam], :basionymAuthorTeam => a.details[:basionymAuthorTeam]}
|
846
|
-
end
|
847
|
-
}
|
848
|
-
/
|
849
|
-
basionym_authorship_with_parenthesis
|
850
|
-
/
|
851
|
-
a:simple_authorship ","? space b:ex_authorship {
|
852
|
-
def value
|
853
|
-
a.value + " " + b.value
|
854
|
-
end
|
855
|
-
|
856
|
-
def pos
|
857
|
-
a.pos.merge(b.pos)
|
858
|
-
end
|
859
|
-
|
860
|
-
def details
|
861
|
-
val = a.details
|
862
|
-
val[:authorship] = text_value.strip
|
863
|
-
val[:basionymAuthorTeam].merge!(b.details)
|
864
|
-
val
|
865
|
-
end
|
866
|
-
}
|
867
|
-
/
|
868
|
-
a:simple_authorship space b:emend_authorship {
|
869
|
-
def value
|
870
|
-
a.value + " " + b.value
|
871
|
-
end
|
872
|
-
|
873
|
-
def pos
|
874
|
-
a.pos.merge(b.pos)
|
875
|
-
end
|
876
|
-
|
877
|
-
def details
|
878
|
-
val = a.details
|
879
|
-
val[:authorship] = text_value.strip
|
880
|
-
val[:basionymAuthorTeam].merge!(b.details)
|
881
|
-
val
|
882
|
-
end
|
883
|
-
}
|
884
|
-
/
|
885
|
-
simple_authorship
|
886
|
-
end
|
887
|
-
|
888
|
-
|
889
|
-
rule basionym_authorship_with_parenthesis
|
890
|
-
left_paren space a:authors_names space right_paren space [,]? space b:year {
|
891
|
-
def value
|
892
|
-
"(" + a.value + " " + b.value + ")"
|
893
|
-
end
|
894
|
-
|
895
|
-
def pos
|
896
|
-
a.pos.merge(b.pos)
|
897
|
-
end
|
898
|
-
|
899
|
-
def details
|
900
|
-
{ :authorship => text_value,
|
901
|
-
:basionymAuthorTeam => {:author_team => text_value}.merge(a.details).merge(b.details)
|
902
|
-
}
|
903
|
-
end
|
904
|
-
}
|
905
|
-
/
|
906
|
-
left_paren space a:simple_authorship ","? space b:ex_authorship space right_paren {
|
907
|
-
def value
|
908
|
-
"(" + a.value + " " + b.value + ")"
|
909
|
-
end
|
910
|
-
|
911
|
-
def pos
|
912
|
-
a.pos.merge(b.pos)
|
913
|
-
end
|
914
|
-
|
915
|
-
def details
|
916
|
-
val = a.details
|
917
|
-
val[:basionymAuthorTeam].merge!(b.details)
|
918
|
-
val[:authorship] = text_value.strip
|
919
|
-
val
|
920
|
-
end
|
921
|
-
}
|
922
|
-
/
|
923
|
-
left_paren space a:simple_authorship space b:emend_authorship space right_paren {
|
924
|
-
def value
|
925
|
-
"(" + a.value + " " + b.value + ")"
|
926
|
-
end
|
927
|
-
|
928
|
-
def pos
|
929
|
-
a.pos.merge(b.pos)
|
930
|
-
end
|
931
|
-
|
932
|
-
def details
|
933
|
-
val = a.details
|
934
|
-
val[:basionymAuthorTeam].merge!(b.details)
|
935
|
-
val[:authorship] = text_value.strip
|
936
|
-
val
|
937
|
-
end
|
938
|
-
}
|
939
|
-
/
|
940
|
-
left_paren space a:simple_authorship space right_paren {
|
941
|
-
def value
|
942
|
-
"(" + a.value + ")"
|
943
|
-
end
|
944
|
-
|
945
|
-
def pos
|
946
|
-
a.pos
|
947
|
-
end
|
948
|
-
|
949
|
-
def details
|
950
|
-
val = a.details
|
951
|
-
val[:authorship] = text_value
|
952
|
-
val
|
953
|
-
end
|
954
|
-
}
|
955
|
-
/
|
956
|
-
left_paren space a:"?" space right_paren {
|
957
|
-
def value
|
958
|
-
"(?)"
|
959
|
-
end
|
960
|
-
|
961
|
-
def pos
|
962
|
-
{a.interval.begin => ["unknown_author", a.interval.end]}
|
963
|
-
end
|
964
|
-
|
965
|
-
def details
|
966
|
-
{:authorship => text_value, :basionymAuthorTeam => {:authorTeam => text_value, :author => ["?"]}}
|
967
|
-
end
|
968
|
-
}
|
969
|
-
end
|
970
|
-
|
971
|
-
rule ex_authorship
|
972
|
-
ex_sep space b:simple_authorship space ex_sep space c:simple_authorship {
|
973
|
-
def value
|
974
|
-
" ex " + b.value + " ex " + c.value
|
975
|
-
end
|
976
|
-
|
977
|
-
def pos
|
978
|
-
b.pos
|
979
|
-
end
|
980
|
-
|
981
|
-
def details
|
982
|
-
val = {:exAuthorTeam => {:authorTeam => b.text_value.strip}.merge(b.details[:basionymAuthorTeam])}
|
983
|
-
val
|
984
|
-
end
|
985
|
-
}
|
986
|
-
/
|
987
|
-
ex_sep space b:simple_authorship {
|
988
|
-
def value
|
989
|
-
" ex " + b.value
|
990
|
-
end
|
991
|
-
|
992
|
-
def pos
|
993
|
-
b.pos
|
994
|
-
end
|
995
|
-
|
996
|
-
def details
|
997
|
-
val = {:exAuthorTeam => {:authorTeam => b.text_value.strip}.merge(b.details[:basionymAuthorTeam])}
|
998
|
-
val
|
999
|
-
end
|
1000
|
-
}
|
1001
|
-
end
|
1002
|
-
|
1003
|
-
rule emend_authorship
|
1004
|
-
emend space b:simple_authorship {
|
1005
|
-
def value
|
1006
|
-
" emend. " + b.value
|
1007
|
-
end
|
1008
|
-
|
1009
|
-
def pos
|
1010
|
-
b.pos
|
1011
|
-
end
|
1012
|
-
|
1013
|
-
def details
|
1014
|
-
val = {:emendAuthorTeam => {:authorTeam => b.text_value.strip}.merge(b.details[:basionymAuthorTeam])}
|
1015
|
-
val
|
1016
|
-
end
|
1017
|
-
}
|
1018
|
-
end
|
1019
|
-
|
1020
|
-
rule simple_authorship
|
1021
|
-
a:authors_names space [,]? space b:year? [,]? space "non" space authors_names space [,]? space year {
|
1022
|
-
# True when the optional year (label b) was actually matched. When it was
# not, b is not a year node and does not provide value/pos/details.
def year_given?
  b.respond_to?(:details)
end

# Normalized authorship: the author names followed by the year, if any.
# The trailing "non ..." clause matched by this alternative is not echoed.
def value
  year_given? ? a.value + " " + b.value : a.value
end

# Position entries of the author names, plus the year's when present.
def pos
  year_given? ? a.pos.merge(b.pos) : a.pos
end

# Authorship details filed under :basionymAuthorTeam.
def details
  details_with_arg(:basionymAuthorTeam)
end

# Builds the authorship hash with the author team stored under the given
# key (String or Symbol). The year details are merged only when a year
# was matched.
def details_with_arg(authorTeamType = "basionymAuthorTeam")
  team = { :authorTeam => a.text_value.strip }.merge(a.details)
  team = team.merge(b.details) if year_given?
  { :authorship => text_value,
    authorTeamType.to_sym => team
  }
end
|
1041
|
-
}
|
1042
|
-
/
|
1043
|
-
a:authors_names space [,]? space b:year {
|
1044
|
-
def value
|
1045
|
-
a.value + " " + b.value
|
1046
|
-
end
|
1047
|
-
|
1048
|
-
def pos
|
1049
|
-
a.pos.merge(b.pos)
|
1050
|
-
end
|
1051
|
-
|
1052
|
-
def details
|
1053
|
-
details_with_arg(:basionymAuthorTeam)
|
1054
|
-
end
|
1055
|
-
|
1056
|
-
def details_with_arg(authorTeamType = "basionymAuthorTeam")
|
1057
|
-
{ :authorship => text_value,
|
1058
|
-
authorTeamType.to_sym => {
|
1059
|
-
:authorTeam => a.text_value.strip
|
1060
|
-
}.merge(a.details).merge(b.details)
|
1061
|
-
}
|
1062
|
-
end
|
1063
|
-
}
|
1064
|
-
/
|
1065
|
-
authors_names {
|
1066
|
-
def details
|
1067
|
-
details = details_with_arg(:basionymAuthorTeam)
|
1068
|
-
details[:basionymAuthorTeam].merge!(super)
|
1069
|
-
details
|
1070
|
-
end
|
1071
|
-
|
1072
|
-
def details_with_arg(authorTeamType = "basionymAuthorTeam")
|
1073
|
-
{ :authorship => text_value,
|
1074
|
-
authorTeamType.to_sym => {
|
1075
|
-
:authorTeam => text_value,
|
1076
|
-
}
|
1077
|
-
}
|
1078
|
-
end
|
1079
|
-
}
|
1080
|
-
end
|
1081
|
-
|
1082
|
-
rule authors_names
|
1083
|
-
a:author_name space sep:author_separator space b:authors_names {
|
1084
|
-
def value
|
1085
|
-
sep.apply(a,b)
|
1086
|
-
end
|
1087
|
-
|
1088
|
-
def pos
|
1089
|
-
sep.pos(a,b)
|
1090
|
-
end
|
1091
|
-
|
1092
|
-
def details
|
1093
|
-
sep.details(a,b)
|
1094
|
-
end
|
1095
|
-
}
|
1096
|
-
/
|
1097
|
-
author_name
|
1098
|
-
/
|
1099
|
-
unknown_auth
|
1100
|
-
end
|
1101
|
-
|
1102
|
-
|
1103
|
-
rule unknown_auth
|
1104
|
-
("auct."/"auct"/"hort."/"hort"/"anon."/"anon"/"ht."/"ht") !latin_word {
|
1105
|
-
def value
|
1106
|
-
text_value
|
1107
|
-
end
|
1108
|
-
|
1109
|
-
def pos
|
1110
|
-
{interval.begin => ["unknown_author", interval.end]}
|
1111
|
-
end
|
1112
|
-
|
1113
|
-
def details
|
1114
|
-
{:author => ["unknown"]}
|
1115
|
-
end
|
1116
|
-
}
|
1117
|
-
end
|
1118
|
-
|
1119
|
-
# Matches the separator word "ex" or "in" (used by ex_authorship).
# The &[\s] lookahead requires a whitespace character to follow, without
# consuming it.
rule ex_sep
|
1120
|
-
("ex"/"in") &[\s]
|
1121
|
-
end
|
1122
|
-
|
1123
|
-
# Matches the marker "emend." or "emend" (used by emend_authorship).
# "emend." is tried first so the trailing dot is consumed when present; the
# &[\s] lookahead requires a whitespace character to follow, without
# consuming it.
rule emend
|
1124
|
-
("emend."/"emend") &[\s]
|
1125
|
-
end
|
1126
|
-
|
1127
|
-
rule author_separator
|
1128
|
-
("&"/"&"/",&"/", &"/",and"/", and"/"apud"/"and"/"et"/",") {
|
1129
|
-
def apply(a,b)
|
1130
|
-
sep = text_value.strip
|
1131
|
-
sep = " &" if ["&", "&", "and", "et",
|
1132
|
-
",&", ", &", ",and", ", and"].include? sep
|
1133
|
-
sep = " apud" if sep == "apud"
|
1134
|
-
a.value + sep + " " + b.value
|
1135
|
-
end
|
1136
|
-
|
1137
|
-
def pos(a,b)
|
1138
|
-
a.pos.merge(b.pos)
|
1139
|
-
end
|
1140
|
-
|
1141
|
-
def details(a,b)
|
1142
|
-
{:author => a.details[:author] + b.details[:author]}
|
1143
|
-
end
|
1144
|
-
}
|
1145
|
-
end
|
1146
|
-
|
1147
|
-
rule author_name
|
1148
|
-
space a:author_name_without_postfix space b:author_maybe_filius space !latin_word {
|
1149
|
-
def value
|
1150
|
-
a.value + " " + b.value
|
1151
|
-
end
|
1152
|
-
|
1153
|
-
def pos
|
1154
|
-
a.pos.merge(b.pos)
|
1155
|
-
end
|
1156
|
-
|
1157
|
-
def details
|
1158
|
-
{:author => [value]}
|
1159
|
-
end
|
1160
|
-
}
|
1161
|
-
/
|
1162
|
-
space a:author_name_without_postfix space b:author_filius {
|
1163
|
-
def value
|
1164
|
-
a.value + " " + b.value
|
1165
|
-
end
|
1166
|
-
|
1167
|
-
def pos
|
1168
|
-
a.pos.merge(b.pos)
|
1169
|
-
end
|
1170
|
-
|
1171
|
-
def details
|
1172
|
-
{:author => [value]}
|
1173
|
-
end
|
1174
|
-
}
|
1175
|
-
/
|
1176
|
-
author_name_without_postfix
|
1177
|
-
end
|
1178
|
-
|
1179
|
-
rule author_name_without_postfix
|
1180
|
-
space a:author_prefix_word space b:author_name {
|
1181
|
-
def value
|
1182
|
-
a.value + " " + b.value
|
1183
|
-
end
|
1184
|
-
|
1185
|
-
def pos
|
1186
|
-
a.pos.merge(b.pos)
|
1187
|
-
end
|
1188
|
-
|
1189
|
-
def details
|
1190
|
-
{:author => [value]}
|
1191
|
-
end
|
1192
|
-
}
|
1193
|
-
/
|
1194
|
-
a:author_word space b:author_name {
|
1195
|
-
def value
|
1196
|
-
a.value + " " + b.value
|
1197
|
-
end
|
1198
|
-
|
1199
|
-
def pos
|
1200
|
-
a.pos.merge(b.pos)
|
1201
|
-
end
|
1202
|
-
|
1203
|
-
def details
|
1204
|
-
{:author => [value]}
|
1205
|
-
end
|
1206
|
-
}
|
1207
|
-
/
|
1208
|
-
author_word
|
1209
|
-
end
|
1210
|
-
|
1211
|
-
rule author_word
|
1212
|
-
"A S. Xu" {
|
1213
|
-
def value
|
1214
|
-
text_value.strip
|
1215
|
-
end
|
1216
|
-
|
1217
|
-
# Positions of the three words of the literal author string "A S. Xu".
# Each entry maps a word's start offset to ["author_word", end offset].
# End offsets are absolute input offsets, in line with the interval.end
# values reported by the other pos methods of this grammar.
def pos
  start = interval.begin
  {
    start => ["author_word", start + 1],       # "A"
    (start + 2) => ["author_word", start + 4], # "S."
    (start + 5) => ["author_word", start + 7]  # "Xu"
  }
end
|
1220
|
-
|
1221
|
-
def details
|
1222
|
-
{:author => [value]}
|
1223
|
-
end
|
1224
|
-
}
|
1225
|
-
/
|
1226
|
-
("arg."/"et al.\{\?\}"/"et al."/"et al"/"& al."/"& al") {
|
1227
|
-
def value
|
1228
|
-
text_value.strip
|
1229
|
-
end
|
1230
|
-
|
1231
|
-
def pos
|
1232
|
-
#cheating because there are several words in some of them
|
1233
|
-
{interval.begin => ["author_word", interval.end]}
|
1234
|
-
end
|
1235
|
-
|
1236
|
-
def details
|
1237
|
-
{:author => [value]}
|
1238
|
-
end
|
1239
|
-
}
|
1240
|
-
/
|
1241
|
-
("d" ['’])? ("Å"/"Ö"/"Á"/"Ø"/"Ô"/"Š"/"Ś"/"Č"/"Ķ"/"Ł"/"É"/"Ž"/[A-W]/[Y-Z]) [^0-9\[\]\(\)\s&,]* {
|
1242
|
-
def value
|
1243
|
-
text_value.gsub(/([\p{Lu}]{3,})/) do |match|
|
1244
|
-
UnicodeUtils.titlecase(match)
|
1245
|
-
end
|
1246
|
-
end
|
1247
|
-
|
1248
|
-
def pos
|
1249
|
-
{interval.begin => ["author_word", interval.end]}
|
1250
|
-
end
|
1251
|
-
|
1252
|
-
def details
|
1253
|
-
{:author => [value]}
|
1254
|
-
end
|
1255
|
-
}
|
1256
|
-
/
|
1257
|
-
"X" [^0-9\[\]\(\)\s&,]+ {
|
1258
|
-
def value
|
1259
|
-
text_value
|
1260
|
-
end
|
1261
|
-
|
1262
|
-
def pos
|
1263
|
-
{interval.begin => ["author_word", interval.end]}
|
1264
|
-
end
|
1265
|
-
|
1266
|
-
def details
|
1267
|
-
{:author => [value]}
|
1268
|
-
end
|
1269
|
-
}
|
1270
|
-
/
|
1271
|
-
author_prefix_word
|
1272
|
-
end
|
1273
|
-
|
1274
|
-
# A lower-case particle that may start an author name ("van", "von",
# "de", ...).
#
# The particle must be followed by whitespace; that whitespace is only
# checked with a lookahead and is not consumed. The rule itself begins with
# optional whitespace, and +value+ returns the raw matched text, so any
# leading whitespace matched here is part of +value+.
rule author_prefix_word
  space ("ab"/"af"/"bis"/"da"/"der"/"del"/"des"/"den"/"della"/"dela"/"de"/"di"/"du"/"do"/"la"/"'t"/"ter"/"van"/"von") &space_hard {
    def value
      text_value
    end

    def pos
      # Simplification: the whole match is reported as one author_word span.
      { interval.begin => ["author_word", interval.end] }
    end

    # author_word falls through to this rule as its last alternative, so a
    # node of this rule can appear wherever an author_word node is expected.
    # Every other author_word alternative responds to +details+; provide the
    # same method here so such a node does not raise NoMethodError.
    def details
      { author: [value] }
    end
  }
end
|
1286
|
-
|
1287
|
-
# Unambiguous filius marker: the literal "fil." or "filius".
# +value+ is the matched text with surrounding whitespace stripped; +pos+
# reports the match as a single author_word span.
rule author_filius
  ("fil."/"filius") {
    def value
      matched = text_value
      matched.strip
    end

    def pos
      span_start = interval.begin
      { span_start => ["author_word", interval.end] }
    end
  }
end
|
1298
|
-
|
1299
|
-
# The literal "f.".
# +value+ is the matched text with surrounding whitespace stripped; +pos+
# reports the match as a single author_word span.
rule author_maybe_filius
  "f." {
    def value
      matched = text_value
      matched.strip
    end

    def pos
      span_start = interval.begin
      { span_start => ["author_word", interval.end] }
    end
  }
end
|
1310
|
-
|
1311
|
-
# Two capitalised Latin words joined by a hyphen.
# +value+ drops the hyphen and lower-cases the second word before appending
# it to the first.
rule cap_latin_word_pair
  a:cap_latin_word "-" b:cap_latin_word {
    def value
      second_part = b.value.downcase
      a.value + second_part
    end
  }
end
|
1318
|
-
|
1319
|
-
# A capitalised Latin word.
#
# Ordered alternatives:
#   1. a capital letter or capital digraph, a latin_word, and a trailing
#      "?" (the "?" is not included in +value+);
#   2. a capital letter or capital digraph followed by a latin_word;
#   3. "AE" or "OE" followed by a latin_word; +value+ rewrites the pair to
#      "Ae" / "Oe";
#   4. one of a fixed list of two-letter words, returned unchanged.
rule cap_latin_word
  a:([A-Z]/cap_digraph) b:latin_word "?" {
    def value
      # A cap_digraph node defines +value+; a plain [A-Z] node does not, so
      # fall back to its raw text. Checked explicitly rather than with a
      # rescue modifier, which would also hide unrelated errors.
      first = a.respond_to?(:value) ? a.value : a.text_value
      first + b.value
    end
  }
  /
  a:([A-Z]/cap_digraph) b:latin_word {
    def value
      # Same fallback as above: only cap_digraph nodes define +value+.
      first = a.respond_to?(:value) ? a.value : a.text_value
      first + b.value
    end
  }
  /
  a:("AE"/"OE") b:latin_word {
    def value
      # Keep the first capital and lower-case the "E".
      a.text_value[0..0] + "e" + b.value
    end
  }
  /
  ("Ca"/"Ea"/"Ge"/"Ia"/"Io"/"Ix"/"Lo"/"Oa"/"Ra"/"Ty"/"Ua"/"Aa"/"Ja"/"Zu"/"La"/"Qu"/"As"/"Ba") {
    def value
      text_value
    end
  }
end
|
1344
|
-
|
1345
|
-
# One capital letter A-Z immediately followed by a period, e.g. "L.".
# +value+ is the matched text unchanged.
rule capped_dotted_char
  [A-Z] "." {
    def value
      text_value
    end
  }
end
|
1352
|
-
|
1353
|
-
# A species word preceded by a hybrid marker.
#
# Ordered alternatives, by marker:
#   1. a multiplication sign, then optional whitespace;
#   2. a capital "X", then optional whitespace;
#   3. a lower-case "x", then mandatory whitespace.
#
# In every alternative +value+ is "× " plus the species word (alternative 1
# takes the sign from the multiplication_sign node), +canonical+ is the
# species word alone, +hybrid+ is true, and +pos+ / +details+ describe only
# the species word, not the marker.
rule species_word_hybrid
  a:multiplication_sign space b:species_word {
    def value
      "#{a.value} #{b.value}"
    end

    def canonical
      b.value
    end

    def hybrid
      true
    end

    def pos
      word_span = b.interval
      { word_span.begin => ["species", word_span.end] }
    end

    def details
      { species: { string: b.value } }
    end
  }
  /
  a:"X" space b:species_word {
    def value
      "× #{b.value}"
    end

    def canonical
      b.value
    end

    def hybrid
      true
    end

    def pos
      word_span = b.interval
      { word_span.begin => ["species", word_span.end] }
    end

    def details
      { species: { string: b.value } }
    end
  }
  /
  a:"x" space_hard b:species_word {
    def value
      "× #{b.value}"
    end

    def canonical
      b.value
    end

    def hybrid
      true
    end

    def pos
      word_span = b.interval
      { word_span.begin => ["species", word_span.end] }
    end

    def details
      { species: { string: b.value } }
    end
  }
end
|
1420
|
-
|
1421
|
-
# An identification annotation such as "sp.", "aff.", "nr." or "cf.".
#
# Ordered alternatives:
#   1. "sp."-like markers ("sp.nr.", "nr.", "sp.aff.", "sp.", "species",
#      "spp.", "aff.", "monst.", "? ", ...). The species node passed in is
#      ignored for the name: +apply+ and +canonical+ return an empty string
#      and +details+ files the species details under :ignored.
#   2. "cf." / "cf ". The species node is kept: +apply+ and +canonical+
#      include it, +pos+ merges its positions, and +details+ files it under
#      :species.
#
# Several literals end with a space. +value+ strips it, and +pos+ excludes
# it from the reported end offset.
rule annotation_identification
  ("sp.nr."/"sp. nr."/"nr."/"nr "/"sp.aff."/"sp. aff."/"sp."/"sp "/"species"/"spp."/"spp "/"aff."/"aff "/"monst."/"? ") {
    def value
      text_value.strip
    end

    def apply(sp)
      ""
    end

    def canonical(sp)
      ""
    end

    def pos(sp)
      # Do not count a trailing space of the literal as part of the span.
      interval_end = text_value.end_with?(" ") ? interval.end - 1 : interval.end
      { interval.begin => ["annotation_identification", interval_end] }
    end

    def details(sp)
      { annotation_identification: value, ignored: sp.details }
    end
  }
  /
  ("cf."/"cf ") {
    def value
      text_value.strip
    end

    def apply(sp)
      " " + value + " " + sp.value
    end

    def canonical(sp)
      " " + sp.canonical
    end

    def pos(sp)
      # Do not count a trailing space of the literal as part of the span.
      interval_end = text_value.end_with?(" ") ? interval.end - 1 : interval.end
      { interval.begin => ["annotation_identification", interval_end] }.merge(sp.pos)
    end

    def details(sp)
      { annotation_identification: value, species: sp.details }
    end
  }
end
|
1469
|
-
|
1470
|
-
# A species epithet, optionally written with a leading number.
#
# Alternative 1 matches digits, an optional hyphen and a latin_word. In
# +value+ a number found in the lookup table is replaced by its spelled-out
# prefix (the hyphen is dropped); any other number is kept as digits
# followed by a hyphen. Alternative 2 is a plain latin_word.
rule species_word
  a:[0-9]+ "-"? b:latin_word {
    def value
      spelled_prefixes = {
        "1" => "uni", "2" => "du", "3" => "tri", "4" => "quadri",
        "5" => "quinque", "6" => "hexa", "7" => "septem", "8" => "octo",
        "9" => "novem", "10" => "decem", "11" => "undecim",
        "12" => "duodec", "13" => "tredec", "14" => "quattuordec",
        "15" => "quinquadec", "16" => "hexadec", "17" => "septendec",
        "18" => "octodec", "19" => "novemdec", "20" => "viginti",
        "21" => "unviginti", "22" => "duodeviginti", "23" => "triviginti",
        "24" => "quattuorviginti", "25" => "quinquatviginti",
        "26" => "hexaviginti", "27" => "septenviginti",
        "28" => "octoviginti", "29" => "novemviginti", "30" => "triginta",
        "38" => "trigintaocto", "100" => "centi"
      }
      digits = a.text_value
      prefix = spelled_prefixes.fetch(digits) { digits + "-" }
      prefix + b.value
    end
  }
  /
  latin_word
end
|
1481
|
-
|
1482
|
-
# A lower-case Latin word.
#
# Ordered alternatives:
#   1. letters, a hyphen and another latin_word; the hyphen is kept;
#   2. one letter, an apostrophe and a latin_word; the apostrophe is
#      dropped from +value+;
#   3. one letter followed by one or more letters.
rule latin_word
  a:valid_name_letters "-" b:latin_word {
    def value
      "#{a.value}-#{b.value}"
    end
  }
  /
  a:valid_name_letter "'" b:latin_word {
    def value
      first_letter = a.value
      first_letter + b.value
    end
  }
  /
  a:valid_name_letter b:valid_name_letters {
    def value
      first_letter = a.value
      first_letter + b.value
    end
  }
end
|
1501
|
-
|
1502
|
-
# One or more lower-case name letters (a-z, ë, æ, œ).
# +value+ returns the text with the ligatures expanded and ë normalized.
rule valid_name_letters
  [a-zëæœ]+ {
    def value
      normalized = text_value.each_char.map do |letter|
        case letter
        when "æ" then "ae"
        when "œ" then "oe"
        # ë is legal in the botanical code, but normalizing it to "e" is
        # beneficial for reconciliation purposes.
        when "ë" then "e"
        else letter
        end
      end
      normalized.join
    end
  }
end
|
1518
|
-
|
1519
|
-
# A single lower-case name letter (a-z, ë, æ, œ).
# +value+ expands æ to "ae" and œ to "oe", maps ë to "e", and returns any
# other letter unchanged.
rule valid_name_letter
  [a-zëæœ] {
    def value
      case text_value
      when "æ" then "ae"
      when "œ" then "oe"
      when "ë" then "e"
      else text_value
      end
    end
  }
end
|
1530
|
-
|
1531
|
-
|
1532
|
-
# A capital ligature; +value+ spells it out as two letters
# ("Æ" becomes "Ae", "Œ" becomes "Oe").
rule cap_digraph
  "Æ" {
    def value
      "Ae"
    end
  }
  /
  "Œ" {
    def value
      "Oe"
    end
  }
end
|
1545
|
-
|
1546
|
-
# A publication year.
#
# Ordered alternatives:
#   1. a year (with or without a trailing letter) wrapped in parentheses,
#      with optional whitespace inside them; +value+, +pos+ and +details+
#      are delegated to the inner year node, so the parentheses are not
#      reported;
#   2. a year followed by a letter;
#   3. a bare year.
rule year
  b:left_paren space a:(year_number_with_character/year_number) space c:right_paren {
    def value
      inner_year = a
      inner_year.value
    end

    def pos
      inner_year = a
      inner_year.pos
    end

    def details
      inner_year = a
      inner_year.details
    end
  }
  /
  year_number_with_character
  /
  year_number
end
|
1565
|
-
|
1566
|
-
# A year immediately followed by one ASCII letter, e.g. "1935h".
# +value+ is the text of the year part only (the letter is dropped), while
# +pos+ spans the whole match including the letter.
rule year_number_with_character
  a:year_number [a-zA-Z] {
    def value
      a.text_value
    end

    def pos
      span_start = interval.begin
      { span_start => ["year", interval.end] }
    end

    def details
      { year: value }
    end
  }
end
|
1581
|
-
|
1582
|
-
# A year: "1" or "2", then one of 7, 8, 9, 0, then a digit, then either a
# digit with an optional trailing "?" or a "?" in place of the last digit.
# +value+ is the matched text unchanged, including any "?".
rule year_number
  [12] [7890] [0-9] ([0-9] [\?]?/"?") {
    def value
      text_value
    end

    def pos
      span_start = interval.begin
      { span_start => ["year", interval.end] }
    end

    def details
      { year: value }
    end
  }
end
|
1597
|
-
|
1598
|
-
# A literal opening parenthesis.
rule left_paren
  "("
end
|
1601
|
-
|
1602
|
-
# A literal closing parenthesis.
rule right_paren
  ")"
end
|
1605
|
-
|
1606
|
-
# A hybrid marker: the letter "x" / "X" or a multiplication sign.
# For the letter form +value+ is "×"; the sign form is handled by
# multiplication_sign.
rule hybrid_character
  ("x"/"X") {
    def value
      "×"
    end
  }
  /
  multiplication_sign
end
|
1615
|
-
|
1616
|
-
# A multiplication sign, written either as "×" or as "*".
# +value+ is always "×".
rule multiplication_sign
  ("×"/"*") {
    def value
      "×"
    end
  }
end
|
1623
|
-
|
1624
|
-
# Optional whitespace: zero or more whitespace characters.
rule space
  [\s]*
end
|
1627
|
-
|
1628
|
-
# Mandatory whitespace: one or more whitespace characters.
rule space_hard
  [\s]+
end
|
1631
|
-
|
1632
|
-
end
|