dimus-biodiversity 0.5.1 → 0.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/biodiversity/parser.rb +6 -5
- data/lib/biodiversity/parser/scientific_name_canonical.rb +142 -91
- data/lib/biodiversity/parser/scientific_name_canonical.treetop +8 -6
- data/lib/biodiversity/parser/scientific_name_clean.rb +864 -375
- data/lib/biodiversity/parser/scientific_name_clean.treetop +47 -26
- data/lib/biodiversity/parser/scientific_name_dirty.rb +421 -5
- data/lib/biodiversity/parser/scientific_name_dirty.treetop +90 -2
- data/spec/parser/scientific_name.spec.rb +7 -30
- data/spec/parser/scientific_name_canonical.spec.rb +4 -29
- data/spec/parser/scientific_name_clean.spec.rb +31 -27
- data/spec/parser/scientific_name_dirty.spec.rb +19 -45
- metadata +1 -1
data/lib/biodiversity/parser.rb
CHANGED
|
@@ -18,14 +18,13 @@ class ScientificNameParser
|
|
|
18
18
|
|
|
19
19
|
def parse(a_string)
|
|
20
20
|
@verbatim = a_string
|
|
21
|
-
@parser = @clean.parse(a_string) || @dirty.parse(a_string) || @canonical.parse(a_string)
|
|
21
|
+
@parser = @clean.parse(a_string) || @dirty.parse(a_string) || @canonical.parse(a_string) || {:verbatim => a_string}
|
|
22
22
|
def @parser.to_json
|
|
23
|
-
parsed =
|
|
24
|
-
res = {
|
|
25
|
-
:parsed => parsed,
|
|
26
|
-
:verbatim => self.text_value }
|
|
23
|
+
parsed = self.class != Hash
|
|
24
|
+
res = {:parsed => parsed}
|
|
27
25
|
if parsed
|
|
28
26
|
res.merge!({
|
|
27
|
+
:verbatim => self.text_value,
|
|
29
28
|
:normalized => self.value,
|
|
30
29
|
:canonical => self.canonical
|
|
31
30
|
})
|
|
@@ -35,6 +34,8 @@ class ScientificNameParser
|
|
|
35
34
|
data = {:namedHybrid => data}
|
|
36
35
|
end
|
|
37
36
|
res.merge!(data)
|
|
37
|
+
else
|
|
38
|
+
res.merge!(self)
|
|
38
39
|
end
|
|
39
40
|
res = {:scientificName => res}
|
|
40
41
|
JSON.generate res
|
|
@@ -58,12 +58,8 @@ module ScientificNameCanonical
|
|
|
58
58
|
elements[4]
|
|
59
59
|
end
|
|
60
60
|
|
|
61
|
-
def space_hard
|
|
62
|
-
elements[5]
|
|
63
|
-
end
|
|
64
|
-
|
|
65
61
|
def garbage
|
|
66
|
-
elements[
|
|
62
|
+
elements[5]
|
|
67
63
|
end
|
|
68
64
|
end
|
|
69
65
|
|
|
@@ -98,12 +94,8 @@ module ScientificNameCanonical
|
|
|
98
94
|
elements[2]
|
|
99
95
|
end
|
|
100
96
|
|
|
101
|
-
def space_hard
|
|
102
|
-
elements[3]
|
|
103
|
-
end
|
|
104
|
-
|
|
105
97
|
def garbage
|
|
106
|
-
elements[
|
|
98
|
+
elements[3]
|
|
107
99
|
end
|
|
108
100
|
end
|
|
109
101
|
|
|
@@ -138,12 +130,8 @@ module ScientificNameCanonical
|
|
|
138
130
|
elements[2]
|
|
139
131
|
end
|
|
140
132
|
|
|
141
|
-
def space_hard
|
|
142
|
-
elements[3]
|
|
143
|
-
end
|
|
144
|
-
|
|
145
133
|
def garbage
|
|
146
|
-
elements[
|
|
134
|
+
elements[3]
|
|
147
135
|
end
|
|
148
136
|
end
|
|
149
137
|
|
|
@@ -190,12 +178,8 @@ module ScientificNameCanonical
|
|
|
190
178
|
r6 = _nt_species
|
|
191
179
|
s1 << r6
|
|
192
180
|
if r6
|
|
193
|
-
r7 =
|
|
181
|
+
r7 = _nt_garbage
|
|
194
182
|
s1 << r7
|
|
195
|
-
if r7
|
|
196
|
-
r8 = _nt_garbage
|
|
197
|
-
s1 << r8
|
|
198
|
-
end
|
|
199
183
|
end
|
|
200
184
|
end
|
|
201
185
|
end
|
|
@@ -212,65 +196,57 @@ module ScientificNameCanonical
|
|
|
212
196
|
if r1
|
|
213
197
|
r0 = r1
|
|
214
198
|
else
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
if
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
if
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
if
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
if r13
|
|
228
|
-
r14 = _nt_garbage
|
|
229
|
-
s9 << r14
|
|
230
|
-
end
|
|
199
|
+
i8, s8 = index, []
|
|
200
|
+
r9 = _nt_genus
|
|
201
|
+
s8 << r9
|
|
202
|
+
if r9
|
|
203
|
+
r10 = _nt_space
|
|
204
|
+
s8 << r10
|
|
205
|
+
if r10
|
|
206
|
+
r11 = _nt_subgenus
|
|
207
|
+
s8 << r11
|
|
208
|
+
if r11
|
|
209
|
+
r12 = _nt_garbage
|
|
210
|
+
s8 << r12
|
|
231
211
|
end
|
|
232
212
|
end
|
|
233
213
|
end
|
|
234
|
-
if
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
214
|
+
if s8.last
|
|
215
|
+
r8 = instantiate_node(SyntaxNode,input, i8...index, s8)
|
|
216
|
+
r8.extend(MultinomialWithGarbage2)
|
|
217
|
+
r8.extend(MultinomialWithGarbage3)
|
|
238
218
|
else
|
|
239
|
-
self.index =
|
|
240
|
-
|
|
219
|
+
self.index = i8
|
|
220
|
+
r8 = nil
|
|
241
221
|
end
|
|
242
|
-
if
|
|
243
|
-
r0 =
|
|
222
|
+
if r8
|
|
223
|
+
r0 = r8
|
|
244
224
|
else
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
if
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
if
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
if
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
if r19
|
|
258
|
-
r20 = _nt_garbage
|
|
259
|
-
s15 << r20
|
|
260
|
-
end
|
|
225
|
+
i13, s13 = index, []
|
|
226
|
+
r14 = _nt_genus
|
|
227
|
+
s13 << r14
|
|
228
|
+
if r14
|
|
229
|
+
r15 = _nt_space
|
|
230
|
+
s13 << r15
|
|
231
|
+
if r15
|
|
232
|
+
r16 = _nt_species
|
|
233
|
+
s13 << r16
|
|
234
|
+
if r16
|
|
235
|
+
r17 = _nt_garbage
|
|
236
|
+
s13 << r17
|
|
261
237
|
end
|
|
262
238
|
end
|
|
263
239
|
end
|
|
264
|
-
if
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
240
|
+
if s13.last
|
|
241
|
+
r13 = instantiate_node(SyntaxNode,input, i13...index, s13)
|
|
242
|
+
r13.extend(MultinomialWithGarbage4)
|
|
243
|
+
r13.extend(MultinomialWithGarbage5)
|
|
268
244
|
else
|
|
269
|
-
self.index =
|
|
270
|
-
|
|
245
|
+
self.index = i13
|
|
246
|
+
r13 = nil
|
|
271
247
|
end
|
|
272
|
-
if
|
|
273
|
-
r0 =
|
|
248
|
+
if r13
|
|
249
|
+
r0 = r13
|
|
274
250
|
else
|
|
275
251
|
self.index = i0
|
|
276
252
|
r0 = nil
|
|
@@ -288,12 +264,8 @@ module ScientificNameCanonical
|
|
|
288
264
|
elements[0]
|
|
289
265
|
end
|
|
290
266
|
|
|
291
|
-
def space_hard
|
|
292
|
-
elements[1]
|
|
293
|
-
end
|
|
294
|
-
|
|
295
267
|
def b
|
|
296
|
-
elements[
|
|
268
|
+
elements[1]
|
|
297
269
|
end
|
|
298
270
|
end
|
|
299
271
|
|
|
@@ -327,12 +299,8 @@ module ScientificNameCanonical
|
|
|
327
299
|
r1 = _nt_uninomial_epitheton
|
|
328
300
|
s0 << r1
|
|
329
301
|
if r1
|
|
330
|
-
r2 =
|
|
302
|
+
r2 = _nt_garbage
|
|
331
303
|
s0 << r2
|
|
332
|
-
if r2
|
|
333
|
-
r3 = _nt_garbage
|
|
334
|
-
s0 << r3
|
|
335
|
-
end
|
|
336
304
|
end
|
|
337
305
|
if s0.last
|
|
338
306
|
r0 = instantiate_node(SyntaxNode,input, i0...index, s0)
|
|
@@ -348,6 +316,24 @@ module ScientificNameCanonical
|
|
|
348
316
|
return r0
|
|
349
317
|
end
|
|
350
318
|
|
|
319
|
+
module Garbage0
|
|
320
|
+
def space
|
|
321
|
+
elements[0]
|
|
322
|
+
end
|
|
323
|
+
|
|
324
|
+
def space
|
|
325
|
+
elements[2]
|
|
326
|
+
end
|
|
327
|
+
|
|
328
|
+
end
|
|
329
|
+
|
|
330
|
+
module Garbage1
|
|
331
|
+
def space_hard
|
|
332
|
+
elements[0]
|
|
333
|
+
end
|
|
334
|
+
|
|
335
|
+
end
|
|
336
|
+
|
|
351
337
|
def _nt_garbage
|
|
352
338
|
start_index = index
|
|
353
339
|
if node_cache[:garbage].has_key?(index)
|
|
@@ -356,25 +342,90 @@ module ScientificNameCanonical
|
|
|
356
342
|
return cached
|
|
357
343
|
end
|
|
358
344
|
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
345
|
+
i0 = index
|
|
346
|
+
i1, s1 = index, []
|
|
347
|
+
r2 = _nt_space
|
|
348
|
+
s1 << r2
|
|
349
|
+
if r2
|
|
350
|
+
if input.index(Regexp.new('["\',.]'), index) == index
|
|
351
|
+
r3 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
|
363
352
|
@index += 1
|
|
364
353
|
else
|
|
365
|
-
|
|
354
|
+
r3 = nil
|
|
366
355
|
end
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
356
|
+
s1 << r3
|
|
357
|
+
if r3
|
|
358
|
+
r4 = _nt_space
|
|
359
|
+
s1 << r4
|
|
360
|
+
if r4
|
|
361
|
+
s5, i5 = [], index
|
|
362
|
+
loop do
|
|
363
|
+
if input.index(Regexp.new('[^щ]'), index) == index
|
|
364
|
+
r6 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
|
365
|
+
@index += 1
|
|
366
|
+
else
|
|
367
|
+
r6 = nil
|
|
368
|
+
end
|
|
369
|
+
if r6
|
|
370
|
+
s5 << r6
|
|
371
|
+
else
|
|
372
|
+
break
|
|
373
|
+
end
|
|
374
|
+
end
|
|
375
|
+
r5 = instantiate_node(SyntaxNode,input, i5...index, s5)
|
|
376
|
+
s1 << r5
|
|
377
|
+
end
|
|
371
378
|
end
|
|
372
379
|
end
|
|
373
|
-
if
|
|
374
|
-
|
|
375
|
-
|
|
380
|
+
if s1.last
|
|
381
|
+
r1 = instantiate_node(SyntaxNode,input, i1...index, s1)
|
|
382
|
+
r1.extend(Garbage0)
|
|
376
383
|
else
|
|
377
|
-
|
|
384
|
+
self.index = i1
|
|
385
|
+
r1 = nil
|
|
386
|
+
end
|
|
387
|
+
if r1
|
|
388
|
+
r0 = r1
|
|
389
|
+
else
|
|
390
|
+
i7, s7 = index, []
|
|
391
|
+
r8 = _nt_space_hard
|
|
392
|
+
s7 << r8
|
|
393
|
+
if r8
|
|
394
|
+
s9, i9 = [], index
|
|
395
|
+
loop do
|
|
396
|
+
if input.index(Regexp.new('[^ш]'), index) == index
|
|
397
|
+
r10 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
|
398
|
+
@index += 1
|
|
399
|
+
else
|
|
400
|
+
r10 = nil
|
|
401
|
+
end
|
|
402
|
+
if r10
|
|
403
|
+
s9 << r10
|
|
404
|
+
else
|
|
405
|
+
break
|
|
406
|
+
end
|
|
407
|
+
end
|
|
408
|
+
if s9.empty?
|
|
409
|
+
self.index = i9
|
|
410
|
+
r9 = nil
|
|
411
|
+
else
|
|
412
|
+
r9 = instantiate_node(SyntaxNode,input, i9...index, s9)
|
|
413
|
+
end
|
|
414
|
+
s7 << r9
|
|
415
|
+
end
|
|
416
|
+
if s7.last
|
|
417
|
+
r7 = instantiate_node(SyntaxNode,input, i7...index, s7)
|
|
418
|
+
r7.extend(Garbage1)
|
|
419
|
+
else
|
|
420
|
+
self.index = i7
|
|
421
|
+
r7 = nil
|
|
422
|
+
end
|
|
423
|
+
if r7
|
|
424
|
+
r0 = r7
|
|
425
|
+
else
|
|
426
|
+
self.index = i0
|
|
427
|
+
r0 = nil
|
|
428
|
+
end
|
|
378
429
|
end
|
|
379
430
|
|
|
380
431
|
node_cache[:garbage][start_index] = r0
|
|
@@ -10,7 +10,8 @@ grammar ScientificNameCanonical
|
|
|
10
10
|
end
|
|
11
11
|
|
|
12
12
|
rule multinomial_with_garbage
|
|
13
|
-
|
|
13
|
+
|
|
14
|
+
a:genus space b:subgenus space c:species garbage {
|
|
14
15
|
def value
|
|
15
16
|
a.value + " " + b.value + " " + c.value
|
|
16
17
|
end
|
|
@@ -28,7 +29,7 @@ grammar ScientificNameCanonical
|
|
|
28
29
|
end
|
|
29
30
|
}
|
|
30
31
|
/
|
|
31
|
-
a:genus space b:subgenus
|
|
32
|
+
a:genus space b:subgenus garbage {
|
|
32
33
|
def value
|
|
33
34
|
a.value + " " + b.value
|
|
34
35
|
end
|
|
@@ -46,7 +47,7 @@ grammar ScientificNameCanonical
|
|
|
46
47
|
end
|
|
47
48
|
}
|
|
48
49
|
/
|
|
49
|
-
a:genus space b:species
|
|
50
|
+
a:genus space b:species garbage {
|
|
50
51
|
def value
|
|
51
52
|
a.value + " " + b.value
|
|
52
53
|
end
|
|
@@ -66,7 +67,7 @@ grammar ScientificNameCanonical
|
|
|
66
67
|
end
|
|
67
68
|
|
|
68
69
|
rule uninomial_with_garbage
|
|
69
|
-
a:uninomial_epitheton
|
|
70
|
+
a:uninomial_epitheton b:garbage {
|
|
70
71
|
def value
|
|
71
72
|
a.value
|
|
72
73
|
end
|
|
@@ -84,10 +85,11 @@ grammar ScientificNameCanonical
|
|
|
84
85
|
end
|
|
85
86
|
}
|
|
86
87
|
end
|
|
87
|
-
|
|
88
88
|
|
|
89
89
|
rule garbage
|
|
90
|
-
[
|
|
90
|
+
space (["',.]) space [^щ]*
|
|
91
|
+
/
|
|
92
|
+
space_hard [^ш]+
|
|
91
93
|
end
|
|
92
94
|
|
|
93
95
|
end
|
|
@@ -651,16 +651,20 @@ module ScientificNameClean
|
|
|
651
651
|
elements[3]
|
|
652
652
|
end
|
|
653
653
|
|
|
654
|
+
def space
|
|
655
|
+
elements[5]
|
|
656
|
+
end
|
|
657
|
+
|
|
654
658
|
def c
|
|
655
|
-
elements[
|
|
659
|
+
elements[6]
|
|
656
660
|
end
|
|
657
661
|
|
|
658
662
|
def space_hard
|
|
659
|
-
elements[
|
|
663
|
+
elements[7]
|
|
660
664
|
end
|
|
661
665
|
|
|
662
666
|
def d
|
|
663
|
-
elements[
|
|
667
|
+
elements[8]
|
|
664
668
|
end
|
|
665
669
|
end
|
|
666
670
|
|
|
@@ -670,7 +674,7 @@ module ScientificNameClean
|
|
|
670
674
|
end
|
|
671
675
|
|
|
672
676
|
def canonical
|
|
673
|
-
a.canonical + " " + c.canonical + " " + d.canonical
|
|
677
|
+
a.canonical + " " + b.canonical + " " + c.canonical + " " + d.canonical
|
|
674
678
|
end
|
|
675
679
|
|
|
676
680
|
def pos
|
|
@@ -699,8 +703,12 @@ module ScientificNameClean
|
|
|
699
703
|
elements[3]
|
|
700
704
|
end
|
|
701
705
|
|
|
706
|
+
def space
|
|
707
|
+
elements[5]
|
|
708
|
+
end
|
|
709
|
+
|
|
702
710
|
def c
|
|
703
|
-
elements[
|
|
711
|
+
elements[6]
|
|
704
712
|
end
|
|
705
713
|
end
|
|
706
714
|
|
|
@@ -727,20 +735,24 @@ module ScientificNameClean
|
|
|
727
735
|
elements[0]
|
|
728
736
|
end
|
|
729
737
|
|
|
730
|
-
def
|
|
738
|
+
def space
|
|
731
739
|
elements[1]
|
|
732
740
|
end
|
|
733
741
|
|
|
742
|
+
def space
|
|
743
|
+
elements[3]
|
|
744
|
+
end
|
|
745
|
+
|
|
734
746
|
def b
|
|
735
|
-
elements[
|
|
747
|
+
elements[4]
|
|
736
748
|
end
|
|
737
749
|
|
|
738
750
|
def space_hard
|
|
739
|
-
elements[
|
|
751
|
+
elements[5]
|
|
740
752
|
end
|
|
741
753
|
|
|
742
754
|
def c
|
|
743
|
-
elements[
|
|
755
|
+
elements[6]
|
|
744
756
|
end
|
|
745
757
|
end
|
|
746
758
|
|
|
@@ -767,12 +779,16 @@ module ScientificNameClean
|
|
|
767
779
|
elements[0]
|
|
768
780
|
end
|
|
769
781
|
|
|
770
|
-
def
|
|
782
|
+
def space
|
|
771
783
|
elements[1]
|
|
772
784
|
end
|
|
773
785
|
|
|
786
|
+
def space
|
|
787
|
+
elements[3]
|
|
788
|
+
end
|
|
789
|
+
|
|
774
790
|
def b
|
|
775
|
-
elements[
|
|
791
|
+
elements[4]
|
|
776
792
|
end
|
|
777
793
|
end
|
|
778
794
|
|
|
@@ -816,14 +832,27 @@ module ScientificNameClean
|
|
|
816
832
|
r5 = _nt_space
|
|
817
833
|
s1 << r5
|
|
818
834
|
if r5
|
|
819
|
-
|
|
835
|
+
r7 = _nt_species_prefix
|
|
836
|
+
if r7
|
|
837
|
+
r6 = r7
|
|
838
|
+
else
|
|
839
|
+
r6 = instantiate_node(SyntaxNode,input, index...index)
|
|
840
|
+
end
|
|
820
841
|
s1 << r6
|
|
821
842
|
if r6
|
|
822
|
-
|
|
823
|
-
s1 <<
|
|
824
|
-
if
|
|
825
|
-
|
|
826
|
-
s1 <<
|
|
843
|
+
r8 = _nt_space
|
|
844
|
+
s1 << r8
|
|
845
|
+
if r8
|
|
846
|
+
r9 = _nt_species
|
|
847
|
+
s1 << r9
|
|
848
|
+
if r9
|
|
849
|
+
r10 = _nt_space_hard
|
|
850
|
+
s1 << r10
|
|
851
|
+
if r10
|
|
852
|
+
r11 = _nt_infraspecies_mult
|
|
853
|
+
s1 << r11
|
|
854
|
+
end
|
|
855
|
+
end
|
|
827
856
|
end
|
|
828
857
|
end
|
|
829
858
|
end
|
|
@@ -841,87 +870,126 @@ module ScientificNameClean
|
|
|
841
870
|
if r1
|
|
842
871
|
r0 = r1
|
|
843
872
|
else
|
|
844
|
-
|
|
845
|
-
|
|
846
|
-
|
|
847
|
-
if
|
|
848
|
-
|
|
849
|
-
|
|
850
|
-
if
|
|
851
|
-
|
|
852
|
-
|
|
853
|
-
if
|
|
854
|
-
|
|
855
|
-
|
|
856
|
-
if
|
|
857
|
-
|
|
858
|
-
|
|
873
|
+
i12, s12 = index, []
|
|
874
|
+
r13 = _nt_genus
|
|
875
|
+
s12 << r13
|
|
876
|
+
if r13
|
|
877
|
+
r14 = _nt_space
|
|
878
|
+
s12 << r14
|
|
879
|
+
if r14
|
|
880
|
+
r15 = _nt_subgenus
|
|
881
|
+
s12 << r15
|
|
882
|
+
if r15
|
|
883
|
+
r16 = _nt_space
|
|
884
|
+
s12 << r16
|
|
885
|
+
if r16
|
|
886
|
+
r18 = _nt_species_prefix
|
|
887
|
+
if r18
|
|
888
|
+
r17 = r18
|
|
889
|
+
else
|
|
890
|
+
r17 = instantiate_node(SyntaxNode,input, index...index)
|
|
891
|
+
end
|
|
892
|
+
s12 << r17
|
|
893
|
+
if r17
|
|
894
|
+
r19 = _nt_space
|
|
895
|
+
s12 << r19
|
|
896
|
+
if r19
|
|
897
|
+
r20 = _nt_species
|
|
898
|
+
s12 << r20
|
|
899
|
+
end
|
|
900
|
+
end
|
|
859
901
|
end
|
|
860
902
|
end
|
|
861
903
|
end
|
|
862
904
|
end
|
|
863
|
-
if
|
|
864
|
-
|
|
865
|
-
|
|
866
|
-
|
|
905
|
+
if s12.last
|
|
906
|
+
r12 = instantiate_node(SyntaxNode,input, i12...index, s12)
|
|
907
|
+
r12.extend(MultinomialName2)
|
|
908
|
+
r12.extend(MultinomialName3)
|
|
867
909
|
else
|
|
868
|
-
self.index =
|
|
869
|
-
|
|
910
|
+
self.index = i12
|
|
911
|
+
r12 = nil
|
|
870
912
|
end
|
|
871
|
-
if
|
|
872
|
-
r0 =
|
|
913
|
+
if r12
|
|
914
|
+
r0 = r12
|
|
873
915
|
else
|
|
874
|
-
|
|
875
|
-
|
|
876
|
-
|
|
877
|
-
if
|
|
878
|
-
|
|
879
|
-
|
|
880
|
-
if
|
|
881
|
-
|
|
882
|
-
|
|
883
|
-
|
|
884
|
-
|
|
885
|
-
|
|
886
|
-
|
|
887
|
-
|
|
888
|
-
|
|
916
|
+
i21, s21 = index, []
|
|
917
|
+
r22 = _nt_genus
|
|
918
|
+
s21 << r22
|
|
919
|
+
if r22
|
|
920
|
+
r23 = _nt_space
|
|
921
|
+
s21 << r23
|
|
922
|
+
if r23
|
|
923
|
+
r25 = _nt_species_prefix
|
|
924
|
+
if r25
|
|
925
|
+
r24 = r25
|
|
926
|
+
else
|
|
927
|
+
r24 = instantiate_node(SyntaxNode,input, index...index)
|
|
928
|
+
end
|
|
929
|
+
s21 << r24
|
|
930
|
+
if r24
|
|
931
|
+
r26 = _nt_space
|
|
932
|
+
s21 << r26
|
|
933
|
+
if r26
|
|
934
|
+
r27 = _nt_species
|
|
935
|
+
s21 << r27
|
|
936
|
+
if r27
|
|
937
|
+
r28 = _nt_space_hard
|
|
938
|
+
s21 << r28
|
|
939
|
+
if r28
|
|
940
|
+
r29 = _nt_infraspecies_mult
|
|
941
|
+
s21 << r29
|
|
942
|
+
end
|
|
943
|
+
end
|
|
889
944
|
end
|
|
890
945
|
end
|
|
891
946
|
end
|
|
892
947
|
end
|
|
893
|
-
if
|
|
894
|
-
|
|
895
|
-
|
|
896
|
-
|
|
948
|
+
if s21.last
|
|
949
|
+
r21 = instantiate_node(SyntaxNode,input, i21...index, s21)
|
|
950
|
+
r21.extend(MultinomialName4)
|
|
951
|
+
r21.extend(MultinomialName5)
|
|
897
952
|
else
|
|
898
|
-
self.index =
|
|
899
|
-
|
|
953
|
+
self.index = i21
|
|
954
|
+
r21 = nil
|
|
900
955
|
end
|
|
901
|
-
if
|
|
902
|
-
r0 =
|
|
956
|
+
if r21
|
|
957
|
+
r0 = r21
|
|
903
958
|
else
|
|
904
|
-
|
|
905
|
-
|
|
906
|
-
|
|
907
|
-
if
|
|
908
|
-
|
|
909
|
-
|
|
910
|
-
if
|
|
911
|
-
|
|
912
|
-
|
|
959
|
+
i30, s30 = index, []
|
|
960
|
+
r31 = _nt_genus
|
|
961
|
+
s30 << r31
|
|
962
|
+
if r31
|
|
963
|
+
r32 = _nt_space
|
|
964
|
+
s30 << r32
|
|
965
|
+
if r32
|
|
966
|
+
r34 = _nt_species_prefix
|
|
967
|
+
if r34
|
|
968
|
+
r33 = r34
|
|
969
|
+
else
|
|
970
|
+
r33 = instantiate_node(SyntaxNode,input, index...index)
|
|
971
|
+
end
|
|
972
|
+
s30 << r33
|
|
973
|
+
if r33
|
|
974
|
+
r35 = _nt_space
|
|
975
|
+
s30 << r35
|
|
976
|
+
if r35
|
|
977
|
+
r36 = _nt_species
|
|
978
|
+
s30 << r36
|
|
979
|
+
end
|
|
980
|
+
end
|
|
913
981
|
end
|
|
914
982
|
end
|
|
915
|
-
if
|
|
916
|
-
|
|
917
|
-
|
|
918
|
-
|
|
983
|
+
if s30.last
|
|
984
|
+
r30 = instantiate_node(SyntaxNode,input, i30...index, s30)
|
|
985
|
+
r30.extend(MultinomialName6)
|
|
986
|
+
r30.extend(MultinomialName7)
|
|
919
987
|
else
|
|
920
|
-
self.index =
|
|
921
|
-
|
|
988
|
+
self.index = i30
|
|
989
|
+
r30 = nil
|
|
922
990
|
end
|
|
923
|
-
if
|
|
924
|
-
r0 =
|
|
991
|
+
if r30
|
|
992
|
+
r0 = r30
|
|
925
993
|
else
|
|
926
994
|
self.index = i0
|
|
927
995
|
r0 = nil
|
|
@@ -963,7 +1031,6 @@ module ScientificNameClean
|
|
|
963
1031
|
end
|
|
964
1032
|
|
|
965
1033
|
def details
|
|
966
|
-
#{:infraspecies => a.details[:infraspceies] << b.details[:infraspecies]}
|
|
967
1034
|
a_array = a.details[:infraspecies].class == Array ? a.details[:infraspecies] : [a.details[:infraspecies]]
|
|
968
1035
|
b_array = b.details[:infraspecies].class == Array ? b.details[:infraspecies] : [b.details[:infraspecies]]
|
|
969
1036
|
a.details.merge({:infraspecies => a_array + b_array})
|
|
@@ -1239,13 +1306,32 @@ module ScientificNameClean
|
|
|
1239
1306
|
return cached
|
|
1240
1307
|
end
|
|
1241
1308
|
|
|
1309
|
+
i0 = index
|
|
1242
1310
|
if input.index("sec.", index) == index
|
|
1243
|
-
|
|
1244
|
-
r0.extend(TaxonConceptRank0)
|
|
1311
|
+
r1 = instantiate_node(SyntaxNode,input, index...(index + 4))
|
|
1245
1312
|
@index += 4
|
|
1246
1313
|
else
|
|
1247
1314
|
terminal_parse_failure("sec.")
|
|
1248
|
-
|
|
1315
|
+
r1 = nil
|
|
1316
|
+
end
|
|
1317
|
+
if r1
|
|
1318
|
+
r0 = r1
|
|
1319
|
+
r0.extend(TaxonConceptRank0)
|
|
1320
|
+
else
|
|
1321
|
+
if input.index("sensu.", index) == index
|
|
1322
|
+
r2 = instantiate_node(SyntaxNode,input, index...(index + 6))
|
|
1323
|
+
@index += 6
|
|
1324
|
+
else
|
|
1325
|
+
terminal_parse_failure("sensu.")
|
|
1326
|
+
r2 = nil
|
|
1327
|
+
end
|
|
1328
|
+
if r2
|
|
1329
|
+
r0 = r2
|
|
1330
|
+
r0.extend(TaxonConceptRank0)
|
|
1331
|
+
else
|
|
1332
|
+
self.index = i0
|
|
1333
|
+
r0 = nil
|
|
1334
|
+
end
|
|
1249
1335
|
end
|
|
1250
1336
|
|
|
1251
1337
|
node_cache[:taxon_concept_rank][start_index] = r0
|
|
@@ -2083,16 +2169,45 @@ module ScientificNameClean
|
|
|
2083
2169
|
end
|
|
2084
2170
|
|
|
2085
2171
|
module Genus0
|
|
2172
|
+
def space_hard
|
|
2173
|
+
elements[0]
|
|
2174
|
+
end
|
|
2175
|
+
|
|
2176
|
+
def author_prefix_word
|
|
2177
|
+
elements[1]
|
|
2178
|
+
end
|
|
2179
|
+
|
|
2180
|
+
def space_hard
|
|
2181
|
+
elements[2]
|
|
2182
|
+
end
|
|
2183
|
+
|
|
2184
|
+
def author_word
|
|
2185
|
+
elements[3]
|
|
2186
|
+
end
|
|
2187
|
+
end
|
|
2188
|
+
|
|
2189
|
+
module Genus1
|
|
2190
|
+
def a
|
|
2191
|
+
elements[0]
|
|
2192
|
+
end
|
|
2193
|
+
|
|
2194
|
+
end
|
|
2195
|
+
|
|
2196
|
+
module Genus2
|
|
2197
|
+
def value
|
|
2198
|
+
a.value
|
|
2199
|
+
end
|
|
2200
|
+
|
|
2086
2201
|
def pos
|
|
2087
|
-
{interval.begin => ['genus', interval.end]}
|
|
2202
|
+
{a.interval.begin => ['genus', a.interval.end]}
|
|
2088
2203
|
end
|
|
2089
2204
|
|
|
2090
2205
|
def canonical
|
|
2091
|
-
value
|
|
2206
|
+
a.value
|
|
2092
2207
|
end
|
|
2093
2208
|
|
|
2094
2209
|
def details
|
|
2095
|
-
{:genus => {:epitheton => value}}
|
|
2210
|
+
{:genus => {:epitheton => a.value}}
|
|
2096
2211
|
end
|
|
2097
2212
|
end
|
|
2098
2213
|
|
|
@@ -2104,8 +2219,49 @@ module ScientificNameClean
|
|
|
2104
2219
|
return cached
|
|
2105
2220
|
end
|
|
2106
2221
|
|
|
2107
|
-
|
|
2108
|
-
|
|
2222
|
+
i0, s0 = index, []
|
|
2223
|
+
r1 = _nt_cap_latin_word
|
|
2224
|
+
s0 << r1
|
|
2225
|
+
if r1
|
|
2226
|
+
i2 = index
|
|
2227
|
+
i3, s3 = index, []
|
|
2228
|
+
r4 = _nt_space_hard
|
|
2229
|
+
s3 << r4
|
|
2230
|
+
if r4
|
|
2231
|
+
r5 = _nt_author_prefix_word
|
|
2232
|
+
s3 << r5
|
|
2233
|
+
if r5
|
|
2234
|
+
r6 = _nt_space_hard
|
|
2235
|
+
s3 << r6
|
|
2236
|
+
if r6
|
|
2237
|
+
r7 = _nt_author_word
|
|
2238
|
+
s3 << r7
|
|
2239
|
+
end
|
|
2240
|
+
end
|
|
2241
|
+
end
|
|
2242
|
+
if s3.last
|
|
2243
|
+
r3 = instantiate_node(SyntaxNode,input, i3...index, s3)
|
|
2244
|
+
r3.extend(Genus0)
|
|
2245
|
+
else
|
|
2246
|
+
self.index = i3
|
|
2247
|
+
r3 = nil
|
|
2248
|
+
end
|
|
2249
|
+
if r3
|
|
2250
|
+
r2 = nil
|
|
2251
|
+
else
|
|
2252
|
+
self.index = i2
|
|
2253
|
+
r2 = instantiate_node(SyntaxNode,input, index...index)
|
|
2254
|
+
end
|
|
2255
|
+
s0 << r2
|
|
2256
|
+
end
|
|
2257
|
+
if s0.last
|
|
2258
|
+
r0 = instantiate_node(SyntaxNode,input, i0...index, s0)
|
|
2259
|
+
r0.extend(Genus1)
|
|
2260
|
+
r0.extend(Genus2)
|
|
2261
|
+
else
|
|
2262
|
+
self.index = i0
|
|
2263
|
+
r0 = nil
|
|
2264
|
+
end
|
|
2109
2265
|
|
|
2110
2266
|
node_cache[:genus][start_index] = r0
|
|
2111
2267
|
|
|
@@ -2233,11 +2389,11 @@ module ScientificNameClean
|
|
|
2233
2389
|
end
|
|
2234
2390
|
|
|
2235
2391
|
def space
|
|
2236
|
-
elements[
|
|
2392
|
+
elements[4]
|
|
2237
2393
|
end
|
|
2238
2394
|
|
|
2239
2395
|
def c
|
|
2240
|
-
elements[
|
|
2396
|
+
elements[5]
|
|
2241
2397
|
end
|
|
2242
2398
|
end
|
|
2243
2399
|
|
|
@@ -2291,11 +2447,11 @@ module ScientificNameClean
|
|
|
2291
2447
|
end
|
|
2292
2448
|
|
|
2293
2449
|
def space
|
|
2294
|
-
elements[
|
|
2450
|
+
elements[2]
|
|
2295
2451
|
end
|
|
2296
2452
|
|
|
2297
2453
|
def b
|
|
2298
|
-
elements[
|
|
2454
|
+
elements[3]
|
|
2299
2455
|
end
|
|
2300
2456
|
end
|
|
2301
2457
|
|
|
@@ -2335,11 +2491,26 @@ module ScientificNameClean
|
|
|
2335
2491
|
r4 = _nt_simple_authorship
|
|
2336
2492
|
s1 << r4
|
|
2337
2493
|
if r4
|
|
2338
|
-
|
|
2494
|
+
if input.index(",", index) == index
|
|
2495
|
+
r6 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
|
2496
|
+
@index += 1
|
|
2497
|
+
else
|
|
2498
|
+
terminal_parse_failure(",")
|
|
2499
|
+
r6 = nil
|
|
2500
|
+
end
|
|
2501
|
+
if r6
|
|
2502
|
+
r5 = r6
|
|
2503
|
+
else
|
|
2504
|
+
r5 = instantiate_node(SyntaxNode,input, index...index)
|
|
2505
|
+
end
|
|
2339
2506
|
s1 << r5
|
|
2340
2507
|
if r5
|
|
2341
|
-
|
|
2342
|
-
s1 <<
|
|
2508
|
+
r7 = _nt_space
|
|
2509
|
+
s1 << r7
|
|
2510
|
+
if r7
|
|
2511
|
+
r8 = _nt_ex_authorship
|
|
2512
|
+
s1 << r8
|
|
2513
|
+
end
|
|
2343
2514
|
end
|
|
2344
2515
|
end
|
|
2345
2516
|
end
|
|
@@ -2355,57 +2526,72 @@ module ScientificNameClean
|
|
|
2355
2526
|
if r1
|
|
2356
2527
|
r0 = r1
|
|
2357
2528
|
else
|
|
2358
|
-
|
|
2359
|
-
|
|
2360
|
-
|
|
2361
|
-
if
|
|
2362
|
-
|
|
2363
|
-
|
|
2364
|
-
if
|
|
2365
|
-
|
|
2366
|
-
|
|
2529
|
+
i9, s9 = index, []
|
|
2530
|
+
r10 = _nt_basionym_authorship_with_parenthesis
|
|
2531
|
+
s9 << r10
|
|
2532
|
+
if r10
|
|
2533
|
+
r11 = _nt_space
|
|
2534
|
+
s9 << r11
|
|
2535
|
+
if r11
|
|
2536
|
+
r12 = _nt_simple_authorship
|
|
2537
|
+
s9 << r12
|
|
2367
2538
|
end
|
|
2368
2539
|
end
|
|
2369
|
-
if
|
|
2370
|
-
|
|
2371
|
-
|
|
2372
|
-
|
|
2540
|
+
if s9.last
|
|
2541
|
+
r9 = instantiate_node(SyntaxNode,input, i9...index, s9)
|
|
2542
|
+
r9.extend(Authorship2)
|
|
2543
|
+
r9.extend(Authorship3)
|
|
2373
2544
|
else
|
|
2374
|
-
self.index =
|
|
2375
|
-
|
|
2545
|
+
self.index = i9
|
|
2546
|
+
r9 = nil
|
|
2376
2547
|
end
|
|
2377
|
-
if
|
|
2378
|
-
r0 =
|
|
2548
|
+
if r9
|
|
2549
|
+
r0 = r9
|
|
2379
2550
|
else
|
|
2380
|
-
|
|
2381
|
-
if
|
|
2382
|
-
r0 =
|
|
2551
|
+
r13 = _nt_basionym_authorship_with_parenthesis
|
|
2552
|
+
if r13
|
|
2553
|
+
r0 = r13
|
|
2383
2554
|
else
|
|
2384
|
-
|
|
2385
|
-
|
|
2386
|
-
|
|
2387
|
-
if
|
|
2388
|
-
|
|
2389
|
-
|
|
2390
|
-
|
|
2391
|
-
|
|
2392
|
-
|
|
2555
|
+
i14, s14 = index, []
|
|
2556
|
+
r15 = _nt_simple_authorship
|
|
2557
|
+
s14 << r15
|
|
2558
|
+
if r15
|
|
2559
|
+
if input.index(",", index) == index
|
|
2560
|
+
r17 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
|
2561
|
+
@index += 1
|
|
2562
|
+
else
|
|
2563
|
+
terminal_parse_failure(",")
|
|
2564
|
+
r17 = nil
|
|
2565
|
+
end
|
|
2566
|
+
if r17
|
|
2567
|
+
r16 = r17
|
|
2568
|
+
else
|
|
2569
|
+
r16 = instantiate_node(SyntaxNode,input, index...index)
|
|
2570
|
+
end
|
|
2571
|
+
s14 << r16
|
|
2572
|
+
if r16
|
|
2573
|
+
r18 = _nt_space
|
|
2574
|
+
s14 << r18
|
|
2575
|
+
if r18
|
|
2576
|
+
r19 = _nt_ex_authorship
|
|
2577
|
+
s14 << r19
|
|
2578
|
+
end
|
|
2393
2579
|
end
|
|
2394
2580
|
end
|
|
2395
|
-
if
|
|
2396
|
-
|
|
2397
|
-
|
|
2398
|
-
|
|
2581
|
+
if s14.last
|
|
2582
|
+
r14 = instantiate_node(SyntaxNode,input, i14...index, s14)
|
|
2583
|
+
r14.extend(Authorship4)
|
|
2584
|
+
r14.extend(Authorship5)
|
|
2399
2585
|
else
|
|
2400
|
-
self.index =
|
|
2401
|
-
|
|
2586
|
+
self.index = i14
|
|
2587
|
+
r14 = nil
|
|
2402
2588
|
end
|
|
2403
|
-
if
|
|
2404
|
-
r0 =
|
|
2589
|
+
if r14
|
|
2590
|
+
r0 = r14
|
|
2405
2591
|
else
|
|
2406
|
-
|
|
2407
|
-
if
|
|
2408
|
-
r0 =
|
|
2592
|
+
r20 = _nt_simple_authorship
|
|
2593
|
+
if r20
|
|
2594
|
+
r0 = r20
|
|
2409
2595
|
else
|
|
2410
2596
|
self.index = i0
|
|
2411
2597
|
r0 = nil
|
|
@@ -2484,19 +2670,19 @@ module ScientificNameClean
|
|
|
2484
2670
|
end
|
|
2485
2671
|
|
|
2486
2672
|
def space
|
|
2487
|
-
elements[
|
|
2673
|
+
elements[4]
|
|
2488
2674
|
end
|
|
2489
2675
|
|
|
2490
2676
|
def b
|
|
2491
|
-
elements[
|
|
2677
|
+
elements[5]
|
|
2492
2678
|
end
|
|
2493
2679
|
|
|
2494
2680
|
def space
|
|
2495
|
-
elements[
|
|
2681
|
+
elements[6]
|
|
2496
2682
|
end
|
|
2497
2683
|
|
|
2498
2684
|
def right_paren
|
|
2499
|
-
elements[
|
|
2685
|
+
elements[7]
|
|
2500
2686
|
end
|
|
2501
2687
|
end
|
|
2502
2688
|
|
|
@@ -2666,17 +2852,32 @@ module ScientificNameClean
|
|
|
2666
2852
|
r15 = _nt_simple_authorship
|
|
2667
2853
|
s12 << r15
|
|
2668
2854
|
if r15
|
|
2669
|
-
|
|
2855
|
+
if input.index(",", index) == index
|
|
2856
|
+
r17 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
|
2857
|
+
@index += 1
|
|
2858
|
+
else
|
|
2859
|
+
terminal_parse_failure(",")
|
|
2860
|
+
r17 = nil
|
|
2861
|
+
end
|
|
2862
|
+
if r17
|
|
2863
|
+
r16 = r17
|
|
2864
|
+
else
|
|
2865
|
+
r16 = instantiate_node(SyntaxNode,input, index...index)
|
|
2866
|
+
end
|
|
2670
2867
|
s12 << r16
|
|
2671
2868
|
if r16
|
|
2672
|
-
|
|
2673
|
-
s12 <<
|
|
2674
|
-
if
|
|
2675
|
-
|
|
2676
|
-
s12 <<
|
|
2677
|
-
if
|
|
2678
|
-
|
|
2679
|
-
s12 <<
|
|
2869
|
+
r18 = _nt_space
|
|
2870
|
+
s12 << r18
|
|
2871
|
+
if r18
|
|
2872
|
+
r19 = _nt_ex_authorship
|
|
2873
|
+
s12 << r19
|
|
2874
|
+
if r19
|
|
2875
|
+
r20 = _nt_space
|
|
2876
|
+
s12 << r20
|
|
2877
|
+
if r20
|
|
2878
|
+
r21 = _nt_right_paren
|
|
2879
|
+
s12 << r21
|
|
2880
|
+
end
|
|
2680
2881
|
end
|
|
2681
2882
|
end
|
|
2682
2883
|
end
|
|
@@ -2694,71 +2895,71 @@ module ScientificNameClean
|
|
|
2694
2895
|
if r12
|
|
2695
2896
|
r0 = r12
|
|
2696
2897
|
else
|
|
2697
|
-
|
|
2698
|
-
|
|
2699
|
-
|
|
2700
|
-
if
|
|
2701
|
-
|
|
2702
|
-
|
|
2703
|
-
if
|
|
2704
|
-
|
|
2705
|
-
|
|
2706
|
-
if
|
|
2707
|
-
|
|
2708
|
-
|
|
2709
|
-
if
|
|
2710
|
-
|
|
2711
|
-
|
|
2898
|
+
i22, s22 = index, []
|
|
2899
|
+
r23 = _nt_left_paren
|
|
2900
|
+
s22 << r23
|
|
2901
|
+
if r23
|
|
2902
|
+
r24 = _nt_space
|
|
2903
|
+
s22 << r24
|
|
2904
|
+
if r24
|
|
2905
|
+
r25 = _nt_simple_authorship
|
|
2906
|
+
s22 << r25
|
|
2907
|
+
if r25
|
|
2908
|
+
r26 = _nt_space
|
|
2909
|
+
s22 << r26
|
|
2910
|
+
if r26
|
|
2911
|
+
r27 = _nt_right_paren
|
|
2912
|
+
s22 << r27
|
|
2712
2913
|
end
|
|
2713
2914
|
end
|
|
2714
2915
|
end
|
|
2715
2916
|
end
|
|
2716
|
-
if
|
|
2717
|
-
|
|
2718
|
-
|
|
2719
|
-
|
|
2917
|
+
if s22.last
|
|
2918
|
+
r22 = instantiate_node(SyntaxNode,input, i22...index, s22)
|
|
2919
|
+
r22.extend(BasionymAuthorshipWithParenthesis4)
|
|
2920
|
+
r22.extend(BasionymAuthorshipWithParenthesis5)
|
|
2720
2921
|
else
|
|
2721
|
-
self.index =
|
|
2722
|
-
|
|
2922
|
+
self.index = i22
|
|
2923
|
+
r22 = nil
|
|
2723
2924
|
end
|
|
2724
|
-
if
|
|
2725
|
-
r0 =
|
|
2925
|
+
if r22
|
|
2926
|
+
r0 = r22
|
|
2726
2927
|
else
|
|
2727
|
-
|
|
2728
|
-
|
|
2729
|
-
|
|
2730
|
-
if
|
|
2731
|
-
|
|
2732
|
-
|
|
2733
|
-
if
|
|
2928
|
+
i28, s28 = index, []
|
|
2929
|
+
r29 = _nt_left_paren
|
|
2930
|
+
s28 << r29
|
|
2931
|
+
if r29
|
|
2932
|
+
r30 = _nt_space
|
|
2933
|
+
s28 << r30
|
|
2934
|
+
if r30
|
|
2734
2935
|
if input.index("?", index) == index
|
|
2735
|
-
|
|
2936
|
+
r31 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
|
2736
2937
|
@index += 1
|
|
2737
2938
|
else
|
|
2738
2939
|
terminal_parse_failure("?")
|
|
2739
|
-
|
|
2940
|
+
r31 = nil
|
|
2740
2941
|
end
|
|
2741
|
-
|
|
2742
|
-
if
|
|
2743
|
-
|
|
2744
|
-
|
|
2745
|
-
if
|
|
2746
|
-
|
|
2747
|
-
|
|
2942
|
+
s28 << r31
|
|
2943
|
+
if r31
|
|
2944
|
+
r32 = _nt_space
|
|
2945
|
+
s28 << r32
|
|
2946
|
+
if r32
|
|
2947
|
+
r33 = _nt_right_paren
|
|
2948
|
+
s28 << r33
|
|
2748
2949
|
end
|
|
2749
2950
|
end
|
|
2750
2951
|
end
|
|
2751
2952
|
end
|
|
2752
|
-
if
|
|
2753
|
-
|
|
2754
|
-
|
|
2755
|
-
|
|
2953
|
+
if s28.last
|
|
2954
|
+
r28 = instantiate_node(SyntaxNode,input, i28...index, s28)
|
|
2955
|
+
r28.extend(BasionymAuthorshipWithParenthesis6)
|
|
2956
|
+
r28.extend(BasionymAuthorshipWithParenthesis7)
|
|
2756
2957
|
else
|
|
2757
|
-
self.index =
|
|
2758
|
-
|
|
2958
|
+
self.index = i28
|
|
2959
|
+
r28 = nil
|
|
2759
2960
|
end
|
|
2760
|
-
if
|
|
2761
|
-
r0 =
|
|
2961
|
+
if r28
|
|
2962
|
+
r0 = r28
|
|
2762
2963
|
else
|
|
2763
2964
|
self.index = i0
|
|
2764
2965
|
r0 = nil
|
|
@@ -2850,6 +3051,30 @@ module ScientificNameClean
|
|
|
2850
3051
|
def b
|
|
2851
3052
|
elements[4]
|
|
2852
3053
|
end
|
|
3054
|
+
|
|
3055
|
+
def space
|
|
3056
|
+
elements[6]
|
|
3057
|
+
end
|
|
3058
|
+
|
|
3059
|
+
def space
|
|
3060
|
+
elements[8]
|
|
3061
|
+
end
|
|
3062
|
+
|
|
3063
|
+
def authors_names
|
|
3064
|
+
elements[9]
|
|
3065
|
+
end
|
|
3066
|
+
|
|
3067
|
+
def space
|
|
3068
|
+
elements[10]
|
|
3069
|
+
end
|
|
3070
|
+
|
|
3071
|
+
def space
|
|
3072
|
+
elements[12]
|
|
3073
|
+
end
|
|
3074
|
+
|
|
3075
|
+
def year
|
|
3076
|
+
elements[13]
|
|
3077
|
+
end
|
|
2853
3078
|
end
|
|
2854
3079
|
|
|
2855
3080
|
module SimpleAuthorship1
|
|
@@ -2875,6 +3100,46 @@ module ScientificNameClean
|
|
|
2875
3100
|
end
|
|
2876
3101
|
|
|
2877
3102
|
module SimpleAuthorship2
|
|
3103
|
+
def a
|
|
3104
|
+
elements[0]
|
|
3105
|
+
end
|
|
3106
|
+
|
|
3107
|
+
def space
|
|
3108
|
+
elements[1]
|
|
3109
|
+
end
|
|
3110
|
+
|
|
3111
|
+
def space
|
|
3112
|
+
elements[3]
|
|
3113
|
+
end
|
|
3114
|
+
|
|
3115
|
+
def b
|
|
3116
|
+
elements[4]
|
|
3117
|
+
end
|
|
3118
|
+
end
|
|
3119
|
+
|
|
3120
|
+
module SimpleAuthorship3
|
|
3121
|
+
def value
|
|
3122
|
+
a.value + " " + b.value
|
|
3123
|
+
end
|
|
3124
|
+
|
|
3125
|
+
def pos
|
|
3126
|
+
a.pos.merge(b.pos)
|
|
3127
|
+
end
|
|
3128
|
+
|
|
3129
|
+
def details
|
|
3130
|
+
details_with_arg(:basionymAuthorTeam)
|
|
3131
|
+
end
|
|
3132
|
+
|
|
3133
|
+
def details_with_arg(authorTeamType = 'basionymAuthorTeam')
|
|
3134
|
+
{ :authorship => text_value,
|
|
3135
|
+
authorTeamType.to_sym => {
|
|
3136
|
+
:authorTeam => a.text_value.strip
|
|
3137
|
+
}.merge(a.details).merge(b.details)
|
|
3138
|
+
}
|
|
3139
|
+
end
|
|
3140
|
+
end
|
|
3141
|
+
|
|
3142
|
+
module SimpleAuthorship4
|
|
2878
3143
|
def details
|
|
2879
3144
|
details = details_with_arg(:basionymAuthorTeam)
|
|
2880
3145
|
details[:basionymAuthorTeam].merge!(super)
|
|
@@ -2922,8 +3187,75 @@ module ScientificNameClean
|
|
|
2922
3187
|
r6 = _nt_space
|
|
2923
3188
|
s1 << r6
|
|
2924
3189
|
if r6
|
|
2925
|
-
|
|
3190
|
+
r8 = _nt_year
|
|
3191
|
+
if r8
|
|
3192
|
+
r7 = r8
|
|
3193
|
+
else
|
|
3194
|
+
r7 = instantiate_node(SyntaxNode,input, index...index)
|
|
3195
|
+
end
|
|
2926
3196
|
s1 << r7
|
|
3197
|
+
if r7
|
|
3198
|
+
if input.index(Regexp.new('[,]'), index) == index
|
|
3199
|
+
r10 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
|
3200
|
+
@index += 1
|
|
3201
|
+
else
|
|
3202
|
+
r10 = nil
|
|
3203
|
+
end
|
|
3204
|
+
if r10
|
|
3205
|
+
r9 = r10
|
|
3206
|
+
else
|
|
3207
|
+
r9 = instantiate_node(SyntaxNode,input, index...index)
|
|
3208
|
+
end
|
|
3209
|
+
s1 << r9
|
|
3210
|
+
if r9
|
|
3211
|
+
r11 = _nt_space
|
|
3212
|
+
s1 << r11
|
|
3213
|
+
if r11
|
|
3214
|
+
if input.index("non", index) == index
|
|
3215
|
+
r12 = instantiate_node(SyntaxNode,input, index...(index + 3))
|
|
3216
|
+
@index += 3
|
|
3217
|
+
else
|
|
3218
|
+
terminal_parse_failure("non")
|
|
3219
|
+
r12 = nil
|
|
3220
|
+
end
|
|
3221
|
+
s1 << r12
|
|
3222
|
+
if r12
|
|
3223
|
+
r13 = _nt_space
|
|
3224
|
+
s1 << r13
|
|
3225
|
+
if r13
|
|
3226
|
+
r14 = _nt_authors_names
|
|
3227
|
+
s1 << r14
|
|
3228
|
+
if r14
|
|
3229
|
+
r15 = _nt_space
|
|
3230
|
+
s1 << r15
|
|
3231
|
+
if r15
|
|
3232
|
+
if input.index(Regexp.new('[,]'), index) == index
|
|
3233
|
+
r17 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
|
3234
|
+
@index += 1
|
|
3235
|
+
else
|
|
3236
|
+
r17 = nil
|
|
3237
|
+
end
|
|
3238
|
+
if r17
|
|
3239
|
+
r16 = r17
|
|
3240
|
+
else
|
|
3241
|
+
r16 = instantiate_node(SyntaxNode,input, index...index)
|
|
3242
|
+
end
|
|
3243
|
+
s1 << r16
|
|
3244
|
+
if r16
|
|
3245
|
+
r18 = _nt_space
|
|
3246
|
+
s1 << r18
|
|
3247
|
+
if r18
|
|
3248
|
+
r19 = _nt_year
|
|
3249
|
+
s1 << r19
|
|
3250
|
+
end
|
|
3251
|
+
end
|
|
3252
|
+
end
|
|
3253
|
+
end
|
|
3254
|
+
end
|
|
3255
|
+
end
|
|
3256
|
+
end
|
|
3257
|
+
end
|
|
3258
|
+
end
|
|
2927
3259
|
end
|
|
2928
3260
|
end
|
|
2929
3261
|
end
|
|
@@ -2939,13 +3271,54 @@ module ScientificNameClean
|
|
|
2939
3271
|
if r1
|
|
2940
3272
|
r0 = r1
|
|
2941
3273
|
else
|
|
2942
|
-
|
|
2943
|
-
|
|
2944
|
-
|
|
2945
|
-
|
|
3274
|
+
i20, s20 = index, []
|
|
3275
|
+
r21 = _nt_authors_names
|
|
3276
|
+
s20 << r21
|
|
3277
|
+
if r21
|
|
3278
|
+
r22 = _nt_space
|
|
3279
|
+
s20 << r22
|
|
3280
|
+
if r22
|
|
3281
|
+
if input.index(Regexp.new('[,]'), index) == index
|
|
3282
|
+
r24 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
|
3283
|
+
@index += 1
|
|
3284
|
+
else
|
|
3285
|
+
r24 = nil
|
|
3286
|
+
end
|
|
3287
|
+
if r24
|
|
3288
|
+
r23 = r24
|
|
3289
|
+
else
|
|
3290
|
+
r23 = instantiate_node(SyntaxNode,input, index...index)
|
|
3291
|
+
end
|
|
3292
|
+
s20 << r23
|
|
3293
|
+
if r23
|
|
3294
|
+
r25 = _nt_space
|
|
3295
|
+
s20 << r25
|
|
3296
|
+
if r25
|
|
3297
|
+
r26 = _nt_year
|
|
3298
|
+
s20 << r26
|
|
3299
|
+
end
|
|
3300
|
+
end
|
|
3301
|
+
end
|
|
3302
|
+
end
|
|
3303
|
+
if s20.last
|
|
3304
|
+
r20 = instantiate_node(SyntaxNode,input, i20...index, s20)
|
|
3305
|
+
r20.extend(SimpleAuthorship2)
|
|
3306
|
+
r20.extend(SimpleAuthorship3)
|
|
2946
3307
|
else
|
|
2947
|
-
self.index =
|
|
2948
|
-
|
|
3308
|
+
self.index = i20
|
|
3309
|
+
r20 = nil
|
|
3310
|
+
end
|
|
3311
|
+
if r20
|
|
3312
|
+
r0 = r20
|
|
3313
|
+
else
|
|
3314
|
+
r27 = _nt_authors_names
|
|
3315
|
+
r27.extend(SimpleAuthorship4)
|
|
3316
|
+
if r27
|
|
3317
|
+
r0 = r27
|
|
3318
|
+
else
|
|
3319
|
+
self.index = i0
|
|
3320
|
+
r0 = nil
|
|
3321
|
+
end
|
|
2949
3322
|
end
|
|
2950
3323
|
end
|
|
2951
3324
|
|
|
@@ -3515,201 +3888,189 @@ module ScientificNameClean
|
|
|
3515
3888
|
r0 = r1
|
|
3516
3889
|
else
|
|
3517
3890
|
i2 = index
|
|
3518
|
-
if input.index("
|
|
3519
|
-
r3 = instantiate_node(SyntaxNode,input, index...(index +
|
|
3520
|
-
@index +=
|
|
3891
|
+
if input.index("arg.", index) == index
|
|
3892
|
+
r3 = instantiate_node(SyntaxNode,input, index...(index + 4))
|
|
3893
|
+
@index += 4
|
|
3521
3894
|
else
|
|
3522
|
-
terminal_parse_failure("
|
|
3895
|
+
terminal_parse_failure("arg.")
|
|
3523
3896
|
r3 = nil
|
|
3524
3897
|
end
|
|
3525
3898
|
if r3
|
|
3526
3899
|
r2 = r3
|
|
3527
3900
|
r2.extend(AuthorWord1)
|
|
3528
3901
|
else
|
|
3529
|
-
if input.index("
|
|
3530
|
-
r4 = instantiate_node(SyntaxNode,input, index...(index +
|
|
3531
|
-
@index +=
|
|
3902
|
+
if input.index("et al.\{\?\}", index) == index
|
|
3903
|
+
r4 = instantiate_node(SyntaxNode,input, index...(index + 9))
|
|
3904
|
+
@index += 9
|
|
3532
3905
|
else
|
|
3533
|
-
terminal_parse_failure("
|
|
3906
|
+
terminal_parse_failure("et al.\{\?\}")
|
|
3534
3907
|
r4 = nil
|
|
3535
3908
|
end
|
|
3536
3909
|
if r4
|
|
3537
3910
|
r2 = r4
|
|
3538
3911
|
r2.extend(AuthorWord1)
|
|
3539
3912
|
else
|
|
3540
|
-
if input.index("et al
|
|
3541
|
-
r5 = instantiate_node(SyntaxNode,input, index...(index +
|
|
3542
|
-
@index +=
|
|
3913
|
+
if input.index("et al.", index) == index
|
|
3914
|
+
r5 = instantiate_node(SyntaxNode,input, index...(index + 6))
|
|
3915
|
+
@index += 6
|
|
3543
3916
|
else
|
|
3544
|
-
terminal_parse_failure("et al
|
|
3917
|
+
terminal_parse_failure("et al.")
|
|
3545
3918
|
r5 = nil
|
|
3546
3919
|
end
|
|
3547
3920
|
if r5
|
|
3548
3921
|
r2 = r5
|
|
3549
3922
|
r2.extend(AuthorWord1)
|
|
3550
3923
|
else
|
|
3551
|
-
|
|
3552
|
-
|
|
3553
|
-
@index += 6
|
|
3554
|
-
else
|
|
3555
|
-
terminal_parse_failure("et al.")
|
|
3556
|
-
r6 = nil
|
|
3557
|
-
end
|
|
3558
|
-
if r6
|
|
3559
|
-
r2 = r6
|
|
3560
|
-
r2.extend(AuthorWord1)
|
|
3561
|
-
else
|
|
3562
|
-
self.index = i2
|
|
3563
|
-
r2 = nil
|
|
3564
|
-
end
|
|
3924
|
+
self.index = i2
|
|
3925
|
+
r2 = nil
|
|
3565
3926
|
end
|
|
3566
3927
|
end
|
|
3567
3928
|
end
|
|
3568
3929
|
if r2
|
|
3569
3930
|
r0 = r2
|
|
3570
3931
|
else
|
|
3571
|
-
|
|
3572
|
-
|
|
3932
|
+
i6, s6 = index, []
|
|
3933
|
+
i7 = index
|
|
3573
3934
|
if input.index("Å", index) == index
|
|
3574
|
-
|
|
3935
|
+
r8 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
|
3575
3936
|
@index += 1
|
|
3576
3937
|
else
|
|
3577
3938
|
terminal_parse_failure("Å")
|
|
3578
|
-
|
|
3939
|
+
r8 = nil
|
|
3579
3940
|
end
|
|
3580
|
-
if
|
|
3581
|
-
|
|
3941
|
+
if r8
|
|
3942
|
+
r7 = r8
|
|
3582
3943
|
else
|
|
3583
3944
|
if input.index("Ö", index) == index
|
|
3584
|
-
|
|
3945
|
+
r9 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
|
3585
3946
|
@index += 1
|
|
3586
3947
|
else
|
|
3587
3948
|
terminal_parse_failure("Ö")
|
|
3588
|
-
|
|
3949
|
+
r9 = nil
|
|
3589
3950
|
end
|
|
3590
|
-
if
|
|
3591
|
-
|
|
3951
|
+
if r9
|
|
3952
|
+
r7 = r9
|
|
3592
3953
|
else
|
|
3593
3954
|
if input.index("Á", index) == index
|
|
3594
|
-
|
|
3955
|
+
r10 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
|
3595
3956
|
@index += 1
|
|
3596
3957
|
else
|
|
3597
3958
|
terminal_parse_failure("Á")
|
|
3598
|
-
|
|
3959
|
+
r10 = nil
|
|
3599
3960
|
end
|
|
3600
|
-
if
|
|
3601
|
-
|
|
3961
|
+
if r10
|
|
3962
|
+
r7 = r10
|
|
3602
3963
|
else
|
|
3603
3964
|
if input.index("Ø", index) == index
|
|
3604
|
-
|
|
3965
|
+
r11 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
|
3605
3966
|
@index += 1
|
|
3606
3967
|
else
|
|
3607
3968
|
terminal_parse_failure("Ø")
|
|
3608
|
-
|
|
3969
|
+
r11 = nil
|
|
3609
3970
|
end
|
|
3610
|
-
if
|
|
3611
|
-
|
|
3971
|
+
if r11
|
|
3972
|
+
r7 = r11
|
|
3612
3973
|
else
|
|
3613
3974
|
if input.index("Ô", index) == index
|
|
3614
|
-
|
|
3975
|
+
r12 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
|
3615
3976
|
@index += 1
|
|
3616
3977
|
else
|
|
3617
3978
|
terminal_parse_failure("Ô")
|
|
3618
|
-
|
|
3979
|
+
r12 = nil
|
|
3619
3980
|
end
|
|
3620
|
-
if
|
|
3621
|
-
|
|
3981
|
+
if r12
|
|
3982
|
+
r7 = r12
|
|
3622
3983
|
else
|
|
3623
3984
|
if input.index("Š", index) == index
|
|
3624
|
-
|
|
3985
|
+
r13 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
|
3625
3986
|
@index += 1
|
|
3626
3987
|
else
|
|
3627
3988
|
terminal_parse_failure("Š")
|
|
3628
|
-
|
|
3989
|
+
r13 = nil
|
|
3629
3990
|
end
|
|
3630
|
-
if
|
|
3631
|
-
|
|
3991
|
+
if r13
|
|
3992
|
+
r7 = r13
|
|
3632
3993
|
else
|
|
3633
3994
|
if input.index("Ś", index) == index
|
|
3634
|
-
|
|
3995
|
+
r14 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
|
3635
3996
|
@index += 1
|
|
3636
3997
|
else
|
|
3637
3998
|
terminal_parse_failure("Ś")
|
|
3638
|
-
|
|
3999
|
+
r14 = nil
|
|
3639
4000
|
end
|
|
3640
|
-
if
|
|
3641
|
-
|
|
4001
|
+
if r14
|
|
4002
|
+
r7 = r14
|
|
3642
4003
|
else
|
|
3643
4004
|
if input.index("Č", index) == index
|
|
3644
|
-
|
|
4005
|
+
r15 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
|
3645
4006
|
@index += 1
|
|
3646
4007
|
else
|
|
3647
4008
|
terminal_parse_failure("Č")
|
|
3648
|
-
|
|
4009
|
+
r15 = nil
|
|
3649
4010
|
end
|
|
3650
|
-
if
|
|
3651
|
-
|
|
4011
|
+
if r15
|
|
4012
|
+
r7 = r15
|
|
3652
4013
|
else
|
|
3653
4014
|
if input.index("Ķ", index) == index
|
|
3654
|
-
|
|
4015
|
+
r16 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
|
3655
4016
|
@index += 1
|
|
3656
4017
|
else
|
|
3657
4018
|
terminal_parse_failure("Ķ")
|
|
3658
|
-
|
|
4019
|
+
r16 = nil
|
|
3659
4020
|
end
|
|
3660
|
-
if
|
|
3661
|
-
|
|
4021
|
+
if r16
|
|
4022
|
+
r7 = r16
|
|
3662
4023
|
else
|
|
3663
4024
|
if input.index("Ł", index) == index
|
|
3664
|
-
|
|
4025
|
+
r17 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
|
3665
4026
|
@index += 1
|
|
3666
4027
|
else
|
|
3667
4028
|
terminal_parse_failure("Ł")
|
|
3668
|
-
|
|
4029
|
+
r17 = nil
|
|
3669
4030
|
end
|
|
3670
|
-
if
|
|
3671
|
-
|
|
4031
|
+
if r17
|
|
4032
|
+
r7 = r17
|
|
3672
4033
|
else
|
|
3673
4034
|
if input.index("É", index) == index
|
|
3674
|
-
|
|
4035
|
+
r18 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
|
3675
4036
|
@index += 1
|
|
3676
4037
|
else
|
|
3677
4038
|
terminal_parse_failure("É")
|
|
3678
|
-
|
|
4039
|
+
r18 = nil
|
|
3679
4040
|
end
|
|
3680
|
-
if
|
|
3681
|
-
|
|
4041
|
+
if r18
|
|
4042
|
+
r7 = r18
|
|
3682
4043
|
else
|
|
3683
4044
|
if input.index("Ž", index) == index
|
|
3684
|
-
|
|
4045
|
+
r19 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
|
3685
4046
|
@index += 1
|
|
3686
4047
|
else
|
|
3687
4048
|
terminal_parse_failure("Ž")
|
|
3688
|
-
|
|
4049
|
+
r19 = nil
|
|
3689
4050
|
end
|
|
3690
|
-
if
|
|
3691
|
-
|
|
4051
|
+
if r19
|
|
4052
|
+
r7 = r19
|
|
3692
4053
|
else
|
|
3693
4054
|
if input.index(Regexp.new('[A-W]'), index) == index
|
|
3694
|
-
|
|
4055
|
+
r20 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
|
3695
4056
|
@index += 1
|
|
3696
4057
|
else
|
|
3697
|
-
|
|
4058
|
+
r20 = nil
|
|
3698
4059
|
end
|
|
3699
|
-
if
|
|
3700
|
-
|
|
4060
|
+
if r20
|
|
4061
|
+
r7 = r20
|
|
3701
4062
|
else
|
|
3702
4063
|
if input.index(Regexp.new('[Y-Z]'), index) == index
|
|
3703
|
-
|
|
4064
|
+
r21 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
|
3704
4065
|
@index += 1
|
|
3705
4066
|
else
|
|
3706
|
-
|
|
4067
|
+
r21 = nil
|
|
3707
4068
|
end
|
|
3708
|
-
if
|
|
3709
|
-
|
|
4069
|
+
if r21
|
|
4070
|
+
r7 = r21
|
|
3710
4071
|
else
|
|
3711
|
-
self.index =
|
|
3712
|
-
|
|
4072
|
+
self.index = i7
|
|
4073
|
+
r7 = nil
|
|
3713
4074
|
end
|
|
3714
4075
|
end
|
|
3715
4076
|
end
|
|
@@ -3724,82 +4085,82 @@ module ScientificNameClean
|
|
|
3724
4085
|
end
|
|
3725
4086
|
end
|
|
3726
4087
|
end
|
|
3727
|
-
|
|
3728
|
-
if
|
|
3729
|
-
|
|
4088
|
+
s6 << r7
|
|
4089
|
+
if r7
|
|
4090
|
+
s22, i22 = [], index
|
|
3730
4091
|
loop do
|
|
3731
4092
|
if input.index(Regexp.new('[^0-9\\[\\]\\(\\)\\s&,]'), index) == index
|
|
3732
|
-
|
|
4093
|
+
r23 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
|
3733
4094
|
@index += 1
|
|
3734
4095
|
else
|
|
3735
|
-
|
|
4096
|
+
r23 = nil
|
|
3736
4097
|
end
|
|
3737
|
-
if
|
|
3738
|
-
|
|
4098
|
+
if r23
|
|
4099
|
+
s22 << r23
|
|
3739
4100
|
else
|
|
3740
4101
|
break
|
|
3741
4102
|
end
|
|
3742
4103
|
end
|
|
3743
|
-
|
|
3744
|
-
|
|
4104
|
+
r22 = instantiate_node(SyntaxNode,input, i22...index, s22)
|
|
4105
|
+
s6 << r22
|
|
3745
4106
|
end
|
|
3746
|
-
if
|
|
3747
|
-
|
|
3748
|
-
|
|
3749
|
-
|
|
4107
|
+
if s6.last
|
|
4108
|
+
r6 = instantiate_node(SyntaxNode,input, i6...index, s6)
|
|
4109
|
+
r6.extend(AuthorWord2)
|
|
4110
|
+
r6.extend(AuthorWord3)
|
|
3750
4111
|
else
|
|
3751
|
-
self.index =
|
|
3752
|
-
|
|
4112
|
+
self.index = i6
|
|
4113
|
+
r6 = nil
|
|
3753
4114
|
end
|
|
3754
|
-
if
|
|
3755
|
-
r0 =
|
|
4115
|
+
if r6
|
|
4116
|
+
r0 = r6
|
|
3756
4117
|
else
|
|
3757
|
-
|
|
4118
|
+
i24, s24 = index, []
|
|
3758
4119
|
if input.index("X", index) == index
|
|
3759
|
-
|
|
4120
|
+
r25 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
|
3760
4121
|
@index += 1
|
|
3761
4122
|
else
|
|
3762
4123
|
terminal_parse_failure("X")
|
|
3763
|
-
|
|
4124
|
+
r25 = nil
|
|
3764
4125
|
end
|
|
3765
|
-
|
|
3766
|
-
if
|
|
3767
|
-
|
|
4126
|
+
s24 << r25
|
|
4127
|
+
if r25
|
|
4128
|
+
s26, i26 = [], index
|
|
3768
4129
|
loop do
|
|
3769
4130
|
if input.index(Regexp.new('[^0-9\\[\\]\\(\\)\\s&,]'), index) == index
|
|
3770
|
-
|
|
4131
|
+
r27 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
|
3771
4132
|
@index += 1
|
|
3772
4133
|
else
|
|
3773
|
-
|
|
4134
|
+
r27 = nil
|
|
3774
4135
|
end
|
|
3775
|
-
if
|
|
3776
|
-
|
|
4136
|
+
if r27
|
|
4137
|
+
s26 << r27
|
|
3777
4138
|
else
|
|
3778
4139
|
break
|
|
3779
4140
|
end
|
|
3780
4141
|
end
|
|
3781
|
-
if
|
|
3782
|
-
self.index =
|
|
3783
|
-
|
|
4142
|
+
if s26.empty?
|
|
4143
|
+
self.index = i26
|
|
4144
|
+
r26 = nil
|
|
3784
4145
|
else
|
|
3785
|
-
|
|
4146
|
+
r26 = instantiate_node(SyntaxNode,input, i26...index, s26)
|
|
3786
4147
|
end
|
|
3787
|
-
|
|
4148
|
+
s24 << r26
|
|
3788
4149
|
end
|
|
3789
|
-
if
|
|
3790
|
-
|
|
3791
|
-
|
|
3792
|
-
|
|
4150
|
+
if s24.last
|
|
4151
|
+
r24 = instantiate_node(SyntaxNode,input, i24...index, s24)
|
|
4152
|
+
r24.extend(AuthorWord4)
|
|
4153
|
+
r24.extend(AuthorWord5)
|
|
3793
4154
|
else
|
|
3794
|
-
self.index =
|
|
3795
|
-
|
|
4155
|
+
self.index = i24
|
|
4156
|
+
r24 = nil
|
|
3796
4157
|
end
|
|
3797
|
-
if
|
|
3798
|
-
r0 =
|
|
4158
|
+
if r24
|
|
4159
|
+
r0 = r24
|
|
3799
4160
|
else
|
|
3800
|
-
|
|
3801
|
-
if
|
|
3802
|
-
r0 =
|
|
4161
|
+
r28 = _nt_author_prefix_word
|
|
4162
|
+
if r28
|
|
4163
|
+
r0 = r28
|
|
3803
4164
|
else
|
|
3804
4165
|
self.index = i0
|
|
3805
4166
|
r0 = nil
|
|
@@ -3845,98 +4206,153 @@ module ScientificNameClean
|
|
|
3845
4206
|
s0 << r1
|
|
3846
4207
|
if r1
|
|
3847
4208
|
i2 = index
|
|
3848
|
-
if input.index("
|
|
4209
|
+
if input.index("ab", index) == index
|
|
3849
4210
|
r3 = instantiate_node(SyntaxNode,input, index...(index + 2))
|
|
3850
4211
|
@index += 2
|
|
3851
4212
|
else
|
|
3852
|
-
terminal_parse_failure("
|
|
4213
|
+
terminal_parse_failure("ab")
|
|
3853
4214
|
r3 = nil
|
|
3854
4215
|
end
|
|
3855
4216
|
if r3
|
|
3856
4217
|
r2 = r3
|
|
3857
4218
|
else
|
|
3858
|
-
if input.index("
|
|
4219
|
+
if input.index("bis", index) == index
|
|
3859
4220
|
r4 = instantiate_node(SyntaxNode,input, index...(index + 3))
|
|
3860
4221
|
@index += 3
|
|
3861
4222
|
else
|
|
3862
|
-
terminal_parse_failure("
|
|
4223
|
+
terminal_parse_failure("bis")
|
|
3863
4224
|
r4 = nil
|
|
3864
4225
|
end
|
|
3865
4226
|
if r4
|
|
3866
4227
|
r2 = r4
|
|
3867
4228
|
else
|
|
3868
|
-
if input.index("
|
|
3869
|
-
r5 = instantiate_node(SyntaxNode,input, index...(index +
|
|
3870
|
-
@index +=
|
|
4229
|
+
if input.index("da", index) == index
|
|
4230
|
+
r5 = instantiate_node(SyntaxNode,input, index...(index + 2))
|
|
4231
|
+
@index += 2
|
|
3871
4232
|
else
|
|
3872
|
-
terminal_parse_failure("
|
|
4233
|
+
terminal_parse_failure("da")
|
|
3873
4234
|
r5 = nil
|
|
3874
4235
|
end
|
|
3875
4236
|
if r5
|
|
3876
4237
|
r2 = r5
|
|
3877
4238
|
else
|
|
3878
|
-
if input.index("
|
|
3879
|
-
r6 = instantiate_node(SyntaxNode,input, index...(index +
|
|
3880
|
-
@index +=
|
|
4239
|
+
if input.index("der", index) == index
|
|
4240
|
+
r6 = instantiate_node(SyntaxNode,input, index...(index + 3))
|
|
4241
|
+
@index += 3
|
|
3881
4242
|
else
|
|
3882
|
-
terminal_parse_failure("
|
|
4243
|
+
terminal_parse_failure("der")
|
|
3883
4244
|
r6 = nil
|
|
3884
4245
|
end
|
|
3885
4246
|
if r6
|
|
3886
4247
|
r2 = r6
|
|
3887
4248
|
else
|
|
3888
|
-
if input.index("
|
|
3889
|
-
r7 = instantiate_node(SyntaxNode,input, index...(index +
|
|
3890
|
-
@index +=
|
|
4249
|
+
if input.index("den", index) == index
|
|
4250
|
+
r7 = instantiate_node(SyntaxNode,input, index...(index + 3))
|
|
4251
|
+
@index += 3
|
|
3891
4252
|
else
|
|
3892
|
-
terminal_parse_failure("
|
|
4253
|
+
terminal_parse_failure("den")
|
|
3893
4254
|
r7 = nil
|
|
3894
4255
|
end
|
|
3895
4256
|
if r7
|
|
3896
4257
|
r2 = r7
|
|
3897
4258
|
else
|
|
3898
|
-
if input.index("
|
|
3899
|
-
r8 = instantiate_node(SyntaxNode,input, index...(index +
|
|
3900
|
-
@index +=
|
|
4259
|
+
if input.index("della", index) == index
|
|
4260
|
+
r8 = instantiate_node(SyntaxNode,input, index...(index + 5))
|
|
4261
|
+
@index += 5
|
|
3901
4262
|
else
|
|
3902
|
-
terminal_parse_failure("
|
|
4263
|
+
terminal_parse_failure("della")
|
|
3903
4264
|
r8 = nil
|
|
3904
4265
|
end
|
|
3905
4266
|
if r8
|
|
3906
4267
|
r2 = r8
|
|
3907
4268
|
else
|
|
3908
|
-
if input.index("
|
|
3909
|
-
r9 = instantiate_node(SyntaxNode,input, index...(index +
|
|
3910
|
-
@index +=
|
|
4269
|
+
if input.index("dela", index) == index
|
|
4270
|
+
r9 = instantiate_node(SyntaxNode,input, index...(index + 4))
|
|
4271
|
+
@index += 4
|
|
3911
4272
|
else
|
|
3912
|
-
terminal_parse_failure("
|
|
4273
|
+
terminal_parse_failure("dela")
|
|
3913
4274
|
r9 = nil
|
|
3914
4275
|
end
|
|
3915
4276
|
if r9
|
|
3916
4277
|
r2 = r9
|
|
3917
4278
|
else
|
|
3918
|
-
if input.index("
|
|
3919
|
-
r10 = instantiate_node(SyntaxNode,input, index...(index +
|
|
3920
|
-
@index +=
|
|
4279
|
+
if input.index("de", index) == index
|
|
4280
|
+
r10 = instantiate_node(SyntaxNode,input, index...(index + 2))
|
|
4281
|
+
@index += 2
|
|
3921
4282
|
else
|
|
3922
|
-
terminal_parse_failure("
|
|
4283
|
+
terminal_parse_failure("de")
|
|
3923
4284
|
r10 = nil
|
|
3924
4285
|
end
|
|
3925
4286
|
if r10
|
|
3926
4287
|
r2 = r10
|
|
3927
4288
|
else
|
|
3928
|
-
if input.index("
|
|
3929
|
-
r11 = instantiate_node(SyntaxNode,input, index...(index +
|
|
3930
|
-
@index +=
|
|
4289
|
+
if input.index("di", index) == index
|
|
4290
|
+
r11 = instantiate_node(SyntaxNode,input, index...(index + 2))
|
|
4291
|
+
@index += 2
|
|
3931
4292
|
else
|
|
3932
|
-
terminal_parse_failure("
|
|
4293
|
+
terminal_parse_failure("di")
|
|
3933
4294
|
r11 = nil
|
|
3934
4295
|
end
|
|
3935
4296
|
if r11
|
|
3936
4297
|
r2 = r11
|
|
3937
4298
|
else
|
|
3938
|
-
|
|
3939
|
-
|
|
4299
|
+
if input.index("du", index) == index
|
|
4300
|
+
r12 = instantiate_node(SyntaxNode,input, index...(index + 2))
|
|
4301
|
+
@index += 2
|
|
4302
|
+
else
|
|
4303
|
+
terminal_parse_failure("du")
|
|
4304
|
+
r12 = nil
|
|
4305
|
+
end
|
|
4306
|
+
if r12
|
|
4307
|
+
r2 = r12
|
|
4308
|
+
else
|
|
4309
|
+
if input.index("la", index) == index
|
|
4310
|
+
r13 = instantiate_node(SyntaxNode,input, index...(index + 2))
|
|
4311
|
+
@index += 2
|
|
4312
|
+
else
|
|
4313
|
+
terminal_parse_failure("la")
|
|
4314
|
+
r13 = nil
|
|
4315
|
+
end
|
|
4316
|
+
if r13
|
|
4317
|
+
r2 = r13
|
|
4318
|
+
else
|
|
4319
|
+
if input.index("ter", index) == index
|
|
4320
|
+
r14 = instantiate_node(SyntaxNode,input, index...(index + 3))
|
|
4321
|
+
@index += 3
|
|
4322
|
+
else
|
|
4323
|
+
terminal_parse_failure("ter")
|
|
4324
|
+
r14 = nil
|
|
4325
|
+
end
|
|
4326
|
+
if r14
|
|
4327
|
+
r2 = r14
|
|
4328
|
+
else
|
|
4329
|
+
if input.index("van", index) == index
|
|
4330
|
+
r15 = instantiate_node(SyntaxNode,input, index...(index + 3))
|
|
4331
|
+
@index += 3
|
|
4332
|
+
else
|
|
4333
|
+
terminal_parse_failure("van")
|
|
4334
|
+
r15 = nil
|
|
4335
|
+
end
|
|
4336
|
+
if r15
|
|
4337
|
+
r2 = r15
|
|
4338
|
+
else
|
|
4339
|
+
if input.index("von", index) == index
|
|
4340
|
+
r16 = instantiate_node(SyntaxNode,input, index...(index + 3))
|
|
4341
|
+
@index += 3
|
|
4342
|
+
else
|
|
4343
|
+
terminal_parse_failure("von")
|
|
4344
|
+
r16 = nil
|
|
4345
|
+
end
|
|
4346
|
+
if r16
|
|
4347
|
+
r2 = r16
|
|
4348
|
+
else
|
|
4349
|
+
self.index = i2
|
|
4350
|
+
r2 = nil
|
|
4351
|
+
end
|
|
4352
|
+
end
|
|
4353
|
+
end
|
|
4354
|
+
end
|
|
4355
|
+
end
|
|
3940
4356
|
end
|
|
3941
4357
|
end
|
|
3942
4358
|
end
|
|
@@ -3948,15 +4364,15 @@ module ScientificNameClean
|
|
|
3948
4364
|
end
|
|
3949
4365
|
s0 << r2
|
|
3950
4366
|
if r2
|
|
3951
|
-
|
|
3952
|
-
|
|
3953
|
-
if
|
|
3954
|
-
self.index =
|
|
3955
|
-
|
|
4367
|
+
i17 = index
|
|
4368
|
+
r18 = _nt_space_hard
|
|
4369
|
+
if r18
|
|
4370
|
+
self.index = i17
|
|
4371
|
+
r17 = instantiate_node(SyntaxNode,input, index...index)
|
|
3956
4372
|
else
|
|
3957
|
-
|
|
4373
|
+
r17 = nil
|
|
3958
4374
|
end
|
|
3959
|
-
s0 <<
|
|
4375
|
+
s0 << r17
|
|
3960
4376
|
end
|
|
3961
4377
|
end
|
|
3962
4378
|
if s0.last
|
|
@@ -4540,6 +4956,79 @@ module ScientificNameClean
|
|
|
4540
4956
|
return r0
|
|
4541
4957
|
end
|
|
4542
4958
|
|
|
4959
|
+
module SpeciesPrefix0
|
|
4960
|
+
end
|
|
4961
|
+
|
|
4962
|
+
def _nt_species_prefix
|
|
4963
|
+
start_index = index
|
|
4964
|
+
if node_cache[:species_prefix].has_key?(index)
|
|
4965
|
+
cached = node_cache[:species_prefix][index]
|
|
4966
|
+
@index = cached.interval.end if cached
|
|
4967
|
+
return cached
|
|
4968
|
+
end
|
|
4969
|
+
|
|
4970
|
+
i0, s0 = index, []
|
|
4971
|
+
i1 = index
|
|
4972
|
+
if input.index("aff.", index) == index
|
|
4973
|
+
r2 = instantiate_node(SyntaxNode,input, index...(index + 4))
|
|
4974
|
+
@index += 4
|
|
4975
|
+
else
|
|
4976
|
+
terminal_parse_failure("aff.")
|
|
4977
|
+
r2 = nil
|
|
4978
|
+
end
|
|
4979
|
+
if r2
|
|
4980
|
+
r1 = r2
|
|
4981
|
+
else
|
|
4982
|
+
if input.index("corrig.", index) == index
|
|
4983
|
+
r3 = instantiate_node(SyntaxNode,input, index...(index + 7))
|
|
4984
|
+
@index += 7
|
|
4985
|
+
else
|
|
4986
|
+
terminal_parse_failure("corrig.")
|
|
4987
|
+
r3 = nil
|
|
4988
|
+
end
|
|
4989
|
+
if r3
|
|
4990
|
+
r1 = r3
|
|
4991
|
+
else
|
|
4992
|
+
if input.index("?", index) == index
|
|
4993
|
+
r4 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
|
4994
|
+
@index += 1
|
|
4995
|
+
else
|
|
4996
|
+
terminal_parse_failure("?")
|
|
4997
|
+
r4 = nil
|
|
4998
|
+
end
|
|
4999
|
+
if r4
|
|
5000
|
+
r1 = r4
|
|
5001
|
+
else
|
|
5002
|
+
self.index = i1
|
|
5003
|
+
r1 = nil
|
|
5004
|
+
end
|
|
5005
|
+
end
|
|
5006
|
+
end
|
|
5007
|
+
s0 << r1
|
|
5008
|
+
if r1
|
|
5009
|
+
i5 = index
|
|
5010
|
+
r6 = _nt_space_hard
|
|
5011
|
+
if r6
|
|
5012
|
+
self.index = i5
|
|
5013
|
+
r5 = instantiate_node(SyntaxNode,input, index...index)
|
|
5014
|
+
else
|
|
5015
|
+
r5 = nil
|
|
5016
|
+
end
|
|
5017
|
+
s0 << r5
|
|
5018
|
+
end
|
|
5019
|
+
if s0.last
|
|
5020
|
+
r0 = instantiate_node(SyntaxNode,input, i0...index, s0)
|
|
5021
|
+
r0.extend(SpeciesPrefix0)
|
|
5022
|
+
else
|
|
5023
|
+
self.index = i0
|
|
5024
|
+
r0 = nil
|
|
5025
|
+
end
|
|
5026
|
+
|
|
5027
|
+
node_cache[:species_prefix][start_index] = r0
|
|
5028
|
+
|
|
5029
|
+
return r0
|
|
5030
|
+
end
|
|
5031
|
+
|
|
4543
5032
|
module SpeciesWord0
|
|
4544
5033
|
def a
|
|
4545
5034
|
elements[0]
|
|
@@ -4673,7 +5162,7 @@ module ScientificNameClean
|
|
|
4673
5162
|
|
|
4674
5163
|
i0 = index
|
|
4675
5164
|
i1, s1 = index, []
|
|
4676
|
-
if input.index(Regexp.new('[a-
|
|
5165
|
+
if input.index(Regexp.new('[a-zëüäöïéåóç]'), index) == index
|
|
4677
5166
|
r2 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
|
4678
5167
|
@index += 1
|
|
4679
5168
|
else
|
|
@@ -4839,7 +5328,7 @@ module ScientificNameClean
|
|
|
4839
5328
|
|
|
4840
5329
|
s0, i0 = [], index
|
|
4841
5330
|
loop do
|
|
4842
|
-
if input.index(Regexp.new('[a-z
|
|
5331
|
+
if input.index(Regexp.new('[a-z\\-ëüäöïéåóç]'), index) == index
|
|
4843
5332
|
r1 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
|
4844
5333
|
@index += 1
|
|
4845
5334
|
else
|