biodiversity 3.4.2 → 3.4.3
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG +3 -0
- data/lib/biodiversity/parser/scientific_name_canonical.rb +24 -11
- data/lib/biodiversity/parser/scientific_name_clean.rb +1528 -880
- data/lib/biodiversity/parser/scientific_name_clean.treetop +35 -3
- data/lib/biodiversity/parser/scientific_name_dirty.rb +93 -64
- data/lib/biodiversity/version.rb +1 -1
- data/spec/files/test_data.txt +4 -0
- metadata +2 -2
|
@@ -800,6 +800,22 @@ grammar ScientificNameClean
|
|
|
800
800
|
end
|
|
801
801
|
|
|
802
802
|
rule authorship
|
|
803
|
+
a:basionym_authorship_with_parenthesis space b:simple_authorship ","? space c:ex_authorship {
|
|
804
|
+
def value
|
|
805
|
+
a.value + " " + b.value + " " + c.value
|
|
806
|
+
end
|
|
807
|
+
|
|
808
|
+
def pos
|
|
809
|
+
a.pos.merge(b.pos).merge(c.pos)
|
|
810
|
+
end
|
|
811
|
+
|
|
812
|
+
def details
|
|
813
|
+
val = {:authorship => text_value.strip, :combinationAuthorTeam => b.details[:basionymAuthorTeam], :basionymAuthorTeam => a.details[:basionymAuthorTeam]}
|
|
814
|
+
val[:combinationAuthorTeam].merge!(c.details)
|
|
815
|
+
val
|
|
816
|
+
end
|
|
817
|
+
}
|
|
818
|
+
/
|
|
803
819
|
a:basionym_authorship_with_parenthesis space b:simple_authorship ","? space c:ex_authorship {
|
|
804
820
|
def value
|
|
805
821
|
a.value + " " + b.value + " " + c.value
|
|
@@ -919,6 +935,21 @@ grammar ScientificNameClean
|
|
|
919
935
|
end
|
|
920
936
|
|
|
921
937
|
rule ex_authorship
|
|
938
|
+
ex_sep space b:simple_authorship space ex_sep space c:simple_authorship {
|
|
939
|
+
def value
|
|
940
|
+
" ex " + b.value + " ex " + c.value
|
|
941
|
+
end
|
|
942
|
+
|
|
943
|
+
def pos
|
|
944
|
+
b.pos
|
|
945
|
+
end
|
|
946
|
+
|
|
947
|
+
def details
|
|
948
|
+
val = {:exAuthorTeam => {:authorTeam => b.text_value.strip}.merge(b.details[:basionymAuthorTeam])}
|
|
949
|
+
val
|
|
950
|
+
end
|
|
951
|
+
}
|
|
952
|
+
/
|
|
922
953
|
ex_sep space b:simple_authorship {
|
|
923
954
|
def value
|
|
924
955
|
" ex " + b.value
|
|
@@ -1039,10 +1070,11 @@ grammar ScientificNameClean
|
|
|
1039
1070
|
end
|
|
1040
1071
|
|
|
1041
1072
|
rule author_separator
|
|
1042
|
-
("&"/"&"/","/"and"/"et") {
|
|
1073
|
+
("&"/"&"/","/"apud"/"and"/"et") {
|
|
1043
1074
|
def apply(a,b)
|
|
1044
1075
|
sep = text_value.strip
|
|
1045
1076
|
sep = " &" if ["&", "&","and","et"].include? sep
|
|
1077
|
+
sep = " apud" if sep == "apud"
|
|
1046
1078
|
a.value + sep + " " + b.value
|
|
1047
1079
|
end
|
|
1048
1080
|
|
|
@@ -1170,7 +1202,7 @@ grammar ScientificNameClean
|
|
|
1170
1202
|
end
|
|
1171
1203
|
|
|
1172
1204
|
rule author_prefix_word
|
|
1173
|
-
space ("ab"/"af"/"bis"/"da"/"der"/"des"/"den"/"della"/"dela"/"de"/"di"/"du"/"la"/"ter"/"van"/"von") &space_hard {
|
|
1205
|
+
space ("ab"/"af"/"bis"/"da"/"der"/"des"/"den"/"della"/"dela"/"de"/"di"/"du"/"do"/"la"/"ter"/"van"/"von") &space_hard {
|
|
1174
1206
|
def value
|
|
1175
1207
|
text_value
|
|
1176
1208
|
end
|
|
@@ -1183,7 +1215,7 @@ grammar ScientificNameClean
|
|
|
1183
1215
|
end
|
|
1184
1216
|
|
|
1185
1217
|
rule author_postfix_word
|
|
1186
|
-
("f."/"filius") {
|
|
1218
|
+
("f."/"fil."/"filius") {
|
|
1187
1219
|
def value
|
|
1188
1220
|
text_value.strip
|
|
1189
1221
|
end
|
|
@@ -56,7 +56,7 @@ module ScientificNameDirty
|
|
|
56
56
|
if node_cache[:root].has_key?(index)
|
|
57
57
|
cached = node_cache[:root][index]
|
|
58
58
|
if cached
|
|
59
|
-
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
|
59
|
+
node_cache[:root][index] = cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
|
60
60
|
@index = cached.interval.end
|
|
61
61
|
end
|
|
62
62
|
return cached
|
|
@@ -120,7 +120,7 @@ module ScientificNameDirty
|
|
|
120
120
|
if node_cache[:scientific_name_5].has_key?(index)
|
|
121
121
|
cached = node_cache[:scientific_name_5][index]
|
|
122
122
|
if cached
|
|
123
|
-
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
|
123
|
+
node_cache[:scientific_name_5][index] = cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
|
124
124
|
@index = cached.interval.end
|
|
125
125
|
end
|
|
126
126
|
return cached
|
|
@@ -143,10 +143,12 @@ module ScientificNameDirty
|
|
|
143
143
|
r1 = nil
|
|
144
144
|
end
|
|
145
145
|
if r1
|
|
146
|
+
r1 = SyntaxNode.new(input, (index-1)...index) if r1 == true
|
|
146
147
|
r0 = r1
|
|
147
148
|
else
|
|
148
149
|
r4 = super
|
|
149
150
|
if r4
|
|
151
|
+
r4 = SyntaxNode.new(input, (index-1)...index) if r4 == true
|
|
150
152
|
r0 = r4
|
|
151
153
|
else
|
|
152
154
|
@index = i0
|
|
@@ -236,7 +238,7 @@ module ScientificNameDirty
|
|
|
236
238
|
if node_cache[:infraspecies].has_key?(index)
|
|
237
239
|
cached = node_cache[:infraspecies][index]
|
|
238
240
|
if cached
|
|
239
|
-
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
|
241
|
+
node_cache[:infraspecies][index] = cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
|
240
242
|
@index = cached.interval.end
|
|
241
243
|
end
|
|
242
244
|
return cached
|
|
@@ -263,6 +265,7 @@ module ScientificNameDirty
|
|
|
263
265
|
r1 = nil
|
|
264
266
|
end
|
|
265
267
|
if r1
|
|
268
|
+
r1 = SyntaxNode.new(input, (index-1)...index) if r1 == true
|
|
266
269
|
r0 = r1
|
|
267
270
|
else
|
|
268
271
|
i5, s5 = index, []
|
|
@@ -293,10 +296,12 @@ module ScientificNameDirty
|
|
|
293
296
|
r5 = nil
|
|
294
297
|
end
|
|
295
298
|
if r5
|
|
299
|
+
r5 = SyntaxNode.new(input, (index-1)...index) if r5 == true
|
|
296
300
|
r0 = r5
|
|
297
301
|
else
|
|
298
302
|
r11 = super
|
|
299
303
|
if r11
|
|
304
|
+
r11 = SyntaxNode.new(input, (index-1)...index) if r11 == true
|
|
300
305
|
r0 = r11
|
|
301
306
|
else
|
|
302
307
|
@index = i0
|
|
@@ -347,7 +352,7 @@ module ScientificNameDirty
|
|
|
347
352
|
if node_cache[:species].has_key?(index)
|
|
348
353
|
cached = node_cache[:species][index]
|
|
349
354
|
if cached
|
|
350
|
-
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
|
355
|
+
node_cache[:species][index] = cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
|
351
356
|
@index = cached.interval.end
|
|
352
357
|
end
|
|
353
358
|
return cached
|
|
@@ -374,10 +379,12 @@ module ScientificNameDirty
|
|
|
374
379
|
r1 = nil
|
|
375
380
|
end
|
|
376
381
|
if r1
|
|
382
|
+
r1 = SyntaxNode.new(input, (index-1)...index) if r1 == true
|
|
377
383
|
r0 = r1
|
|
378
384
|
else
|
|
379
385
|
r5 = super
|
|
380
386
|
if r5
|
|
387
|
+
r5 = SyntaxNode.new(input, (index-1)...index) if r5 == true
|
|
381
388
|
r0 = r5
|
|
382
389
|
else
|
|
383
390
|
@index = i0
|
|
@@ -418,17 +425,18 @@ module ScientificNameDirty
|
|
|
418
425
|
if node_cache[:latin_word].has_key?(index)
|
|
419
426
|
cached = node_cache[:latin_word][index]
|
|
420
427
|
if cached
|
|
421
|
-
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
|
428
|
+
node_cache[:latin_word][index] = cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
|
422
429
|
@index = cached.interval.end
|
|
423
430
|
end
|
|
424
431
|
return cached
|
|
425
432
|
end
|
|
426
433
|
|
|
427
434
|
i0, s0 = index, []
|
|
428
|
-
if has_terminal?('\
|
|
435
|
+
if has_terminal?(@regexps[gr = '\A[a-z\\-æœàâåãäáçčëéèíìïňññóòôøõöúùüŕřŗššşž]'] ||= Regexp.new(gr), :regexp, index)
|
|
429
436
|
r1 = true
|
|
430
437
|
@index += 1
|
|
431
438
|
else
|
|
439
|
+
terminal_parse_failure('[a-z\\-æœàâåãäáçčëéèíìïňññóòôøõöúùüŕřŗššşž]')
|
|
432
440
|
r1 = nil
|
|
433
441
|
end
|
|
434
442
|
s0 << r1
|
|
@@ -469,7 +477,7 @@ module ScientificNameDirty
|
|
|
469
477
|
if node_cache[:valid_name_letters].has_key?(index)
|
|
470
478
|
cached = node_cache[:valid_name_letters][index]
|
|
471
479
|
if cached
|
|
472
|
-
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
|
480
|
+
node_cache[:valid_name_letters][index] = cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
|
473
481
|
@index = cached.interval.end
|
|
474
482
|
end
|
|
475
483
|
return cached
|
|
@@ -477,10 +485,11 @@ module ScientificNameDirty
|
|
|
477
485
|
|
|
478
486
|
s0, i0 = [], index
|
|
479
487
|
loop do
|
|
480
|
-
if has_terminal?('\
|
|
488
|
+
if has_terminal?(@regexps[gr = '\A[a-z\\-æœàâåãäáçčëéèíìïňññóòôøõöúùüŕřŗššşž]'] ||= Regexp.new(gr), :regexp, index)
|
|
481
489
|
r1 = true
|
|
482
490
|
@index += 1
|
|
483
491
|
else
|
|
492
|
+
terminal_parse_failure('[a-z\\-æœàâåãäáçčëéèíìïňññóòôøõöúùüŕřŗššşž]')
|
|
484
493
|
r1 = nil
|
|
485
494
|
end
|
|
486
495
|
if r1
|
|
@@ -495,6 +504,7 @@ module ScientificNameDirty
|
|
|
495
504
|
else
|
|
496
505
|
r0 = instantiate_node(SyntaxNode,input, i0...index, s0)
|
|
497
506
|
r0.extend(ValidNameLetters0)
|
|
507
|
+
r0.extend(ValidNameLetters0)
|
|
498
508
|
end
|
|
499
509
|
|
|
500
510
|
node_cache[:valid_name_letters][start_index] = r0
|
|
@@ -514,7 +524,7 @@ module ScientificNameDirty
|
|
|
514
524
|
if node_cache[:right_paren].has_key?(index)
|
|
515
525
|
cached = node_cache[:right_paren][index]
|
|
516
526
|
if cached
|
|
517
|
-
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
|
527
|
+
node_cache[:right_paren][index] = cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
|
518
528
|
@index = cached.interval.end
|
|
519
529
|
end
|
|
520
530
|
return cached
|
|
@@ -522,11 +532,11 @@ module ScientificNameDirty
|
|
|
522
532
|
|
|
523
533
|
i0 = index
|
|
524
534
|
i1, s1 = index, []
|
|
525
|
-
if has_terminal?(")", false, index)
|
|
526
|
-
r2 =
|
|
527
|
-
@index +=
|
|
535
|
+
if (match_len = has_terminal?(")", false, index))
|
|
536
|
+
r2 = true
|
|
537
|
+
@index += match_len
|
|
528
538
|
else
|
|
529
|
-
terminal_parse_failure(")")
|
|
539
|
+
terminal_parse_failure('")"')
|
|
530
540
|
r2 = nil
|
|
531
541
|
end
|
|
532
542
|
s1 << r2
|
|
@@ -534,11 +544,11 @@ module ScientificNameDirty
|
|
|
534
544
|
r3 = _nt_space
|
|
535
545
|
s1 << r3
|
|
536
546
|
if r3
|
|
537
|
-
if has_terminal?(")", false, index)
|
|
538
|
-
r4 =
|
|
539
|
-
@index +=
|
|
547
|
+
if (match_len = has_terminal?(")", false, index))
|
|
548
|
+
r4 = true
|
|
549
|
+
@index += match_len
|
|
540
550
|
else
|
|
541
|
-
terminal_parse_failure(")")
|
|
551
|
+
terminal_parse_failure('")"')
|
|
542
552
|
r4 = nil
|
|
543
553
|
end
|
|
544
554
|
s1 << r4
|
|
@@ -552,10 +562,12 @@ module ScientificNameDirty
|
|
|
552
562
|
r1 = nil
|
|
553
563
|
end
|
|
554
564
|
if r1
|
|
565
|
+
r1 = SyntaxNode.new(input, (index-1)...index) if r1 == true
|
|
555
566
|
r0 = r1
|
|
556
567
|
else
|
|
557
568
|
r5 = super
|
|
558
569
|
if r5
|
|
570
|
+
r5 = SyntaxNode.new(input, (index-1)...index) if r5 == true
|
|
559
571
|
r0 = r5
|
|
560
572
|
else
|
|
561
573
|
@index = i0
|
|
@@ -580,7 +592,7 @@ module ScientificNameDirty
|
|
|
580
592
|
if node_cache[:left_paren].has_key?(index)
|
|
581
593
|
cached = node_cache[:left_paren][index]
|
|
582
594
|
if cached
|
|
583
|
-
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
|
595
|
+
node_cache[:left_paren][index] = cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
|
584
596
|
@index = cached.interval.end
|
|
585
597
|
end
|
|
586
598
|
return cached
|
|
@@ -588,11 +600,11 @@ module ScientificNameDirty
|
|
|
588
600
|
|
|
589
601
|
i0 = index
|
|
590
602
|
i1, s1 = index, []
|
|
591
|
-
if has_terminal?("(", false, index)
|
|
592
|
-
r2 =
|
|
593
|
-
@index +=
|
|
603
|
+
if (match_len = has_terminal?("(", false, index))
|
|
604
|
+
r2 = true
|
|
605
|
+
@index += match_len
|
|
594
606
|
else
|
|
595
|
-
terminal_parse_failure("(")
|
|
607
|
+
terminal_parse_failure('"("')
|
|
596
608
|
r2 = nil
|
|
597
609
|
end
|
|
598
610
|
s1 << r2
|
|
@@ -600,11 +612,11 @@ module ScientificNameDirty
|
|
|
600
612
|
r3 = _nt_space
|
|
601
613
|
s1 << r3
|
|
602
614
|
if r3
|
|
603
|
-
if has_terminal?("(", false, index)
|
|
604
|
-
r4 =
|
|
605
|
-
@index +=
|
|
615
|
+
if (match_len = has_terminal?("(", false, index))
|
|
616
|
+
r4 = true
|
|
617
|
+
@index += match_len
|
|
606
618
|
else
|
|
607
|
-
terminal_parse_failure("(")
|
|
619
|
+
terminal_parse_failure('"("')
|
|
608
620
|
r4 = nil
|
|
609
621
|
end
|
|
610
622
|
s1 << r4
|
|
@@ -618,10 +630,12 @@ module ScientificNameDirty
|
|
|
618
630
|
r1 = nil
|
|
619
631
|
end
|
|
620
632
|
if r1
|
|
633
|
+
r1 = SyntaxNode.new(input, (index-1)...index) if r1 == true
|
|
621
634
|
r0 = r1
|
|
622
635
|
else
|
|
623
636
|
r5 = super
|
|
624
637
|
if r5
|
|
638
|
+
r5 = SyntaxNode.new(input, (index-1)...index) if r5 == true
|
|
625
639
|
r0 = r5
|
|
626
640
|
else
|
|
627
641
|
@index = i0
|
|
@@ -695,7 +709,7 @@ module ScientificNameDirty
|
|
|
695
709
|
if node_cache[:year].has_key?(index)
|
|
696
710
|
cached = node_cache[:year][index]
|
|
697
711
|
if cached
|
|
698
|
-
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
|
712
|
+
node_cache[:year][index] = cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
|
699
713
|
@index = cached.interval.end
|
|
700
714
|
end
|
|
701
715
|
return cached
|
|
@@ -722,6 +736,7 @@ module ScientificNameDirty
|
|
|
722
736
|
r1 = nil
|
|
723
737
|
end
|
|
724
738
|
if r1
|
|
739
|
+
r1 = SyntaxNode.new(input, (index-1)...index) if r1 == true
|
|
725
740
|
r0 = r1
|
|
726
741
|
else
|
|
727
742
|
i5, s5 = index, []
|
|
@@ -744,22 +759,27 @@ module ScientificNameDirty
|
|
|
744
759
|
r5 = nil
|
|
745
760
|
end
|
|
746
761
|
if r5
|
|
762
|
+
r5 = SyntaxNode.new(input, (index-1)...index) if r5 == true
|
|
747
763
|
r0 = r5
|
|
748
764
|
else
|
|
749
765
|
r9 = _nt_year_number_with_punctuation
|
|
750
766
|
if r9
|
|
767
|
+
r9 = SyntaxNode.new(input, (index-1)...index) if r9 == true
|
|
751
768
|
r0 = r9
|
|
752
769
|
else
|
|
753
770
|
r10 = _nt_approximate_year
|
|
754
771
|
if r10
|
|
772
|
+
r10 = SyntaxNode.new(input, (index-1)...index) if r10 == true
|
|
755
773
|
r0 = r10
|
|
756
774
|
else
|
|
757
775
|
r11 = _nt_double_year
|
|
758
776
|
if r11
|
|
777
|
+
r11 = SyntaxNode.new(input, (index-1)...index) if r11 == true
|
|
759
778
|
r0 = r11
|
|
760
779
|
else
|
|
761
780
|
r12 = super
|
|
762
781
|
if r12
|
|
782
|
+
r12 = SyntaxNode.new(input, (index-1)...index) if r12 == true
|
|
763
783
|
r0 = r12
|
|
764
784
|
else
|
|
765
785
|
@index = i0
|
|
@@ -810,18 +830,18 @@ module ScientificNameDirty
|
|
|
810
830
|
if node_cache[:approximate_year].has_key?(index)
|
|
811
831
|
cached = node_cache[:approximate_year][index]
|
|
812
832
|
if cached
|
|
813
|
-
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
|
833
|
+
node_cache[:approximate_year][index] = cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
|
814
834
|
@index = cached.interval.end
|
|
815
835
|
end
|
|
816
836
|
return cached
|
|
817
837
|
end
|
|
818
838
|
|
|
819
839
|
i0, s0 = index, []
|
|
820
|
-
if has_terminal?("[", false, index)
|
|
821
|
-
r1 =
|
|
822
|
-
@index +=
|
|
840
|
+
if (match_len = has_terminal?("[", false, index))
|
|
841
|
+
r1 = true
|
|
842
|
+
@index += match_len
|
|
823
843
|
else
|
|
824
|
-
terminal_parse_failure("[")
|
|
844
|
+
terminal_parse_failure('"["')
|
|
825
845
|
r1 = nil
|
|
826
846
|
end
|
|
827
847
|
s0 << r1
|
|
@@ -837,11 +857,11 @@ module ScientificNameDirty
|
|
|
837
857
|
if r4
|
|
838
858
|
s5, i5 = [], index
|
|
839
859
|
loop do
|
|
840
|
-
if has_terminal?("]", false, index)
|
|
841
|
-
r6 =
|
|
842
|
-
@index +=
|
|
860
|
+
if (match_len = has_terminal?("]", false, index))
|
|
861
|
+
r6 = true
|
|
862
|
+
@index += match_len
|
|
843
863
|
else
|
|
844
|
-
terminal_parse_failure("]")
|
|
864
|
+
terminal_parse_failure('"]"')
|
|
845
865
|
r6 = nil
|
|
846
866
|
end
|
|
847
867
|
if r6
|
|
@@ -901,7 +921,7 @@ module ScientificNameDirty
|
|
|
901
921
|
if node_cache[:double_year].has_key?(index)
|
|
902
922
|
cached = node_cache[:double_year][index]
|
|
903
923
|
if cached
|
|
904
|
-
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
|
924
|
+
node_cache[:double_year][index] = cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
|
905
925
|
@index = cached.interval.end
|
|
906
926
|
end
|
|
907
927
|
return cached
|
|
@@ -911,21 +931,22 @@ module ScientificNameDirty
|
|
|
911
931
|
r1 = _nt_year_number
|
|
912
932
|
s0 << r1
|
|
913
933
|
if r1
|
|
914
|
-
if has_terminal?("-", false, index)
|
|
915
|
-
r2 =
|
|
916
|
-
@index +=
|
|
934
|
+
if (match_len = has_terminal?("-", false, index))
|
|
935
|
+
r2 = true
|
|
936
|
+
@index += match_len
|
|
917
937
|
else
|
|
918
|
-
terminal_parse_failure("-")
|
|
938
|
+
terminal_parse_failure('"-"')
|
|
919
939
|
r2 = nil
|
|
920
940
|
end
|
|
921
941
|
s0 << r2
|
|
922
942
|
if r2
|
|
923
943
|
s3, i3 = [], index
|
|
924
944
|
loop do
|
|
925
|
-
if has_terminal?('\
|
|
945
|
+
if has_terminal?(@regexps[gr = '\A[0-9]'] ||= Regexp.new(gr), :regexp, index)
|
|
926
946
|
r4 = true
|
|
927
947
|
@index += 1
|
|
928
948
|
else
|
|
949
|
+
terminal_parse_failure('[0-9]')
|
|
929
950
|
r4 = nil
|
|
930
951
|
end
|
|
931
952
|
if r4
|
|
@@ -942,10 +963,11 @@ module ScientificNameDirty
|
|
|
942
963
|
end
|
|
943
964
|
s0 << r3
|
|
944
965
|
if r3
|
|
945
|
-
if has_terminal?('\
|
|
966
|
+
if has_terminal?(@regexps[gr = '\A[A-Za-z]'] ||= Regexp.new(gr), :regexp, index)
|
|
946
967
|
r6 = true
|
|
947
968
|
@index += 1
|
|
948
969
|
else
|
|
970
|
+
terminal_parse_failure('[A-Za-z]')
|
|
949
971
|
r6 = nil
|
|
950
972
|
end
|
|
951
973
|
if r6
|
|
@@ -955,10 +977,11 @@ module ScientificNameDirty
|
|
|
955
977
|
end
|
|
956
978
|
s0 << r5
|
|
957
979
|
if r5
|
|
958
|
-
if has_terminal?('\
|
|
980
|
+
if has_terminal?(@regexps[gr = '\A[\\?]'] ||= Regexp.new(gr), :regexp, index)
|
|
959
981
|
r8 = true
|
|
960
982
|
@index += 1
|
|
961
983
|
else
|
|
984
|
+
terminal_parse_failure('[\\?]')
|
|
962
985
|
r8 = nil
|
|
963
986
|
end
|
|
964
987
|
if r8
|
|
@@ -1011,7 +1034,7 @@ module ScientificNameDirty
|
|
|
1011
1034
|
if node_cache[:year_number_with_punctuation].has_key?(index)
|
|
1012
1035
|
cached = node_cache[:year_number_with_punctuation][index]
|
|
1013
1036
|
if cached
|
|
1014
|
-
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
|
1037
|
+
node_cache[:year_number_with_punctuation][index] = cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
|
1015
1038
|
@index = cached.interval.end
|
|
1016
1039
|
end
|
|
1017
1040
|
return cached
|
|
@@ -1021,11 +1044,11 @@ module ScientificNameDirty
|
|
|
1021
1044
|
r1 = _nt_year_number
|
|
1022
1045
|
s0 << r1
|
|
1023
1046
|
if r1
|
|
1024
|
-
if has_terminal?(".", false, index)
|
|
1025
|
-
r2 =
|
|
1026
|
-
@index +=
|
|
1047
|
+
if (match_len = has_terminal?(".", false, index))
|
|
1048
|
+
r2 = true
|
|
1049
|
+
@index += match_len
|
|
1027
1050
|
else
|
|
1028
|
-
terminal_parse_failure(".")
|
|
1051
|
+
terminal_parse_failure('"."')
|
|
1029
1052
|
r2 = nil
|
|
1030
1053
|
end
|
|
1031
1054
|
s0 << r2
|
|
@@ -1061,18 +1084,18 @@ module ScientificNameDirty
|
|
|
1061
1084
|
if node_cache[:page_number].has_key?(index)
|
|
1062
1085
|
cached = node_cache[:page_number][index]
|
|
1063
1086
|
if cached
|
|
1064
|
-
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
|
1087
|
+
node_cache[:page_number][index] = cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
|
1065
1088
|
@index = cached.interval.end
|
|
1066
1089
|
end
|
|
1067
1090
|
return cached
|
|
1068
1091
|
end
|
|
1069
1092
|
|
|
1070
1093
|
i0, s0 = index, []
|
|
1071
|
-
if has_terminal?(":", false, index)
|
|
1072
|
-
r1 =
|
|
1073
|
-
@index +=
|
|
1094
|
+
if (match_len = has_terminal?(":", false, index))
|
|
1095
|
+
r1 = true
|
|
1096
|
+
@index += match_len
|
|
1074
1097
|
else
|
|
1075
|
-
terminal_parse_failure(":")
|
|
1098
|
+
terminal_parse_failure('":"')
|
|
1076
1099
|
r1 = nil
|
|
1077
1100
|
end
|
|
1078
1101
|
s0 << r1
|
|
@@ -1082,10 +1105,11 @@ module ScientificNameDirty
|
|
|
1082
1105
|
if r2
|
|
1083
1106
|
s3, i3 = [], index
|
|
1084
1107
|
loop do
|
|
1085
|
-
if has_terminal?('\
|
|
1108
|
+
if has_terminal?(@regexps[gr = '\A[\\d]'] ||= Regexp.new(gr), :regexp, index)
|
|
1086
1109
|
r4 = true
|
|
1087
1110
|
@index += 1
|
|
1088
1111
|
else
|
|
1112
|
+
terminal_parse_failure('[\\d]')
|
|
1089
1113
|
r4 = nil
|
|
1090
1114
|
end
|
|
1091
1115
|
if r4
|
|
@@ -1122,17 +1146,17 @@ module ScientificNameDirty
|
|
|
1122
1146
|
if node_cache[:string_authorship_inconsistencies].has_key?(index)
|
|
1123
1147
|
cached = node_cache[:string_authorship_inconsistencies][index]
|
|
1124
1148
|
if cached
|
|
1125
|
-
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
|
1149
|
+
node_cache[:string_authorship_inconsistencies][index] = cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
|
1126
1150
|
@index = cached.interval.end
|
|
1127
1151
|
end
|
|
1128
1152
|
return cached
|
|
1129
1153
|
end
|
|
1130
1154
|
|
|
1131
|
-
if has_terminal?("corrig.", false, index)
|
|
1132
|
-
r0 = instantiate_node(SyntaxNode,input, index...(index +
|
|
1133
|
-
@index +=
|
|
1155
|
+
if (match_len = has_terminal?("corrig.", false, index))
|
|
1156
|
+
r0 = instantiate_node(SyntaxNode,input, index...(index + match_len))
|
|
1157
|
+
@index += match_len
|
|
1134
1158
|
else
|
|
1135
|
-
terminal_parse_failure("corrig.")
|
|
1159
|
+
terminal_parse_failure('"corrig."')
|
|
1136
1160
|
r0 = nil
|
|
1137
1161
|
end
|
|
1138
1162
|
|
|
@@ -1164,7 +1188,7 @@ module ScientificNameDirty
|
|
|
1164
1188
|
if node_cache[:garbage].has_key?(index)
|
|
1165
1189
|
cached = node_cache[:garbage][index]
|
|
1166
1190
|
if cached
|
|
1167
|
-
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
|
1191
|
+
node_cache[:garbage][index] = cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
|
1168
1192
|
@index = cached.interval.end
|
|
1169
1193
|
end
|
|
1170
1194
|
return cached
|
|
@@ -1175,10 +1199,11 @@ module ScientificNameDirty
|
|
|
1175
1199
|
r2 = _nt_space
|
|
1176
1200
|
s1 << r2
|
|
1177
1201
|
if r2
|
|
1178
|
-
if has_terminal?('\
|
|
1202
|
+
if has_terminal?(@regexps[gr = '\A["\',]'] ||= Regexp.new(gr), :regexp, index)
|
|
1179
1203
|
r3 = true
|
|
1180
1204
|
@index += 1
|
|
1181
1205
|
else
|
|
1206
|
+
terminal_parse_failure('["\',]')
|
|
1182
1207
|
r3 = nil
|
|
1183
1208
|
end
|
|
1184
1209
|
s1 << r3
|
|
@@ -1188,10 +1213,11 @@ module ScientificNameDirty
|
|
|
1188
1213
|
if r4
|
|
1189
1214
|
s5, i5 = [], index
|
|
1190
1215
|
loop do
|
|
1191
|
-
if has_terminal?('\
|
|
1216
|
+
if has_terminal?(@regexps[gr = '\A[^щ]'] ||= Regexp.new(gr), :regexp, index)
|
|
1192
1217
|
r6 = true
|
|
1193
1218
|
@index += 1
|
|
1194
1219
|
else
|
|
1220
|
+
terminal_parse_failure('[^щ]')
|
|
1195
1221
|
r6 = nil
|
|
1196
1222
|
end
|
|
1197
1223
|
if r6
|
|
@@ -1213,6 +1239,7 @@ module ScientificNameDirty
|
|
|
1213
1239
|
r1 = nil
|
|
1214
1240
|
end
|
|
1215
1241
|
if r1
|
|
1242
|
+
r1 = SyntaxNode.new(input, (index-1)...index) if r1 == true
|
|
1216
1243
|
r0 = r1
|
|
1217
1244
|
else
|
|
1218
1245
|
i7, s7 = index, []
|
|
@@ -1221,10 +1248,11 @@ module ScientificNameDirty
|
|
|
1221
1248
|
if r8
|
|
1222
1249
|
s9, i9 = [], index
|
|
1223
1250
|
loop do
|
|
1224
|
-
if has_terminal?('\
|
|
1251
|
+
if has_terminal?(@regexps[gr = '\A[^ш]'] ||= Regexp.new(gr), :regexp, index)
|
|
1225
1252
|
r10 = true
|
|
1226
1253
|
@index += 1
|
|
1227
1254
|
else
|
|
1255
|
+
terminal_parse_failure('[^ш]')
|
|
1228
1256
|
r10 = nil
|
|
1229
1257
|
end
|
|
1230
1258
|
if r10
|
|
@@ -1249,6 +1277,7 @@ module ScientificNameDirty
|
|
|
1249
1277
|
r7 = nil
|
|
1250
1278
|
end
|
|
1251
1279
|
if r7
|
|
1280
|
+
r7 = SyntaxNode.new(input, (index-1)...index) if r7 == true
|
|
1252
1281
|
r0 = r7
|
|
1253
1282
|
else
|
|
1254
1283
|
@index = i0
|