biodiversity 3.1.7 → 3.1.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG +2 -0
- data/biodiversity.gemspec +1 -1
- data/lib/biodiversity/parser/scientific_name_canonical.rb +10 -21
- data/lib/biodiversity/parser/scientific_name_clean.rb +629 -919
- data/lib/biodiversity/parser/scientific_name_dirty.rb +54 -82
- data/lib/biodiversity/version.rb +1 -1
- metadata +5 -5
@@ -56,7 +56,7 @@ module ScientificNameDirty
|
|
56
56
|
if node_cache[:root].has_key?(index)
|
57
57
|
cached = node_cache[:root][index]
|
58
58
|
if cached
|
59
|
-
|
59
|
+
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
60
60
|
@index = cached.interval.end
|
61
61
|
end
|
62
62
|
return cached
|
@@ -120,7 +120,7 @@ module ScientificNameDirty
|
|
120
120
|
if node_cache[:scientific_name_5].has_key?(index)
|
121
121
|
cached = node_cache[:scientific_name_5][index]
|
122
122
|
if cached
|
123
|
-
|
123
|
+
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
124
124
|
@index = cached.interval.end
|
125
125
|
end
|
126
126
|
return cached
|
@@ -143,12 +143,10 @@ module ScientificNameDirty
|
|
143
143
|
r1 = nil
|
144
144
|
end
|
145
145
|
if r1
|
146
|
-
r1 = SyntaxNode.new(input, (index-1)...index) if r1 == true
|
147
146
|
r0 = r1
|
148
147
|
else
|
149
148
|
r4 = super
|
150
149
|
if r4
|
151
|
-
r4 = SyntaxNode.new(input, (index-1)...index) if r4 == true
|
152
150
|
r0 = r4
|
153
151
|
else
|
154
152
|
@index = i0
|
@@ -238,7 +236,7 @@ module ScientificNameDirty
|
|
238
236
|
if node_cache[:infraspecies].has_key?(index)
|
239
237
|
cached = node_cache[:infraspecies][index]
|
240
238
|
if cached
|
241
|
-
|
239
|
+
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
242
240
|
@index = cached.interval.end
|
243
241
|
end
|
244
242
|
return cached
|
@@ -265,7 +263,6 @@ module ScientificNameDirty
|
|
265
263
|
r1 = nil
|
266
264
|
end
|
267
265
|
if r1
|
268
|
-
r1 = SyntaxNode.new(input, (index-1)...index) if r1 == true
|
269
266
|
r0 = r1
|
270
267
|
else
|
271
268
|
i5, s5 = index, []
|
@@ -296,12 +293,10 @@ module ScientificNameDirty
|
|
296
293
|
r5 = nil
|
297
294
|
end
|
298
295
|
if r5
|
299
|
-
r5 = SyntaxNode.new(input, (index-1)...index) if r5 == true
|
300
296
|
r0 = r5
|
301
297
|
else
|
302
298
|
r11 = super
|
303
299
|
if r11
|
304
|
-
r11 = SyntaxNode.new(input, (index-1)...index) if r11 == true
|
305
300
|
r0 = r11
|
306
301
|
else
|
307
302
|
@index = i0
|
@@ -352,7 +347,7 @@ module ScientificNameDirty
|
|
352
347
|
if node_cache[:species].has_key?(index)
|
353
348
|
cached = node_cache[:species][index]
|
354
349
|
if cached
|
355
|
-
|
350
|
+
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
356
351
|
@index = cached.interval.end
|
357
352
|
end
|
358
353
|
return cached
|
@@ -379,12 +374,10 @@ module ScientificNameDirty
|
|
379
374
|
r1 = nil
|
380
375
|
end
|
381
376
|
if r1
|
382
|
-
r1 = SyntaxNode.new(input, (index-1)...index) if r1 == true
|
383
377
|
r0 = r1
|
384
378
|
else
|
385
379
|
r5 = super
|
386
380
|
if r5
|
387
|
-
r5 = SyntaxNode.new(input, (index-1)...index) if r5 == true
|
388
381
|
r0 = r5
|
389
382
|
else
|
390
383
|
@index = i0
|
@@ -425,18 +418,17 @@ module ScientificNameDirty
|
|
425
418
|
if node_cache[:latin_word].has_key?(index)
|
426
419
|
cached = node_cache[:latin_word][index]
|
427
420
|
if cached
|
428
|
-
|
421
|
+
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
429
422
|
@index = cached.interval.end
|
430
423
|
end
|
431
424
|
return cached
|
432
425
|
end
|
433
426
|
|
434
427
|
i0, s0 = index, []
|
435
|
-
if has_terminal?(
|
428
|
+
if has_terminal?('\G[a-z\\-æœàâåãäáçčëéèíìïňññóòôøõöúùüŕřŗššşž]', true, index)
|
436
429
|
r1 = true
|
437
430
|
@index += 1
|
438
431
|
else
|
439
|
-
terminal_parse_failure('[a-z\\-æœàâåãäáçčëéèíìïňññóòôøõöúùüŕřŗššşž]')
|
440
432
|
r1 = nil
|
441
433
|
end
|
442
434
|
s0 << r1
|
@@ -477,7 +469,7 @@ module ScientificNameDirty
|
|
477
469
|
if node_cache[:valid_name_letters].has_key?(index)
|
478
470
|
cached = node_cache[:valid_name_letters][index]
|
479
471
|
if cached
|
480
|
-
|
472
|
+
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
481
473
|
@index = cached.interval.end
|
482
474
|
end
|
483
475
|
return cached
|
@@ -485,11 +477,10 @@ module ScientificNameDirty
|
|
485
477
|
|
486
478
|
s0, i0 = [], index
|
487
479
|
loop do
|
488
|
-
if has_terminal?(
|
480
|
+
if has_terminal?('\G[a-z\\-æœàâåãäáçčëéèíìïňññóòôøõöúùüŕřŗššşž]', true, index)
|
489
481
|
r1 = true
|
490
482
|
@index += 1
|
491
483
|
else
|
492
|
-
terminal_parse_failure('[a-z\\-æœàâåãäáçčëéèíìïňññóòôøõöúùüŕřŗššşž]')
|
493
484
|
r1 = nil
|
494
485
|
end
|
495
486
|
if r1
|
@@ -523,7 +514,7 @@ module ScientificNameDirty
|
|
523
514
|
if node_cache[:right_paren].has_key?(index)
|
524
515
|
cached = node_cache[:right_paren][index]
|
525
516
|
if cached
|
526
|
-
|
517
|
+
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
527
518
|
@index = cached.interval.end
|
528
519
|
end
|
529
520
|
return cached
|
@@ -531,9 +522,9 @@ module ScientificNameDirty
|
|
531
522
|
|
532
523
|
i0 = index
|
533
524
|
i1, s1 = index, []
|
534
|
-
if
|
535
|
-
r2 =
|
536
|
-
@index +=
|
525
|
+
if has_terminal?(")", false, index)
|
526
|
+
r2 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
527
|
+
@index += 1
|
537
528
|
else
|
538
529
|
terminal_parse_failure(")")
|
539
530
|
r2 = nil
|
@@ -543,9 +534,9 @@ module ScientificNameDirty
|
|
543
534
|
r3 = _nt_space
|
544
535
|
s1 << r3
|
545
536
|
if r3
|
546
|
-
if
|
547
|
-
r4 =
|
548
|
-
@index +=
|
537
|
+
if has_terminal?(")", false, index)
|
538
|
+
r4 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
539
|
+
@index += 1
|
549
540
|
else
|
550
541
|
terminal_parse_failure(")")
|
551
542
|
r4 = nil
|
@@ -561,12 +552,10 @@ module ScientificNameDirty
|
|
561
552
|
r1 = nil
|
562
553
|
end
|
563
554
|
if r1
|
564
|
-
r1 = SyntaxNode.new(input, (index-1)...index) if r1 == true
|
565
555
|
r0 = r1
|
566
556
|
else
|
567
557
|
r5 = super
|
568
558
|
if r5
|
569
|
-
r5 = SyntaxNode.new(input, (index-1)...index) if r5 == true
|
570
559
|
r0 = r5
|
571
560
|
else
|
572
561
|
@index = i0
|
@@ -591,7 +580,7 @@ module ScientificNameDirty
|
|
591
580
|
if node_cache[:left_paren].has_key?(index)
|
592
581
|
cached = node_cache[:left_paren][index]
|
593
582
|
if cached
|
594
|
-
|
583
|
+
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
595
584
|
@index = cached.interval.end
|
596
585
|
end
|
597
586
|
return cached
|
@@ -599,9 +588,9 @@ module ScientificNameDirty
|
|
599
588
|
|
600
589
|
i0 = index
|
601
590
|
i1, s1 = index, []
|
602
|
-
if
|
603
|
-
r2 =
|
604
|
-
@index +=
|
591
|
+
if has_terminal?("(", false, index)
|
592
|
+
r2 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
593
|
+
@index += 1
|
605
594
|
else
|
606
595
|
terminal_parse_failure("(")
|
607
596
|
r2 = nil
|
@@ -611,9 +600,9 @@ module ScientificNameDirty
|
|
611
600
|
r3 = _nt_space
|
612
601
|
s1 << r3
|
613
602
|
if r3
|
614
|
-
if
|
615
|
-
r4 =
|
616
|
-
@index +=
|
603
|
+
if has_terminal?("(", false, index)
|
604
|
+
r4 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
605
|
+
@index += 1
|
617
606
|
else
|
618
607
|
terminal_parse_failure("(")
|
619
608
|
r4 = nil
|
@@ -629,12 +618,10 @@ module ScientificNameDirty
|
|
629
618
|
r1 = nil
|
630
619
|
end
|
631
620
|
if r1
|
632
|
-
r1 = SyntaxNode.new(input, (index-1)...index) if r1 == true
|
633
621
|
r0 = r1
|
634
622
|
else
|
635
623
|
r5 = super
|
636
624
|
if r5
|
637
|
-
r5 = SyntaxNode.new(input, (index-1)...index) if r5 == true
|
638
625
|
r0 = r5
|
639
626
|
else
|
640
627
|
@index = i0
|
@@ -708,7 +695,7 @@ module ScientificNameDirty
|
|
708
695
|
if node_cache[:year].has_key?(index)
|
709
696
|
cached = node_cache[:year][index]
|
710
697
|
if cached
|
711
|
-
|
698
|
+
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
712
699
|
@index = cached.interval.end
|
713
700
|
end
|
714
701
|
return cached
|
@@ -735,7 +722,6 @@ module ScientificNameDirty
|
|
735
722
|
r1 = nil
|
736
723
|
end
|
737
724
|
if r1
|
738
|
-
r1 = SyntaxNode.new(input, (index-1)...index) if r1 == true
|
739
725
|
r0 = r1
|
740
726
|
else
|
741
727
|
i5, s5 = index, []
|
@@ -758,27 +744,22 @@ module ScientificNameDirty
|
|
758
744
|
r5 = nil
|
759
745
|
end
|
760
746
|
if r5
|
761
|
-
r5 = SyntaxNode.new(input, (index-1)...index) if r5 == true
|
762
747
|
r0 = r5
|
763
748
|
else
|
764
749
|
r9 = _nt_year_number_with_punctuation
|
765
750
|
if r9
|
766
|
-
r9 = SyntaxNode.new(input, (index-1)...index) if r9 == true
|
767
751
|
r0 = r9
|
768
752
|
else
|
769
753
|
r10 = _nt_approximate_year
|
770
754
|
if r10
|
771
|
-
r10 = SyntaxNode.new(input, (index-1)...index) if r10 == true
|
772
755
|
r0 = r10
|
773
756
|
else
|
774
757
|
r11 = _nt_double_year
|
775
758
|
if r11
|
776
|
-
r11 = SyntaxNode.new(input, (index-1)...index) if r11 == true
|
777
759
|
r0 = r11
|
778
760
|
else
|
779
761
|
r12 = super
|
780
762
|
if r12
|
781
|
-
r12 = SyntaxNode.new(input, (index-1)...index) if r12 == true
|
782
763
|
r0 = r12
|
783
764
|
else
|
784
765
|
@index = i0
|
@@ -829,16 +810,16 @@ module ScientificNameDirty
|
|
829
810
|
if node_cache[:approximate_year].has_key?(index)
|
830
811
|
cached = node_cache[:approximate_year][index]
|
831
812
|
if cached
|
832
|
-
|
813
|
+
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
833
814
|
@index = cached.interval.end
|
834
815
|
end
|
835
816
|
return cached
|
836
817
|
end
|
837
818
|
|
838
819
|
i0, s0 = index, []
|
839
|
-
if
|
840
|
-
r1 =
|
841
|
-
@index +=
|
820
|
+
if has_terminal?("[", false, index)
|
821
|
+
r1 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
822
|
+
@index += 1
|
842
823
|
else
|
843
824
|
terminal_parse_failure("[")
|
844
825
|
r1 = nil
|
@@ -856,9 +837,9 @@ module ScientificNameDirty
|
|
856
837
|
if r4
|
857
838
|
s5, i5 = [], index
|
858
839
|
loop do
|
859
|
-
if
|
860
|
-
r6 =
|
861
|
-
@index +=
|
840
|
+
if has_terminal?("]", false, index)
|
841
|
+
r6 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
842
|
+
@index += 1
|
862
843
|
else
|
863
844
|
terminal_parse_failure("]")
|
864
845
|
r6 = nil
|
@@ -920,7 +901,7 @@ module ScientificNameDirty
|
|
920
901
|
if node_cache[:double_year].has_key?(index)
|
921
902
|
cached = node_cache[:double_year][index]
|
922
903
|
if cached
|
923
|
-
|
904
|
+
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
924
905
|
@index = cached.interval.end
|
925
906
|
end
|
926
907
|
return cached
|
@@ -930,9 +911,9 @@ module ScientificNameDirty
|
|
930
911
|
r1 = _nt_year_number
|
931
912
|
s0 << r1
|
932
913
|
if r1
|
933
|
-
if
|
934
|
-
r2 =
|
935
|
-
@index +=
|
914
|
+
if has_terminal?("-", false, index)
|
915
|
+
r2 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
916
|
+
@index += 1
|
936
917
|
else
|
937
918
|
terminal_parse_failure("-")
|
938
919
|
r2 = nil
|
@@ -941,11 +922,10 @@ module ScientificNameDirty
|
|
941
922
|
if r2
|
942
923
|
s3, i3 = [], index
|
943
924
|
loop do
|
944
|
-
if has_terminal?(
|
925
|
+
if has_terminal?('\G[0-9]', true, index)
|
945
926
|
r4 = true
|
946
927
|
@index += 1
|
947
928
|
else
|
948
|
-
terminal_parse_failure('[0-9]')
|
949
929
|
r4 = nil
|
950
930
|
end
|
951
931
|
if r4
|
@@ -962,11 +942,10 @@ module ScientificNameDirty
|
|
962
942
|
end
|
963
943
|
s0 << r3
|
964
944
|
if r3
|
965
|
-
if has_terminal?(
|
945
|
+
if has_terminal?('\G[A-Za-z]', true, index)
|
966
946
|
r6 = true
|
967
947
|
@index += 1
|
968
948
|
else
|
969
|
-
terminal_parse_failure('[A-Za-z]')
|
970
949
|
r6 = nil
|
971
950
|
end
|
972
951
|
if r6
|
@@ -976,11 +955,10 @@ module ScientificNameDirty
|
|
976
955
|
end
|
977
956
|
s0 << r5
|
978
957
|
if r5
|
979
|
-
if has_terminal?(
|
958
|
+
if has_terminal?('\G[\\?]', true, index)
|
980
959
|
r8 = true
|
981
960
|
@index += 1
|
982
961
|
else
|
983
|
-
terminal_parse_failure('[\\?]')
|
984
962
|
r8 = nil
|
985
963
|
end
|
986
964
|
if r8
|
@@ -1033,7 +1011,7 @@ module ScientificNameDirty
|
|
1033
1011
|
if node_cache[:year_number_with_punctuation].has_key?(index)
|
1034
1012
|
cached = node_cache[:year_number_with_punctuation][index]
|
1035
1013
|
if cached
|
1036
|
-
|
1014
|
+
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
1037
1015
|
@index = cached.interval.end
|
1038
1016
|
end
|
1039
1017
|
return cached
|
@@ -1043,9 +1021,9 @@ module ScientificNameDirty
|
|
1043
1021
|
r1 = _nt_year_number
|
1044
1022
|
s0 << r1
|
1045
1023
|
if r1
|
1046
|
-
if
|
1047
|
-
r2 =
|
1048
|
-
@index +=
|
1024
|
+
if has_terminal?(".", false, index)
|
1025
|
+
r2 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
1026
|
+
@index += 1
|
1049
1027
|
else
|
1050
1028
|
terminal_parse_failure(".")
|
1051
1029
|
r2 = nil
|
@@ -1083,16 +1061,16 @@ module ScientificNameDirty
|
|
1083
1061
|
if node_cache[:page_number].has_key?(index)
|
1084
1062
|
cached = node_cache[:page_number][index]
|
1085
1063
|
if cached
|
1086
|
-
|
1064
|
+
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
1087
1065
|
@index = cached.interval.end
|
1088
1066
|
end
|
1089
1067
|
return cached
|
1090
1068
|
end
|
1091
1069
|
|
1092
1070
|
i0, s0 = index, []
|
1093
|
-
if
|
1094
|
-
r1 =
|
1095
|
-
@index +=
|
1071
|
+
if has_terminal?(":", false, index)
|
1072
|
+
r1 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
1073
|
+
@index += 1
|
1096
1074
|
else
|
1097
1075
|
terminal_parse_failure(":")
|
1098
1076
|
r1 = nil
|
@@ -1104,11 +1082,10 @@ module ScientificNameDirty
|
|
1104
1082
|
if r2
|
1105
1083
|
s3, i3 = [], index
|
1106
1084
|
loop do
|
1107
|
-
if has_terminal?(
|
1085
|
+
if has_terminal?('\G[\\d]', true, index)
|
1108
1086
|
r4 = true
|
1109
1087
|
@index += 1
|
1110
1088
|
else
|
1111
|
-
terminal_parse_failure('[\\d]')
|
1112
1089
|
r4 = nil
|
1113
1090
|
end
|
1114
1091
|
if r4
|
@@ -1145,15 +1122,15 @@ module ScientificNameDirty
|
|
1145
1122
|
if node_cache[:string_authorship_inconsistencies].has_key?(index)
|
1146
1123
|
cached = node_cache[:string_authorship_inconsistencies][index]
|
1147
1124
|
if cached
|
1148
|
-
|
1125
|
+
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
1149
1126
|
@index = cached.interval.end
|
1150
1127
|
end
|
1151
1128
|
return cached
|
1152
1129
|
end
|
1153
1130
|
|
1154
|
-
if
|
1155
|
-
r0 = instantiate_node(SyntaxNode,input, index...(index +
|
1156
|
-
@index +=
|
1131
|
+
if has_terminal?("corrig.", false, index)
|
1132
|
+
r0 = instantiate_node(SyntaxNode,input, index...(index + 7))
|
1133
|
+
@index += 7
|
1157
1134
|
else
|
1158
1135
|
terminal_parse_failure("corrig.")
|
1159
1136
|
r0 = nil
|
@@ -1187,7 +1164,7 @@ module ScientificNameDirty
|
|
1187
1164
|
if node_cache[:garbage].has_key?(index)
|
1188
1165
|
cached = node_cache[:garbage][index]
|
1189
1166
|
if cached
|
1190
|
-
|
1167
|
+
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
1191
1168
|
@index = cached.interval.end
|
1192
1169
|
end
|
1193
1170
|
return cached
|
@@ -1198,11 +1175,10 @@ module ScientificNameDirty
|
|
1198
1175
|
r2 = _nt_space
|
1199
1176
|
s1 << r2
|
1200
1177
|
if r2
|
1201
|
-
if has_terminal?(
|
1178
|
+
if has_terminal?('\G["\',]', true, index)
|
1202
1179
|
r3 = true
|
1203
1180
|
@index += 1
|
1204
1181
|
else
|
1205
|
-
terminal_parse_failure('["\',]')
|
1206
1182
|
r3 = nil
|
1207
1183
|
end
|
1208
1184
|
s1 << r3
|
@@ -1212,11 +1188,10 @@ module ScientificNameDirty
|
|
1212
1188
|
if r4
|
1213
1189
|
s5, i5 = [], index
|
1214
1190
|
loop do
|
1215
|
-
if has_terminal?(
|
1191
|
+
if has_terminal?('\G[^щ]', true, index)
|
1216
1192
|
r6 = true
|
1217
1193
|
@index += 1
|
1218
1194
|
else
|
1219
|
-
terminal_parse_failure('[^щ]')
|
1220
1195
|
r6 = nil
|
1221
1196
|
end
|
1222
1197
|
if r6
|
@@ -1238,7 +1213,6 @@ module ScientificNameDirty
|
|
1238
1213
|
r1 = nil
|
1239
1214
|
end
|
1240
1215
|
if r1
|
1241
|
-
r1 = SyntaxNode.new(input, (index-1)...index) if r1 == true
|
1242
1216
|
r0 = r1
|
1243
1217
|
else
|
1244
1218
|
i7, s7 = index, []
|
@@ -1247,11 +1221,10 @@ module ScientificNameDirty
|
|
1247
1221
|
if r8
|
1248
1222
|
s9, i9 = [], index
|
1249
1223
|
loop do
|
1250
|
-
if has_terminal?(
|
1224
|
+
if has_terminal?('\G[^ш]', true, index)
|
1251
1225
|
r10 = true
|
1252
1226
|
@index += 1
|
1253
1227
|
else
|
1254
|
-
terminal_parse_failure('[^ш]')
|
1255
1228
|
r10 = nil
|
1256
1229
|
end
|
1257
1230
|
if r10
|
@@ -1276,7 +1249,6 @@ module ScientificNameDirty
|
|
1276
1249
|
r7 = nil
|
1277
1250
|
end
|
1278
1251
|
if r7
|
1279
|
-
r7 = SyntaxNode.new(input, (index-1)...index) if r7 == true
|
1280
1252
|
r0 = r7
|
1281
1253
|
else
|
1282
1254
|
@index = i0
|