dimus-biodiversity 0.5.10 → 0.5.11
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +10 -4
- data/VERSION +1 -1
- data/bin/nnparse +2 -1
- data/bin/parserver +14 -0
- data/biodiversity.gemspec +84 -0
- data/lib/biodiversity/parser.rb +11 -6
- data/lib/biodiversity/parser/scientific_name_canonical.rb +17 -17
- data/lib/biodiversity/parser/scientific_name_clean.rb +306 -306
- data/lib/biodiversity/parser/scientific_name_dirty.rb +55 -55
- data/spec/parser/scientific_name.spec.rb +12 -12
- metadata +5 -3
@@ -20,7 +20,7 @@ module ScientificNameDirty
|
|
20
20
|
|
21
21
|
node_cache[:root][start_index] = r0
|
22
22
|
|
23
|
-
|
23
|
+
r0
|
24
24
|
end
|
25
25
|
|
26
26
|
module ScientificName50
|
@@ -72,7 +72,7 @@ module ScientificNameDirty
|
|
72
72
|
r1.extend(ScientificName50)
|
73
73
|
r1.extend(ScientificName51)
|
74
74
|
else
|
75
|
-
|
75
|
+
@index = i1
|
76
76
|
r1 = nil
|
77
77
|
end
|
78
78
|
if r1
|
@@ -82,14 +82,14 @@ module ScientificNameDirty
|
|
82
82
|
if r4
|
83
83
|
r0 = r4
|
84
84
|
else
|
85
|
-
|
85
|
+
@index = i0
|
86
86
|
r0 = nil
|
87
87
|
end
|
88
88
|
end
|
89
89
|
|
90
90
|
node_cache[:scientific_name_5][start_index] = r0
|
91
91
|
|
92
|
-
|
92
|
+
r0
|
93
93
|
end
|
94
94
|
|
95
95
|
module Infraspecies0
|
@@ -189,7 +189,7 @@ module ScientificNameDirty
|
|
189
189
|
r1.extend(Infraspecies0)
|
190
190
|
r1.extend(Infraspecies1)
|
191
191
|
else
|
192
|
-
|
192
|
+
@index = i1
|
193
193
|
r1 = nil
|
194
194
|
end
|
195
195
|
if r1
|
@@ -219,7 +219,7 @@ module ScientificNameDirty
|
|
219
219
|
r5.extend(Infraspecies2)
|
220
220
|
r5.extend(Infraspecies3)
|
221
221
|
else
|
222
|
-
|
222
|
+
@index = i5
|
223
223
|
r5 = nil
|
224
224
|
end
|
225
225
|
if r5
|
@@ -229,7 +229,7 @@ module ScientificNameDirty
|
|
229
229
|
if r11
|
230
230
|
r0 = r11
|
231
231
|
else
|
232
|
-
|
232
|
+
@index = i0
|
233
233
|
r0 = nil
|
234
234
|
end
|
235
235
|
end
|
@@ -237,7 +237,7 @@ module ScientificNameDirty
|
|
237
237
|
|
238
238
|
node_cache[:infraspecies][start_index] = r0
|
239
239
|
|
240
|
-
|
240
|
+
r0
|
241
241
|
end
|
242
242
|
|
243
243
|
module Species0
|
@@ -297,7 +297,7 @@ module ScientificNameDirty
|
|
297
297
|
r1.extend(Species0)
|
298
298
|
r1.extend(Species1)
|
299
299
|
else
|
300
|
-
|
300
|
+
@index = i1
|
301
301
|
r1 = nil
|
302
302
|
end
|
303
303
|
if r1
|
@@ -307,14 +307,14 @@ module ScientificNameDirty
|
|
307
307
|
if r5
|
308
308
|
r0 = r5
|
309
309
|
else
|
310
|
-
|
310
|
+
@index = i0
|
311
311
|
r0 = nil
|
312
312
|
end
|
313
313
|
end
|
314
314
|
|
315
315
|
node_cache[:species][start_index] = r0
|
316
316
|
|
317
|
-
|
317
|
+
r0
|
318
318
|
end
|
319
319
|
|
320
320
|
module RightParen0
|
@@ -334,7 +334,7 @@ module ScientificNameDirty
|
|
334
334
|
|
335
335
|
i0 = index
|
336
336
|
i1, s1 = index, []
|
337
|
-
if
|
337
|
+
if has_terminal?(")", false, index)
|
338
338
|
r2 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
339
339
|
@index += 1
|
340
340
|
else
|
@@ -346,7 +346,7 @@ module ScientificNameDirty
|
|
346
346
|
r3 = _nt_space
|
347
347
|
s1 << r3
|
348
348
|
if r3
|
349
|
-
if
|
349
|
+
if has_terminal?(")", false, index)
|
350
350
|
r4 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
351
351
|
@index += 1
|
352
352
|
else
|
@@ -360,7 +360,7 @@ module ScientificNameDirty
|
|
360
360
|
r1 = instantiate_node(SyntaxNode,input, i1...index, s1)
|
361
361
|
r1.extend(RightParen0)
|
362
362
|
else
|
363
|
-
|
363
|
+
@index = i1
|
364
364
|
r1 = nil
|
365
365
|
end
|
366
366
|
if r1
|
@@ -370,14 +370,14 @@ module ScientificNameDirty
|
|
370
370
|
if r5
|
371
371
|
r0 = r5
|
372
372
|
else
|
373
|
-
|
373
|
+
@index = i0
|
374
374
|
r0 = nil
|
375
375
|
end
|
376
376
|
end
|
377
377
|
|
378
378
|
node_cache[:right_paren][start_index] = r0
|
379
379
|
|
380
|
-
|
380
|
+
r0
|
381
381
|
end
|
382
382
|
|
383
383
|
module LeftParen0
|
@@ -397,7 +397,7 @@ module ScientificNameDirty
|
|
397
397
|
|
398
398
|
i0 = index
|
399
399
|
i1, s1 = index, []
|
400
|
-
if
|
400
|
+
if has_terminal?("(", false, index)
|
401
401
|
r2 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
402
402
|
@index += 1
|
403
403
|
else
|
@@ -409,7 +409,7 @@ module ScientificNameDirty
|
|
409
409
|
r3 = _nt_space
|
410
410
|
s1 << r3
|
411
411
|
if r3
|
412
|
-
if
|
412
|
+
if has_terminal?("(", false, index)
|
413
413
|
r4 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
414
414
|
@index += 1
|
415
415
|
else
|
@@ -423,7 +423,7 @@ module ScientificNameDirty
|
|
423
423
|
r1 = instantiate_node(SyntaxNode,input, i1...index, s1)
|
424
424
|
r1.extend(LeftParen0)
|
425
425
|
else
|
426
|
-
|
426
|
+
@index = i1
|
427
427
|
r1 = nil
|
428
428
|
end
|
429
429
|
if r1
|
@@ -433,14 +433,14 @@ module ScientificNameDirty
|
|
433
433
|
if r5
|
434
434
|
r0 = r5
|
435
435
|
else
|
436
|
-
|
436
|
+
@index = i0
|
437
437
|
r0 = nil
|
438
438
|
end
|
439
439
|
end
|
440
440
|
|
441
441
|
node_cache[:left_paren][start_index] = r0
|
442
442
|
|
443
|
-
|
443
|
+
r0
|
444
444
|
end
|
445
445
|
|
446
446
|
module Year0
|
@@ -524,7 +524,7 @@ module ScientificNameDirty
|
|
524
524
|
r1.extend(Year0)
|
525
525
|
r1.extend(Year1)
|
526
526
|
else
|
527
|
-
|
527
|
+
@index = i1
|
528
528
|
r1 = nil
|
529
529
|
end
|
530
530
|
if r1
|
@@ -546,7 +546,7 @@ module ScientificNameDirty
|
|
546
546
|
r5.extend(Year2)
|
547
547
|
r5.extend(Year3)
|
548
548
|
else
|
549
|
-
|
549
|
+
@index = i5
|
550
550
|
r5 = nil
|
551
551
|
end
|
552
552
|
if r5
|
@@ -568,7 +568,7 @@ module ScientificNameDirty
|
|
568
568
|
if r12
|
569
569
|
r0 = r12
|
570
570
|
else
|
571
|
-
|
571
|
+
@index = i0
|
572
572
|
r0 = nil
|
573
573
|
end
|
574
574
|
end
|
@@ -579,7 +579,7 @@ module ScientificNameDirty
|
|
579
579
|
|
580
580
|
node_cache[:year][start_index] = r0
|
581
581
|
|
582
|
-
|
582
|
+
r0
|
583
583
|
end
|
584
584
|
|
585
585
|
module ApproximateYear0
|
@@ -620,7 +620,7 @@ module ScientificNameDirty
|
|
620
620
|
end
|
621
621
|
|
622
622
|
i0, s0 = index, []
|
623
|
-
if
|
623
|
+
if has_terminal?("[", false, index)
|
624
624
|
r1 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
625
625
|
@index += 1
|
626
626
|
else
|
@@ -640,7 +640,7 @@ module ScientificNameDirty
|
|
640
640
|
if r4
|
641
641
|
s5, i5 = [], index
|
642
642
|
loop do
|
643
|
-
if
|
643
|
+
if has_terminal?("]", false, index)
|
644
644
|
r6 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
645
645
|
@index += 1
|
646
646
|
else
|
@@ -654,7 +654,7 @@ module ScientificNameDirty
|
|
654
654
|
end
|
655
655
|
end
|
656
656
|
if s5.empty?
|
657
|
-
|
657
|
+
@index = i5
|
658
658
|
r5 = nil
|
659
659
|
else
|
660
660
|
r5 = instantiate_node(SyntaxNode,input, i5...index, s5)
|
@@ -669,13 +669,13 @@ module ScientificNameDirty
|
|
669
669
|
r0.extend(ApproximateYear0)
|
670
670
|
r0.extend(ApproximateYear1)
|
671
671
|
else
|
672
|
-
|
672
|
+
@index = i0
|
673
673
|
r0 = nil
|
674
674
|
end
|
675
675
|
|
676
676
|
node_cache[:approximate_year][start_index] = r0
|
677
677
|
|
678
|
-
|
678
|
+
r0
|
679
679
|
end
|
680
680
|
|
681
681
|
module DoubleYear0
|
@@ -711,7 +711,7 @@ module ScientificNameDirty
|
|
711
711
|
r1 = _nt_year_number
|
712
712
|
s0 << r1
|
713
713
|
if r1
|
714
|
-
if
|
714
|
+
if has_terminal?("-", false, index)
|
715
715
|
r2 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
716
716
|
@index += 1
|
717
717
|
else
|
@@ -722,7 +722,7 @@ module ScientificNameDirty
|
|
722
722
|
if r2
|
723
723
|
s3, i3 = [], index
|
724
724
|
loop do
|
725
|
-
if
|
725
|
+
if has_terminal?('\G[0-9]', true, index)
|
726
726
|
r4 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
727
727
|
@index += 1
|
728
728
|
else
|
@@ -735,14 +735,14 @@ module ScientificNameDirty
|
|
735
735
|
end
|
736
736
|
end
|
737
737
|
if s3.empty?
|
738
|
-
|
738
|
+
@index = i3
|
739
739
|
r3 = nil
|
740
740
|
else
|
741
741
|
r3 = instantiate_node(SyntaxNode,input, i3...index, s3)
|
742
742
|
end
|
743
743
|
s0 << r3
|
744
744
|
if r3
|
745
|
-
if
|
745
|
+
if has_terminal?('\G[A-Za-z]', true, index)
|
746
746
|
r6 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
747
747
|
@index += 1
|
748
748
|
else
|
@@ -755,7 +755,7 @@ module ScientificNameDirty
|
|
755
755
|
end
|
756
756
|
s0 << r5
|
757
757
|
if r5
|
758
|
-
if
|
758
|
+
if has_terminal?('\G[\\?]', true, index)
|
759
759
|
r8 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
760
760
|
@index += 1
|
761
761
|
else
|
@@ -776,13 +776,13 @@ module ScientificNameDirty
|
|
776
776
|
r0.extend(DoubleYear0)
|
777
777
|
r0.extend(DoubleYear1)
|
778
778
|
else
|
779
|
-
|
779
|
+
@index = i0
|
780
780
|
r0 = nil
|
781
781
|
end
|
782
782
|
|
783
783
|
node_cache[:double_year][start_index] = r0
|
784
784
|
|
785
|
-
|
785
|
+
r0
|
786
786
|
end
|
787
787
|
|
788
788
|
module YearNumberWithPunctuation0
|
@@ -818,7 +818,7 @@ module ScientificNameDirty
|
|
818
818
|
r1 = _nt_year_number
|
819
819
|
s0 << r1
|
820
820
|
if r1
|
821
|
-
if
|
821
|
+
if has_terminal?(".", false, index)
|
822
822
|
r2 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
823
823
|
@index += 1
|
824
824
|
else
|
@@ -832,13 +832,13 @@ module ScientificNameDirty
|
|
832
832
|
r0.extend(YearNumberWithPunctuation0)
|
833
833
|
r0.extend(YearNumberWithPunctuation1)
|
834
834
|
else
|
835
|
-
|
835
|
+
@index = i0
|
836
836
|
r0 = nil
|
837
837
|
end
|
838
838
|
|
839
839
|
node_cache[:year_number_with_punctuation][start_index] = r0
|
840
840
|
|
841
|
-
|
841
|
+
r0
|
842
842
|
end
|
843
843
|
|
844
844
|
module PageNumber0
|
@@ -862,7 +862,7 @@ module ScientificNameDirty
|
|
862
862
|
end
|
863
863
|
|
864
864
|
i0, s0 = index, []
|
865
|
-
if
|
865
|
+
if has_terminal?(":", false, index)
|
866
866
|
r1 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
867
867
|
@index += 1
|
868
868
|
else
|
@@ -876,7 +876,7 @@ module ScientificNameDirty
|
|
876
876
|
if r2
|
877
877
|
s3, i3 = [], index
|
878
878
|
loop do
|
879
|
-
if
|
879
|
+
if has_terminal?('\G[\\d]', true, index)
|
880
880
|
r4 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
881
881
|
@index += 1
|
882
882
|
else
|
@@ -889,7 +889,7 @@ module ScientificNameDirty
|
|
889
889
|
end
|
890
890
|
end
|
891
891
|
if s3.empty?
|
892
|
-
|
892
|
+
@index = i3
|
893
893
|
r3 = nil
|
894
894
|
else
|
895
895
|
r3 = instantiate_node(SyntaxNode,input, i3...index, s3)
|
@@ -902,13 +902,13 @@ module ScientificNameDirty
|
|
902
902
|
r0.extend(PageNumber0)
|
903
903
|
r0.extend(PageNumber1)
|
904
904
|
else
|
905
|
-
|
905
|
+
@index = i0
|
906
906
|
r0 = nil
|
907
907
|
end
|
908
908
|
|
909
909
|
node_cache[:page_number][start_index] = r0
|
910
910
|
|
911
|
-
|
911
|
+
r0
|
912
912
|
end
|
913
913
|
|
914
914
|
def _nt_epitheton_authorship_inconsistencies
|
@@ -919,7 +919,7 @@ module ScientificNameDirty
|
|
919
919
|
return cached
|
920
920
|
end
|
921
921
|
|
922
|
-
if
|
922
|
+
if has_terminal?("corrig.", false, index)
|
923
923
|
r0 = instantiate_node(SyntaxNode,input, index...(index + 7))
|
924
924
|
@index += 7
|
925
925
|
else
|
@@ -929,7 +929,7 @@ module ScientificNameDirty
|
|
929
929
|
|
930
930
|
node_cache[:epitheton_authorship_inconsistencies][start_index] = r0
|
931
931
|
|
932
|
-
|
932
|
+
r0
|
933
933
|
end
|
934
934
|
|
935
935
|
module Garbage0
|
@@ -963,7 +963,7 @@ module ScientificNameDirty
|
|
963
963
|
r2 = _nt_space
|
964
964
|
s1 << r2
|
965
965
|
if r2
|
966
|
-
if
|
966
|
+
if has_terminal?('\G["\',.]', true, index)
|
967
967
|
r3 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
968
968
|
@index += 1
|
969
969
|
else
|
@@ -976,7 +976,7 @@ module ScientificNameDirty
|
|
976
976
|
if r4
|
977
977
|
s5, i5 = [], index
|
978
978
|
loop do
|
979
|
-
if
|
979
|
+
if has_terminal?('\G[^щ]', true, index)
|
980
980
|
r6 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
981
981
|
@index += 1
|
982
982
|
else
|
@@ -997,7 +997,7 @@ module ScientificNameDirty
|
|
997
997
|
r1 = instantiate_node(SyntaxNode,input, i1...index, s1)
|
998
998
|
r1.extend(Garbage0)
|
999
999
|
else
|
1000
|
-
|
1000
|
+
@index = i1
|
1001
1001
|
r1 = nil
|
1002
1002
|
end
|
1003
1003
|
if r1
|
@@ -1009,7 +1009,7 @@ module ScientificNameDirty
|
|
1009
1009
|
if r8
|
1010
1010
|
s9, i9 = [], index
|
1011
1011
|
loop do
|
1012
|
-
if
|
1012
|
+
if has_terminal?('\G[^ш]', true, index)
|
1013
1013
|
r10 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
1014
1014
|
@index += 1
|
1015
1015
|
else
|
@@ -1022,7 +1022,7 @@ module ScientificNameDirty
|
|
1022
1022
|
end
|
1023
1023
|
end
|
1024
1024
|
if s9.empty?
|
1025
|
-
|
1025
|
+
@index = i9
|
1026
1026
|
r9 = nil
|
1027
1027
|
else
|
1028
1028
|
r9 = instantiate_node(SyntaxNode,input, i9...index, s9)
|
@@ -1033,20 +1033,20 @@ module ScientificNameDirty
|
|
1033
1033
|
r7 = instantiate_node(SyntaxNode,input, i7...index, s7)
|
1034
1034
|
r7.extend(Garbage1)
|
1035
1035
|
else
|
1036
|
-
|
1036
|
+
@index = i7
|
1037
1037
|
r7 = nil
|
1038
1038
|
end
|
1039
1039
|
if r7
|
1040
1040
|
r0 = r7
|
1041
1041
|
else
|
1042
|
-
|
1042
|
+
@index = i0
|
1043
1043
|
r0 = nil
|
1044
1044
|
end
|
1045
1045
|
end
|
1046
1046
|
|
1047
1047
|
node_cache[:garbage][start_index] = r0
|
1048
1048
|
|
1049
|
-
|
1049
|
+
r0
|
1050
1050
|
end
|
1051
1051
|
|
1052
1052
|
end
|