biodiversity19 0.5.15 → 0.5.16
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +1 -0
- data/Rakefile +20 -4
- data/VERSION +1 -1
- data/bin/nnparse +2 -2
- data/{biodiversity.gemspec → biodiversity19.gemspec} +11 -8
- data/lib/biodiversity/parser/scientific_name_canonical.rb +9 -3
- data/lib/biodiversity/parser/scientific_name_canonical.treetop +8 -1
- data/lib/biodiversity/parser/scientific_name_clean.rb +362 -386
- data/lib/biodiversity/parser/scientific_name_clean.treetop +39 -45
- data/lib/biodiversity/parser/scientific_name_dirty.rb +215 -2
- data/lib/biodiversity/parser/scientific_name_dirty.treetop +62 -1
- data/lib/biodiversity/parser.rb +1 -0
- data/spec/parser/scientific_name_canonical.spec.rb +1 -2
- data/spec/parser/scientific_name_clean.spec.rb +45 -23
- data/spec/parser/scientific_name_dirty.spec.rb +17 -1
- data/spec/parser/test_data.txt +148 -148
- metadata +23 -11
@@ -3,7 +3,7 @@ module ScientificNameClean
|
|
3
3
|
include Treetop::Runtime
|
4
4
|
|
5
5
|
def root
|
6
|
-
@root
|
6
|
+
@root ||= :root
|
7
7
|
end
|
8
8
|
|
9
9
|
module Root0
|
@@ -40,6 +40,10 @@ module ScientificNameClean
|
|
40
40
|
def details
|
41
41
|
a.details.class == Array ? a.details : [a.details]
|
42
42
|
end
|
43
|
+
|
44
|
+
def parser_run
|
45
|
+
1
|
46
|
+
end
|
43
47
|
end
|
44
48
|
|
45
49
|
def _nt_root
|
@@ -2336,39 +2340,51 @@ module ScientificNameClean
|
|
2336
2340
|
end
|
2337
2341
|
|
2338
2342
|
i0, s0 = index, []
|
2339
|
-
|
2343
|
+
i1 = index
|
2344
|
+
r2 = _nt_cap_latin_word_pair
|
2345
|
+
if r2
|
2346
|
+
r1 = r2
|
2347
|
+
else
|
2348
|
+
r3 = _nt_cap_latin_word
|
2349
|
+
if r3
|
2350
|
+
r1 = r3
|
2351
|
+
else
|
2352
|
+
@index = i1
|
2353
|
+
r1 = nil
|
2354
|
+
end
|
2355
|
+
end
|
2340
2356
|
s0 << r1
|
2341
2357
|
if r1
|
2342
|
-
|
2343
|
-
|
2344
|
-
|
2345
|
-
|
2346
|
-
if
|
2347
|
-
|
2348
|
-
|
2349
|
-
if
|
2350
|
-
|
2351
|
-
|
2352
|
-
if
|
2353
|
-
|
2354
|
-
|
2358
|
+
i4 = index
|
2359
|
+
i5, s5 = index, []
|
2360
|
+
r6 = _nt_space_hard
|
2361
|
+
s5 << r6
|
2362
|
+
if r6
|
2363
|
+
r7 = _nt_author_prefix_word
|
2364
|
+
s5 << r7
|
2365
|
+
if r7
|
2366
|
+
r8 = _nt_space_hard
|
2367
|
+
s5 << r8
|
2368
|
+
if r8
|
2369
|
+
r9 = _nt_author_word
|
2370
|
+
s5 << r9
|
2355
2371
|
end
|
2356
2372
|
end
|
2357
2373
|
end
|
2358
|
-
if
|
2359
|
-
|
2360
|
-
|
2374
|
+
if s5.last
|
2375
|
+
r5 = instantiate_node(SyntaxNode,input, i5...index, s5)
|
2376
|
+
r5.extend(Genus0)
|
2361
2377
|
else
|
2362
|
-
@index =
|
2363
|
-
|
2378
|
+
@index = i5
|
2379
|
+
r5 = nil
|
2364
2380
|
end
|
2365
|
-
if
|
2366
|
-
|
2381
|
+
if r5
|
2382
|
+
r4 = nil
|
2367
2383
|
else
|
2368
|
-
@index =
|
2369
|
-
|
2384
|
+
@index = i4
|
2385
|
+
r4 = instantiate_node(SyntaxNode,input, index...index)
|
2370
2386
|
end
|
2371
|
-
s0 <<
|
2387
|
+
s0 << r4
|
2372
2388
|
end
|
2373
2389
|
if s0.last
|
2374
2390
|
r0 = instantiate_node(SyntaxNode,input, i0...index, s0)
|
@@ -2497,8 +2513,21 @@ module ScientificNameClean
|
|
2497
2513
|
return cached
|
2498
2514
|
end
|
2499
2515
|
|
2500
|
-
|
2501
|
-
|
2516
|
+
i0 = index
|
2517
|
+
r1 = _nt_cap_latin_word_pair
|
2518
|
+
if r1
|
2519
|
+
r0 = r1
|
2520
|
+
r0.extend(UninomialString0)
|
2521
|
+
else
|
2522
|
+
r2 = _nt_cap_latin_word
|
2523
|
+
if r2
|
2524
|
+
r0 = r2
|
2525
|
+
r0.extend(UninomialString0)
|
2526
|
+
else
|
2527
|
+
@index = i0
|
2528
|
+
r0 = nil
|
2529
|
+
end
|
2530
|
+
end
|
2502
2531
|
|
2503
2532
|
node_cache[:uninomial_string][start_index] = r0
|
2504
2533
|
|
@@ -4552,6 +4581,64 @@ module ScientificNameClean
|
|
4552
4581
|
r0
|
4553
4582
|
end
|
4554
4583
|
|
4584
|
+
module CapLatinWordPair0
|
4585
|
+
def a
|
4586
|
+
elements[0]
|
4587
|
+
end
|
4588
|
+
|
4589
|
+
def b
|
4590
|
+
elements[2]
|
4591
|
+
end
|
4592
|
+
end
|
4593
|
+
|
4594
|
+
module CapLatinWordPair1
|
4595
|
+
def value
|
4596
|
+
a.value + b.value.downcase
|
4597
|
+
end
|
4598
|
+
end
|
4599
|
+
|
4600
|
+
def _nt_cap_latin_word_pair
|
4601
|
+
start_index = index
|
4602
|
+
if node_cache[:cap_latin_word_pair].has_key?(index)
|
4603
|
+
cached = node_cache[:cap_latin_word_pair][index]
|
4604
|
+
if cached
|
4605
|
+
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
4606
|
+
@index = cached.interval.end
|
4607
|
+
end
|
4608
|
+
return cached
|
4609
|
+
end
|
4610
|
+
|
4611
|
+
i0, s0 = index, []
|
4612
|
+
r1 = _nt_cap_latin_word
|
4613
|
+
s0 << r1
|
4614
|
+
if r1
|
4615
|
+
if has_terminal?("-", false, index)
|
4616
|
+
r2 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
4617
|
+
@index += 1
|
4618
|
+
else
|
4619
|
+
terminal_parse_failure("-")
|
4620
|
+
r2 = nil
|
4621
|
+
end
|
4622
|
+
s0 << r2
|
4623
|
+
if r2
|
4624
|
+
r3 = _nt_cap_latin_word
|
4625
|
+
s0 << r3
|
4626
|
+
end
|
4627
|
+
end
|
4628
|
+
if s0.last
|
4629
|
+
r0 = instantiate_node(SyntaxNode,input, i0...index, s0)
|
4630
|
+
r0.extend(CapLatinWordPair0)
|
4631
|
+
r0.extend(CapLatinWordPair1)
|
4632
|
+
else
|
4633
|
+
@index = i0
|
4634
|
+
r0 = nil
|
4635
|
+
end
|
4636
|
+
|
4637
|
+
node_cache[:cap_latin_word_pair][start_index] = r0
|
4638
|
+
|
4639
|
+
r0
|
4640
|
+
end
|
4641
|
+
|
4555
4642
|
module CapLatinWord0
|
4556
4643
|
def a
|
4557
4644
|
elements[0]
|
@@ -4586,6 +4673,22 @@ module ScientificNameClean
|
|
4586
4673
|
end
|
4587
4674
|
|
4588
4675
|
module CapLatinWord4
|
4676
|
+
def a
|
4677
|
+
elements[0]
|
4678
|
+
end
|
4679
|
+
|
4680
|
+
def b
|
4681
|
+
elements[1]
|
4682
|
+
end
|
4683
|
+
end
|
4684
|
+
|
4685
|
+
module CapLatinWord5
|
4686
|
+
def value
|
4687
|
+
a.text_value[0..0] + 'e' + b.value
|
4688
|
+
end
|
4689
|
+
end
|
4690
|
+
|
4691
|
+
module CapLatinWord6
|
4589
4692
|
def value
|
4590
4693
|
text_value
|
4591
4694
|
end
|
@@ -4683,218 +4786,261 @@ module ScientificNameClean
|
|
4683
4786
|
if r7
|
4684
4787
|
r0 = r7
|
4685
4788
|
else
|
4686
|
-
i12 = index
|
4687
|
-
|
4688
|
-
|
4789
|
+
i12, s12 = index, []
|
4790
|
+
i13 = index
|
4791
|
+
if has_terminal?("AE", false, index)
|
4792
|
+
r14 = instantiate_node(SyntaxNode,input, index...(index + 2))
|
4689
4793
|
@index += 2
|
4690
4794
|
else
|
4691
|
-
terminal_parse_failure("
|
4692
|
-
|
4795
|
+
terminal_parse_failure("AE")
|
4796
|
+
r14 = nil
|
4797
|
+
end
|
4798
|
+
if r14
|
4799
|
+
r13 = r14
|
4800
|
+
else
|
4801
|
+
if has_terminal?("OE", false, index)
|
4802
|
+
r15 = instantiate_node(SyntaxNode,input, index...(index + 2))
|
4803
|
+
@index += 2
|
4804
|
+
else
|
4805
|
+
terminal_parse_failure("OE")
|
4806
|
+
r15 = nil
|
4807
|
+
end
|
4808
|
+
if r15
|
4809
|
+
r13 = r15
|
4810
|
+
else
|
4811
|
+
@index = i13
|
4812
|
+
r13 = nil
|
4813
|
+
end
|
4693
4814
|
end
|
4815
|
+
s12 << r13
|
4694
4816
|
if r13
|
4695
|
-
|
4817
|
+
r16 = _nt_latin_word
|
4818
|
+
s12 << r16
|
4819
|
+
end
|
4820
|
+
if s12.last
|
4821
|
+
r12 = instantiate_node(SyntaxNode,input, i12...index, s12)
|
4696
4822
|
r12.extend(CapLatinWord4)
|
4823
|
+
r12.extend(CapLatinWord5)
|
4697
4824
|
else
|
4698
|
-
|
4699
|
-
|
4825
|
+
@index = i12
|
4826
|
+
r12 = nil
|
4827
|
+
end
|
4828
|
+
if r12
|
4829
|
+
r0 = r12
|
4830
|
+
else
|
4831
|
+
i17 = index
|
4832
|
+
if has_terminal?("Ca", false, index)
|
4833
|
+
r18 = instantiate_node(SyntaxNode,input, index...(index + 2))
|
4700
4834
|
@index += 2
|
4701
4835
|
else
|
4702
|
-
terminal_parse_failure("
|
4703
|
-
|
4836
|
+
terminal_parse_failure("Ca")
|
4837
|
+
r18 = nil
|
4704
4838
|
end
|
4705
|
-
if
|
4706
|
-
|
4707
|
-
|
4839
|
+
if r18
|
4840
|
+
r17 = r18
|
4841
|
+
r17.extend(CapLatinWord6)
|
4708
4842
|
else
|
4709
|
-
if has_terminal?("
|
4710
|
-
|
4843
|
+
if has_terminal?("Ea", false, index)
|
4844
|
+
r19 = instantiate_node(SyntaxNode,input, index...(index + 2))
|
4711
4845
|
@index += 2
|
4712
4846
|
else
|
4713
|
-
terminal_parse_failure("
|
4714
|
-
|
4847
|
+
terminal_parse_failure("Ea")
|
4848
|
+
r19 = nil
|
4715
4849
|
end
|
4716
|
-
if
|
4717
|
-
|
4718
|
-
|
4850
|
+
if r19
|
4851
|
+
r17 = r19
|
4852
|
+
r17.extend(CapLatinWord6)
|
4719
4853
|
else
|
4720
|
-
if has_terminal?("
|
4721
|
-
|
4854
|
+
if has_terminal?("Ge", false, index)
|
4855
|
+
r20 = instantiate_node(SyntaxNode,input, index...(index + 2))
|
4722
4856
|
@index += 2
|
4723
4857
|
else
|
4724
|
-
terminal_parse_failure("
|
4725
|
-
|
4858
|
+
terminal_parse_failure("Ge")
|
4859
|
+
r20 = nil
|
4726
4860
|
end
|
4727
|
-
if
|
4728
|
-
|
4729
|
-
|
4861
|
+
if r20
|
4862
|
+
r17 = r20
|
4863
|
+
r17.extend(CapLatinWord6)
|
4730
4864
|
else
|
4731
|
-
if has_terminal?("
|
4732
|
-
|
4865
|
+
if has_terminal?("Ia", false, index)
|
4866
|
+
r21 = instantiate_node(SyntaxNode,input, index...(index + 2))
|
4733
4867
|
@index += 2
|
4734
4868
|
else
|
4735
|
-
terminal_parse_failure("
|
4736
|
-
|
4869
|
+
terminal_parse_failure("Ia")
|
4870
|
+
r21 = nil
|
4737
4871
|
end
|
4738
|
-
if
|
4739
|
-
|
4740
|
-
|
4872
|
+
if r21
|
4873
|
+
r17 = r21
|
4874
|
+
r17.extend(CapLatinWord6)
|
4741
4875
|
else
|
4742
4876
|
if has_terminal?("Io", false, index)
|
4743
|
-
|
4877
|
+
r22 = instantiate_node(SyntaxNode,input, index...(index + 2))
|
4744
4878
|
@index += 2
|
4745
4879
|
else
|
4746
4880
|
terminal_parse_failure("Io")
|
4747
|
-
|
4881
|
+
r22 = nil
|
4748
4882
|
end
|
4749
|
-
if
|
4750
|
-
|
4751
|
-
|
4883
|
+
if r22
|
4884
|
+
r17 = r22
|
4885
|
+
r17.extend(CapLatinWord6)
|
4752
4886
|
else
|
4753
|
-
if has_terminal?("
|
4754
|
-
|
4887
|
+
if has_terminal?("Io", false, index)
|
4888
|
+
r23 = instantiate_node(SyntaxNode,input, index...(index + 2))
|
4755
4889
|
@index += 2
|
4756
4890
|
else
|
4757
|
-
terminal_parse_failure("
|
4758
|
-
|
4891
|
+
terminal_parse_failure("Io")
|
4892
|
+
r23 = nil
|
4759
4893
|
end
|
4760
|
-
if
|
4761
|
-
|
4762
|
-
|
4894
|
+
if r23
|
4895
|
+
r17 = r23
|
4896
|
+
r17.extend(CapLatinWord6)
|
4763
4897
|
else
|
4764
|
-
if has_terminal?("
|
4765
|
-
|
4898
|
+
if has_terminal?("Ix", false, index)
|
4899
|
+
r24 = instantiate_node(SyntaxNode,input, index...(index + 2))
|
4766
4900
|
@index += 2
|
4767
4901
|
else
|
4768
|
-
terminal_parse_failure("
|
4769
|
-
|
4902
|
+
terminal_parse_failure("Ix")
|
4903
|
+
r24 = nil
|
4770
4904
|
end
|
4771
|
-
if
|
4772
|
-
|
4773
|
-
|
4905
|
+
if r24
|
4906
|
+
r17 = r24
|
4907
|
+
r17.extend(CapLatinWord6)
|
4774
4908
|
else
|
4775
|
-
if has_terminal?("
|
4776
|
-
|
4909
|
+
if has_terminal?("Lo", false, index)
|
4910
|
+
r25 = instantiate_node(SyntaxNode,input, index...(index + 2))
|
4777
4911
|
@index += 2
|
4778
4912
|
else
|
4779
|
-
terminal_parse_failure("
|
4780
|
-
|
4913
|
+
terminal_parse_failure("Lo")
|
4914
|
+
r25 = nil
|
4781
4915
|
end
|
4782
|
-
if
|
4783
|
-
|
4784
|
-
|
4916
|
+
if r25
|
4917
|
+
r17 = r25
|
4918
|
+
r17.extend(CapLatinWord6)
|
4785
4919
|
else
|
4786
|
-
if has_terminal?("
|
4787
|
-
|
4920
|
+
if has_terminal?("Oa", false, index)
|
4921
|
+
r26 = instantiate_node(SyntaxNode,input, index...(index + 2))
|
4788
4922
|
@index += 2
|
4789
4923
|
else
|
4790
|
-
terminal_parse_failure("
|
4791
|
-
|
4924
|
+
terminal_parse_failure("Oa")
|
4925
|
+
r26 = nil
|
4792
4926
|
end
|
4793
|
-
if
|
4794
|
-
|
4795
|
-
|
4927
|
+
if r26
|
4928
|
+
r17 = r26
|
4929
|
+
r17.extend(CapLatinWord6)
|
4796
4930
|
else
|
4797
|
-
if has_terminal?("
|
4798
|
-
|
4931
|
+
if has_terminal?("Ra", false, index)
|
4932
|
+
r27 = instantiate_node(SyntaxNode,input, index...(index + 2))
|
4799
4933
|
@index += 2
|
4800
4934
|
else
|
4801
|
-
terminal_parse_failure("
|
4802
|
-
|
4935
|
+
terminal_parse_failure("Ra")
|
4936
|
+
r27 = nil
|
4803
4937
|
end
|
4804
|
-
if
|
4805
|
-
|
4806
|
-
|
4938
|
+
if r27
|
4939
|
+
r17 = r27
|
4940
|
+
r17.extend(CapLatinWord6)
|
4807
4941
|
else
|
4808
|
-
if has_terminal?("
|
4809
|
-
|
4942
|
+
if has_terminal?("Ty", false, index)
|
4943
|
+
r28 = instantiate_node(SyntaxNode,input, index...(index + 2))
|
4810
4944
|
@index += 2
|
4811
4945
|
else
|
4812
|
-
terminal_parse_failure("
|
4813
|
-
|
4946
|
+
terminal_parse_failure("Ty")
|
4947
|
+
r28 = nil
|
4814
4948
|
end
|
4815
|
-
if
|
4816
|
-
|
4817
|
-
|
4949
|
+
if r28
|
4950
|
+
r17 = r28
|
4951
|
+
r17.extend(CapLatinWord6)
|
4818
4952
|
else
|
4819
|
-
if has_terminal?("
|
4820
|
-
|
4953
|
+
if has_terminal?("Ua", false, index)
|
4954
|
+
r29 = instantiate_node(SyntaxNode,input, index...(index + 2))
|
4821
4955
|
@index += 2
|
4822
4956
|
else
|
4823
|
-
terminal_parse_failure("
|
4824
|
-
|
4957
|
+
terminal_parse_failure("Ua")
|
4958
|
+
r29 = nil
|
4825
4959
|
end
|
4826
|
-
if
|
4827
|
-
|
4828
|
-
|
4960
|
+
if r29
|
4961
|
+
r17 = r29
|
4962
|
+
r17.extend(CapLatinWord6)
|
4829
4963
|
else
|
4830
|
-
if has_terminal?("
|
4831
|
-
|
4964
|
+
if has_terminal?("Aa", false, index)
|
4965
|
+
r30 = instantiate_node(SyntaxNode,input, index...(index + 2))
|
4832
4966
|
@index += 2
|
4833
4967
|
else
|
4834
|
-
terminal_parse_failure("
|
4835
|
-
|
4968
|
+
terminal_parse_failure("Aa")
|
4969
|
+
r30 = nil
|
4836
4970
|
end
|
4837
|
-
if
|
4838
|
-
|
4839
|
-
|
4971
|
+
if r30
|
4972
|
+
r17 = r30
|
4973
|
+
r17.extend(CapLatinWord6)
|
4840
4974
|
else
|
4841
|
-
if has_terminal?("
|
4842
|
-
|
4975
|
+
if has_terminal?("Ja", false, index)
|
4976
|
+
r31 = instantiate_node(SyntaxNode,input, index...(index + 2))
|
4843
4977
|
@index += 2
|
4844
4978
|
else
|
4845
|
-
terminal_parse_failure("
|
4846
|
-
|
4979
|
+
terminal_parse_failure("Ja")
|
4980
|
+
r31 = nil
|
4847
4981
|
end
|
4848
|
-
if
|
4849
|
-
|
4850
|
-
|
4982
|
+
if r31
|
4983
|
+
r17 = r31
|
4984
|
+
r17.extend(CapLatinWord6)
|
4851
4985
|
else
|
4852
|
-
if has_terminal?("
|
4853
|
-
|
4986
|
+
if has_terminal?("Zu", false, index)
|
4987
|
+
r32 = instantiate_node(SyntaxNode,input, index...(index + 2))
|
4854
4988
|
@index += 2
|
4855
4989
|
else
|
4856
|
-
terminal_parse_failure("
|
4857
|
-
|
4990
|
+
terminal_parse_failure("Zu")
|
4991
|
+
r32 = nil
|
4858
4992
|
end
|
4859
|
-
if
|
4860
|
-
|
4861
|
-
|
4993
|
+
if r32
|
4994
|
+
r17 = r32
|
4995
|
+
r17.extend(CapLatinWord6)
|
4862
4996
|
else
|
4863
|
-
if has_terminal?("
|
4864
|
-
|
4997
|
+
if has_terminal?("La", false, index)
|
4998
|
+
r33 = instantiate_node(SyntaxNode,input, index...(index + 2))
|
4865
4999
|
@index += 2
|
4866
5000
|
else
|
4867
|
-
terminal_parse_failure("
|
4868
|
-
|
5001
|
+
terminal_parse_failure("La")
|
5002
|
+
r33 = nil
|
4869
5003
|
end
|
4870
|
-
if
|
4871
|
-
|
4872
|
-
|
5004
|
+
if r33
|
5005
|
+
r17 = r33
|
5006
|
+
r17.extend(CapLatinWord6)
|
4873
5007
|
else
|
4874
|
-
if has_terminal?("
|
4875
|
-
|
5008
|
+
if has_terminal?("Qu", false, index)
|
5009
|
+
r34 = instantiate_node(SyntaxNode,input, index...(index + 2))
|
4876
5010
|
@index += 2
|
4877
5011
|
else
|
4878
|
-
terminal_parse_failure("
|
4879
|
-
|
5012
|
+
terminal_parse_failure("Qu")
|
5013
|
+
r34 = nil
|
4880
5014
|
end
|
4881
|
-
if
|
4882
|
-
|
4883
|
-
|
5015
|
+
if r34
|
5016
|
+
r17 = r34
|
5017
|
+
r17.extend(CapLatinWord6)
|
4884
5018
|
else
|
4885
|
-
if has_terminal?("
|
4886
|
-
|
5019
|
+
if has_terminal?("As", false, index)
|
5020
|
+
r35 = instantiate_node(SyntaxNode,input, index...(index + 2))
|
4887
5021
|
@index += 2
|
4888
5022
|
else
|
4889
|
-
terminal_parse_failure("
|
4890
|
-
|
5023
|
+
terminal_parse_failure("As")
|
5024
|
+
r35 = nil
|
4891
5025
|
end
|
4892
|
-
if
|
4893
|
-
|
4894
|
-
|
5026
|
+
if r35
|
5027
|
+
r17 = r35
|
5028
|
+
r17.extend(CapLatinWord6)
|
4895
5029
|
else
|
4896
|
-
|
4897
|
-
|
5030
|
+
if has_terminal?("Ba", false, index)
|
5031
|
+
r36 = instantiate_node(SyntaxNode,input, index...(index + 2))
|
5032
|
+
@index += 2
|
5033
|
+
else
|
5034
|
+
terminal_parse_failure("Ba")
|
5035
|
+
r36 = nil
|
5036
|
+
end
|
5037
|
+
if r36
|
5038
|
+
r17 = r36
|
5039
|
+
r17.extend(CapLatinWord6)
|
5040
|
+
else
|
5041
|
+
@index = i17
|
5042
|
+
r17 = nil
|
5043
|
+
end
|
4898
5044
|
end
|
4899
5045
|
end
|
4900
5046
|
end
|
@@ -4913,12 +5059,12 @@ module ScientificNameClean
|
|
4913
5059
|
end
|
4914
5060
|
end
|
4915
5061
|
end
|
4916
|
-
|
4917
|
-
|
4918
|
-
|
4919
|
-
|
4920
|
-
|
4921
|
-
|
5062
|
+
if r17
|
5063
|
+
r0 = r17
|
5064
|
+
else
|
5065
|
+
@index = i0
|
5066
|
+
r0 = nil
|
5067
|
+
end
|
4922
5068
|
end
|
4923
5069
|
end
|
4924
5070
|
end
|
@@ -5319,23 +5465,10 @@ module ScientificNameClean
|
|
5319
5465
|
|
5320
5466
|
module LatinWord1
|
5321
5467
|
def value
|
5322
|
-
a.text_value
|
5323
|
-
|
5324
|
-
|
5325
|
-
|
5326
|
-
module LatinWord2
|
5327
|
-
def a
|
5328
|
-
elements[0]
|
5329
|
-
end
|
5330
|
-
|
5331
|
-
def b
|
5332
|
-
elements[1]
|
5333
|
-
end
|
5334
|
-
end
|
5335
|
-
|
5336
|
-
module LatinWord3
|
5337
|
-
def value
|
5338
|
-
a.value + b.value
|
5468
|
+
l = a.text_value
|
5469
|
+
l = 'ae' if l == 'æ'
|
5470
|
+
l = 'oe' if l == 'œ'
|
5471
|
+
l + b.value
|
5339
5472
|
end
|
5340
5473
|
end
|
5341
5474
|
|
@@ -5350,164 +5483,44 @@ module ScientificNameClean
|
|
5350
5483
|
return cached
|
5351
5484
|
end
|
5352
5485
|
|
5353
|
-
i0 = index
|
5354
|
-
|
5355
|
-
|
5356
|
-
r2 = true
|
5486
|
+
i0, s0 = index, []
|
5487
|
+
if has_terminal?('\G[a-zëæœ]', true, index)
|
5488
|
+
r1 = true
|
5357
5489
|
@index += 1
|
5358
5490
|
else
|
5359
|
-
r2 = nil
|
5360
|
-
end
|
5361
|
-
s1 << r2
|
5362
|
-
if r2
|
5363
|
-
r3 = _nt_full_name_letters
|
5364
|
-
s1 << r3
|
5365
|
-
end
|
5366
|
-
if s1.last
|
5367
|
-
r1 = instantiate_node(SyntaxNode,input, i1...index, s1)
|
5368
|
-
r1.extend(LatinWord0)
|
5369
|
-
r1.extend(LatinWord1)
|
5370
|
-
else
|
5371
|
-
@index = i1
|
5372
5491
|
r1 = nil
|
5373
5492
|
end
|
5493
|
+
s0 << r1
|
5374
5494
|
if r1
|
5375
|
-
|
5376
|
-
|
5377
|
-
i4, s4 = index, []
|
5378
|
-
r5 = _nt_digraph
|
5379
|
-
s4 << r5
|
5380
|
-
if r5
|
5381
|
-
r6 = _nt_full_name_letters
|
5382
|
-
s4 << r6
|
5383
|
-
end
|
5384
|
-
if s4.last
|
5385
|
-
r4 = instantiate_node(SyntaxNode,input, i4...index, s4)
|
5386
|
-
r4.extend(LatinWord2)
|
5387
|
-
r4.extend(LatinWord3)
|
5388
|
-
else
|
5389
|
-
@index = i4
|
5390
|
-
r4 = nil
|
5391
|
-
end
|
5392
|
-
if r4
|
5393
|
-
r0 = r4
|
5394
|
-
else
|
5395
|
-
@index = i0
|
5396
|
-
r0 = nil
|
5397
|
-
end
|
5398
|
-
end
|
5399
|
-
|
5400
|
-
node_cache[:latin_word][start_index] = r0
|
5401
|
-
|
5402
|
-
r0
|
5403
|
-
end
|
5404
|
-
|
5405
|
-
module FullNameLetters0
|
5406
|
-
def a
|
5407
|
-
elements[0]
|
5408
|
-
end
|
5409
|
-
|
5410
|
-
def b
|
5411
|
-
elements[1]
|
5412
|
-
end
|
5413
|
-
end
|
5414
|
-
|
5415
|
-
module FullNameLetters1
|
5416
|
-
def value
|
5417
|
-
a.value + b.value
|
5418
|
-
end
|
5419
|
-
end
|
5420
|
-
|
5421
|
-
module FullNameLetters2
|
5422
|
-
def a
|
5423
|
-
elements[0]
|
5424
|
-
end
|
5425
|
-
|
5426
|
-
def b
|
5427
|
-
elements[1]
|
5428
|
-
end
|
5429
|
-
|
5430
|
-
def c
|
5431
|
-
elements[2]
|
5432
|
-
end
|
5433
|
-
end
|
5434
|
-
|
5435
|
-
module FullNameLetters3
|
5436
|
-
def value
|
5437
|
-
a.value + b.value + c.value
|
5438
|
-
end
|
5439
|
-
end
|
5440
|
-
|
5441
|
-
def _nt_full_name_letters
|
5442
|
-
start_index = index
|
5443
|
-
if node_cache[:full_name_letters].has_key?(index)
|
5444
|
-
cached = node_cache[:full_name_letters][index]
|
5445
|
-
if cached
|
5446
|
-
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
5447
|
-
@index = cached.interval.end
|
5448
|
-
end
|
5449
|
-
return cached
|
5450
|
-
end
|
5451
|
-
|
5452
|
-
i0 = index
|
5453
|
-
i1, s1 = index, []
|
5454
|
-
r2 = _nt_digraph
|
5455
|
-
s1 << r2
|
5456
|
-
if r2
|
5457
|
-
r3 = _nt_full_name_letters
|
5458
|
-
s1 << r3
|
5459
|
-
end
|
5460
|
-
if s1.last
|
5461
|
-
r1 = instantiate_node(SyntaxNode,input, i1...index, s1)
|
5462
|
-
r1.extend(FullNameLetters0)
|
5463
|
-
r1.extend(FullNameLetters1)
|
5464
|
-
else
|
5465
|
-
@index = i1
|
5466
|
-
r1 = nil
|
5495
|
+
r2 = _nt_valid_name_letters
|
5496
|
+
s0 << r2
|
5467
5497
|
end
|
5468
|
-
if
|
5469
|
-
r0 =
|
5498
|
+
if s0.last
|
5499
|
+
r0 = instantiate_node(SyntaxNode,input, i0...index, s0)
|
5500
|
+
r0.extend(LatinWord0)
|
5501
|
+
r0.extend(LatinWord1)
|
5470
5502
|
else
|
5471
|
-
|
5472
|
-
|
5473
|
-
s4 << r5
|
5474
|
-
if r5
|
5475
|
-
r6 = _nt_digraph
|
5476
|
-
s4 << r6
|
5477
|
-
if r6
|
5478
|
-
r7 = _nt_full_name_letters
|
5479
|
-
s4 << r7
|
5480
|
-
end
|
5481
|
-
end
|
5482
|
-
if s4.last
|
5483
|
-
r4 = instantiate_node(SyntaxNode,input, i4...index, s4)
|
5484
|
-
r4.extend(FullNameLetters2)
|
5485
|
-
r4.extend(FullNameLetters3)
|
5486
|
-
else
|
5487
|
-
@index = i4
|
5488
|
-
r4 = nil
|
5489
|
-
end
|
5490
|
-
if r4
|
5491
|
-
r0 = r4
|
5492
|
-
else
|
5493
|
-
r8 = _nt_valid_name_letters
|
5494
|
-
if r8
|
5495
|
-
r0 = r8
|
5496
|
-
else
|
5497
|
-
@index = i0
|
5498
|
-
r0 = nil
|
5499
|
-
end
|
5500
|
-
end
|
5503
|
+
@index = i0
|
5504
|
+
r0 = nil
|
5501
5505
|
end
|
5502
5506
|
|
5503
|
-
node_cache[:
|
5507
|
+
node_cache[:latin_word][start_index] = r0
|
5504
5508
|
|
5505
5509
|
r0
|
5506
5510
|
end
|
5507
5511
|
|
5508
5512
|
module ValidNameLetters0
|
5509
5513
|
def value
|
5510
|
-
|
5514
|
+
res = ''
|
5515
|
+
text_value.split('').each do |l|
|
5516
|
+
l = 'ae' if l == 'æ'
|
5517
|
+
l = 'oe' if l == 'œ'
|
5518
|
+
# not sure if we should normalize ë as well. It is legal in botanical code, but it
|
5519
|
+
# might be beneficial to normalize it for the reconsiliation purposes
|
5520
|
+
# l = 'e' if l == 'ë'
|
5521
|
+
res << l
|
5522
|
+
end
|
5523
|
+
res
|
5511
5524
|
end
|
5512
5525
|
end
|
5513
5526
|
|
@@ -5524,7 +5537,7 @@ module ScientificNameClean
|
|
5524
5537
|
|
5525
5538
|
s0, i0 = [], index
|
5526
5539
|
loop do
|
5527
|
-
if has_terminal?('\G[a-z
|
5540
|
+
if has_terminal?('\G[a-z\\-ëæœ]', true, index)
|
5528
5541
|
r1 = true
|
5529
5542
|
@index += 1
|
5530
5543
|
else
|
@@ -5605,62 +5618,6 @@ module ScientificNameClean
|
|
5605
5618
|
r0
|
5606
5619
|
end
|
5607
5620
|
|
5608
|
-
module Digraph0
|
5609
|
-
def value
|
5610
|
-
'ae'
|
5611
|
-
end
|
5612
|
-
end
|
5613
|
-
|
5614
|
-
module Digraph1
|
5615
|
-
def value
|
5616
|
-
'oe'
|
5617
|
-
end
|
5618
|
-
end
|
5619
|
-
|
5620
|
-
def _nt_digraph
|
5621
|
-
start_index = index
|
5622
|
-
if node_cache[:digraph].has_key?(index)
|
5623
|
-
cached = node_cache[:digraph][index]
|
5624
|
-
if cached
|
5625
|
-
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
5626
|
-
@index = cached.interval.end
|
5627
|
-
end
|
5628
|
-
return cached
|
5629
|
-
end
|
5630
|
-
|
5631
|
-
i0 = index
|
5632
|
-
if has_terminal?("æ", false, index)
|
5633
|
-
r1 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
5634
|
-
r1.extend(Digraph0)
|
5635
|
-
@index += 1
|
5636
|
-
else
|
5637
|
-
terminal_parse_failure("æ")
|
5638
|
-
r1 = nil
|
5639
|
-
end
|
5640
|
-
if r1
|
5641
|
-
r0 = r1
|
5642
|
-
else
|
5643
|
-
if has_terminal?("œ", false, index)
|
5644
|
-
r2 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
5645
|
-
r2.extend(Digraph1)
|
5646
|
-
@index += 1
|
5647
|
-
else
|
5648
|
-
terminal_parse_failure("œ")
|
5649
|
-
r2 = nil
|
5650
|
-
end
|
5651
|
-
if r2
|
5652
|
-
r0 = r2
|
5653
|
-
else
|
5654
|
-
@index = i0
|
5655
|
-
r0 = nil
|
5656
|
-
end
|
5657
|
-
end
|
5658
|
-
|
5659
|
-
node_cache[:digraph][start_index] = r0
|
5660
|
-
|
5661
|
-
r0
|
5662
|
-
end
|
5663
|
-
|
5664
5621
|
module Year0
|
5665
5622
|
def b
|
5666
5623
|
elements[0]
|
@@ -6036,7 +5993,7 @@ module ScientificNameClean
|
|
6036
5993
|
|
6037
5994
|
module MultiplicationSign0
|
6038
5995
|
def value
|
6039
|
-
|
5996
|
+
"×"
|
6040
5997
|
end
|
6041
5998
|
end
|
6042
5999
|
|
@@ -6051,13 +6008,32 @@ module ScientificNameClean
|
|
6051
6008
|
return cached
|
6052
6009
|
end
|
6053
6010
|
|
6011
|
+
i0 = index
|
6054
6012
|
if has_terminal?("×", false, index)
|
6055
|
-
|
6056
|
-
r0.extend(MultiplicationSign0)
|
6013
|
+
r1 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
6057
6014
|
@index += 1
|
6058
6015
|
else
|
6059
6016
|
terminal_parse_failure("×")
|
6060
|
-
|
6017
|
+
r1 = nil
|
6018
|
+
end
|
6019
|
+
if r1
|
6020
|
+
r0 = r1
|
6021
|
+
r0.extend(MultiplicationSign0)
|
6022
|
+
else
|
6023
|
+
if has_terminal?("*", false, index)
|
6024
|
+
r2 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
6025
|
+
@index += 1
|
6026
|
+
else
|
6027
|
+
terminal_parse_failure("*")
|
6028
|
+
r2 = nil
|
6029
|
+
end
|
6030
|
+
if r2
|
6031
|
+
r0 = r2
|
6032
|
+
r0.extend(MultiplicationSign0)
|
6033
|
+
else
|
6034
|
+
@index = i0
|
6035
|
+
r0 = nil
|
6036
|
+
end
|
6061
6037
|
end
|
6062
6038
|
|
6063
6039
|
node_cache[:multiplication_sign][start_index] = r0
|