biodiversity 0.5.15 → 0.5.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +1 -0
- data/Rakefile +19 -3
- data/VERSION +1 -1
- data/bin/nnparse +2 -2
- data/biodiversity.gemspec +5 -2
- data/lib/biodiversity/parser.rb +1 -0
- data/lib/biodiversity/parser/scientific_name_canonical.rb +32 -13
- data/lib/biodiversity/parser/scientific_name_canonical.treetop +8 -1
- data/lib/biodiversity/parser/scientific_name_clean.rb +651 -524
- data/lib/biodiversity/parser/scientific_name_clean.treetop +39 -45
- data/lib/biodiversity/parser/scientific_name_dirty.rb +280 -27
- data/lib/biodiversity/parser/scientific_name_dirty.treetop +62 -1
- data/spec/parser/scientific_name_canonical.spec.rb +1 -2
- data/spec/parser/scientific_name_clean.spec.rb +45 -23
- data/spec/parser/scientific_name_dirty.spec.rb +17 -1
- data/spec/parser/test_data.txt +148 -148
- metadata +17 -5
data/.gitignore
CHANGED
data/Rakefile
CHANGED
|
@@ -36,8 +36,24 @@ rescue LoadError
|
|
|
36
36
|
end
|
|
37
37
|
|
|
38
38
|
task :tt do
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
39
|
+
['scientific_name_clean', 'scientific_name_dirty', 'scientific_name_canonical'].each do |f|
|
|
40
|
+
system("tt #{dir}/lib/biodiversity/parser/#{f}.treetop")
|
|
41
|
+
rf = "#{dir}/lib/biodiversity/parser/#{f}.rb"
|
|
42
|
+
rfn = open(rf + ".tmp", 'w')
|
|
43
|
+
skip_head = false
|
|
44
|
+
f = open(rf)
|
|
45
|
+
f.each_with_index do |l, i|
|
|
46
|
+
skip_head = l.match(/^# Autogenerated/) if i == 0
|
|
47
|
+
if skip_head && (l.strip == '' || l.match(/^# Autogenerated/))
|
|
48
|
+
next
|
|
49
|
+
else
|
|
50
|
+
skip_head = false
|
|
51
|
+
rfn.write(l)
|
|
52
|
+
end
|
|
53
|
+
end
|
|
54
|
+
rfn.close
|
|
55
|
+
f.close
|
|
56
|
+
`mv #{rf}.tmp #{rf}`
|
|
57
|
+
end
|
|
42
58
|
end
|
|
43
59
|
|
data/VERSION
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
0.5.
|
|
1
|
+
0.5.16
|
data/bin/nnparse
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
#!/usr/bin/env ruby
|
|
2
2
|
require 'rubygems'
|
|
3
|
-
gem '
|
|
3
|
+
gem 'biodiversity' rescue nil
|
|
4
4
|
|
|
5
5
|
$LOAD_PATH.unshift(File.expand_path(File.dirname(__FILE__) + "/../lib"))
|
|
6
6
|
require 'biodiversity'
|
|
@@ -31,7 +31,7 @@ IO.foreach(input) do |line|
|
|
|
31
31
|
$KCODE = 'NONE'
|
|
32
32
|
end
|
|
33
33
|
p.parse(name)
|
|
34
|
-
parsed_data = p.parsed.all_json rescue {'parsed' => false, 'verbatim' => name, 'error' => 'Parser error'}.to_json
|
|
34
|
+
parsed_data = p.parsed.all_json rescue {'scientificName' => {'parsed' => false, 'verbatim' => name, 'error' => 'Parser error'}}.to_json
|
|
35
35
|
if ruby_min_version < 19
|
|
36
36
|
$KCODE = old_kcode
|
|
37
37
|
end
|
data/biodiversity.gemspec
CHANGED
|
@@ -5,11 +5,11 @@
|
|
|
5
5
|
|
|
6
6
|
Gem::Specification.new do |s|
|
|
7
7
|
s.name = %q{biodiversity}
|
|
8
|
-
s.version = "0.5.
|
|
8
|
+
s.version = "0.5.16"
|
|
9
9
|
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
|
11
11
|
s.authors = ["Dmitry Mozzherin"]
|
|
12
|
-
s.date = %q{2010-
|
|
12
|
+
s.date = %q{2010-04-08}
|
|
13
13
|
s.default_executable = %q{nnparse}
|
|
14
14
|
s.description = %q{Tools for biodiversity informatics}
|
|
15
15
|
s.email = %q{dmozzherin@gmail.com}
|
|
@@ -72,13 +72,16 @@ Gem::Specification.new do |s|
|
|
|
72
72
|
|
|
73
73
|
if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
|
|
74
74
|
s.add_runtime_dependency(%q<treetop>, [">= 0"])
|
|
75
|
+
s.add_runtime_dependency(%q<json>, [">= 0"])
|
|
75
76
|
s.add_development_dependency(%q<rspec>, [">= 0"])
|
|
76
77
|
else
|
|
77
78
|
s.add_dependency(%q<treetop>, [">= 0"])
|
|
79
|
+
s.add_dependency(%q<json>, [">= 0"])
|
|
78
80
|
s.add_dependency(%q<rspec>, [">= 0"])
|
|
79
81
|
end
|
|
80
82
|
else
|
|
81
83
|
s.add_dependency(%q<treetop>, [">= 0"])
|
|
84
|
+
s.add_dependency(%q<json>, [">= 0"])
|
|
82
85
|
s.add_dependency(%q<rspec>, [">= 0"])
|
|
83
86
|
end
|
|
84
87
|
end
|
data/lib/biodiversity/parser.rb
CHANGED
|
@@ -6,8 +6,6 @@ module ScientificNameCanonical
|
|
|
6
6
|
@root || :root
|
|
7
7
|
end
|
|
8
8
|
|
|
9
|
-
include ScientificNameClean
|
|
10
|
-
|
|
11
9
|
include ScientificNameDirty
|
|
12
10
|
|
|
13
11
|
module Root0
|
|
@@ -18,6 +16,10 @@ module ScientificNameCanonical
|
|
|
18
16
|
def details
|
|
19
17
|
[super]
|
|
20
18
|
end
|
|
19
|
+
|
|
20
|
+
def parser_run
|
|
21
|
+
3
|
|
22
|
+
end
|
|
21
23
|
end
|
|
22
24
|
|
|
23
25
|
module Root1
|
|
@@ -28,13 +30,20 @@ module ScientificNameCanonical
|
|
|
28
30
|
def details
|
|
29
31
|
[super]
|
|
30
32
|
end
|
|
33
|
+
|
|
34
|
+
def parser_run
|
|
35
|
+
3
|
|
36
|
+
end
|
|
31
37
|
end
|
|
32
38
|
|
|
33
39
|
def _nt_root
|
|
34
40
|
start_index = index
|
|
35
41
|
if node_cache[:root].has_key?(index)
|
|
36
42
|
cached = node_cache[:root][index]
|
|
37
|
-
|
|
43
|
+
if cached
|
|
44
|
+
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
|
45
|
+
@index = cached.interval.end
|
|
46
|
+
end
|
|
38
47
|
return cached
|
|
39
48
|
end
|
|
40
49
|
|
|
@@ -64,7 +73,7 @@ module ScientificNameCanonical
|
|
|
64
73
|
elements[0]
|
|
65
74
|
end
|
|
66
75
|
|
|
67
|
-
def
|
|
76
|
+
def space1
|
|
68
77
|
elements[1]
|
|
69
78
|
end
|
|
70
79
|
|
|
@@ -72,7 +81,7 @@ module ScientificNameCanonical
|
|
|
72
81
|
elements[2]
|
|
73
82
|
end
|
|
74
83
|
|
|
75
|
-
def
|
|
84
|
+
def space2
|
|
76
85
|
elements[3]
|
|
77
86
|
end
|
|
78
87
|
|
|
@@ -179,7 +188,10 @@ module ScientificNameCanonical
|
|
|
179
188
|
start_index = index
|
|
180
189
|
if node_cache[:multinomial_with_garbage].has_key?(index)
|
|
181
190
|
cached = node_cache[:multinomial_with_garbage][index]
|
|
182
|
-
|
|
191
|
+
if cached
|
|
192
|
+
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
|
193
|
+
@index = cached.interval.end
|
|
194
|
+
end
|
|
183
195
|
return cached
|
|
184
196
|
end
|
|
185
197
|
|
|
@@ -313,7 +325,10 @@ module ScientificNameCanonical
|
|
|
313
325
|
start_index = index
|
|
314
326
|
if node_cache[:uninomial_with_garbage].has_key?(index)
|
|
315
327
|
cached = node_cache[:uninomial_with_garbage][index]
|
|
316
|
-
|
|
328
|
+
if cached
|
|
329
|
+
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
|
330
|
+
@index = cached.interval.end
|
|
331
|
+
end
|
|
317
332
|
return cached
|
|
318
333
|
end
|
|
319
334
|
|
|
@@ -339,11 +354,11 @@ module ScientificNameCanonical
|
|
|
339
354
|
end
|
|
340
355
|
|
|
341
356
|
module Garbage0
|
|
342
|
-
def
|
|
357
|
+
def space1
|
|
343
358
|
elements[0]
|
|
344
359
|
end
|
|
345
360
|
|
|
346
|
-
def
|
|
361
|
+
def space2
|
|
347
362
|
elements[2]
|
|
348
363
|
end
|
|
349
364
|
|
|
@@ -360,7 +375,10 @@ module ScientificNameCanonical
|
|
|
360
375
|
start_index = index
|
|
361
376
|
if node_cache[:garbage].has_key?(index)
|
|
362
377
|
cached = node_cache[:garbage][index]
|
|
363
|
-
|
|
378
|
+
if cached
|
|
379
|
+
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
|
380
|
+
@index = cached.interval.end
|
|
381
|
+
end
|
|
364
382
|
return cached
|
|
365
383
|
end
|
|
366
384
|
|
|
@@ -370,7 +388,7 @@ module ScientificNameCanonical
|
|
|
370
388
|
s1 << r2
|
|
371
389
|
if r2
|
|
372
390
|
if has_terminal?('\G["\',.]', true, index)
|
|
373
|
-
r3 =
|
|
391
|
+
r3 = true
|
|
374
392
|
@index += 1
|
|
375
393
|
else
|
|
376
394
|
r3 = nil
|
|
@@ -383,7 +401,7 @@ module ScientificNameCanonical
|
|
|
383
401
|
s5, i5 = [], index
|
|
384
402
|
loop do
|
|
385
403
|
if has_terminal?('\G[^щ]', true, index)
|
|
386
|
-
r6 =
|
|
404
|
+
r6 = true
|
|
387
405
|
@index += 1
|
|
388
406
|
else
|
|
389
407
|
r6 = nil
|
|
@@ -416,7 +434,7 @@ module ScientificNameCanonical
|
|
|
416
434
|
s9, i9 = [], index
|
|
417
435
|
loop do
|
|
418
436
|
if has_terminal?('\G[^ш]', true, index)
|
|
419
|
-
r10 =
|
|
437
|
+
r10 = true
|
|
420
438
|
@index += 1
|
|
421
439
|
else
|
|
422
440
|
r10 = nil
|
|
@@ -460,3 +478,4 @@ end
|
|
|
460
478
|
class ScientificNameCanonicalParser < Treetop::Runtime::CompiledParser
|
|
461
479
|
include ScientificNameCanonical
|
|
462
480
|
end
|
|
481
|
+
|
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
# encoding: UTF-8
|
|
2
2
|
grammar ScientificNameCanonical
|
|
3
|
-
include ScientificNameClean
|
|
4
3
|
include ScientificNameDirty
|
|
5
4
|
|
|
6
5
|
rule root
|
|
@@ -12,6 +11,10 @@ grammar ScientificNameCanonical
|
|
|
12
11
|
def details
|
|
13
12
|
[super]
|
|
14
13
|
end
|
|
14
|
+
|
|
15
|
+
def parser_run
|
|
16
|
+
3
|
|
17
|
+
end
|
|
15
18
|
}
|
|
16
19
|
/
|
|
17
20
|
uninomial_with_garbage {
|
|
@@ -22,6 +25,10 @@ grammar ScientificNameCanonical
|
|
|
22
25
|
def details
|
|
23
26
|
[super]
|
|
24
27
|
end
|
|
28
|
+
|
|
29
|
+
def parser_run
|
|
30
|
+
3
|
|
31
|
+
end
|
|
25
32
|
}
|
|
26
33
|
end
|
|
27
34
|
|
|
@@ -7,7 +7,7 @@ module ScientificNameClean
|
|
|
7
7
|
end
|
|
8
8
|
|
|
9
9
|
module Root0
|
|
10
|
-
def
|
|
10
|
+
def space1
|
|
11
11
|
elements[0]
|
|
12
12
|
end
|
|
13
13
|
|
|
@@ -15,7 +15,7 @@ module ScientificNameClean
|
|
|
15
15
|
elements[1]
|
|
16
16
|
end
|
|
17
17
|
|
|
18
|
-
def
|
|
18
|
+
def space2
|
|
19
19
|
elements[2]
|
|
20
20
|
end
|
|
21
21
|
end
|
|
@@ -40,13 +40,20 @@ module ScientificNameClean
|
|
|
40
40
|
def details
|
|
41
41
|
a.details.class == Array ? a.details : [a.details]
|
|
42
42
|
end
|
|
43
|
+
|
|
44
|
+
def parser_run
|
|
45
|
+
1
|
|
46
|
+
end
|
|
43
47
|
end
|
|
44
48
|
|
|
45
49
|
def _nt_root
|
|
46
50
|
start_index = index
|
|
47
51
|
if node_cache[:root].has_key?(index)
|
|
48
52
|
cached = node_cache[:root][index]
|
|
49
|
-
|
|
53
|
+
if cached
|
|
54
|
+
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
|
55
|
+
@index = cached.interval.end
|
|
56
|
+
end
|
|
50
57
|
return cached
|
|
51
58
|
end
|
|
52
59
|
|
|
@@ -80,7 +87,7 @@ module ScientificNameClean
|
|
|
80
87
|
elements[0]
|
|
81
88
|
end
|
|
82
89
|
|
|
83
|
-
def
|
|
90
|
+
def space1
|
|
84
91
|
elements[1]
|
|
85
92
|
end
|
|
86
93
|
|
|
@@ -88,7 +95,7 @@ module ScientificNameClean
|
|
|
88
95
|
elements[2]
|
|
89
96
|
end
|
|
90
97
|
|
|
91
|
-
def
|
|
98
|
+
def space2
|
|
92
99
|
elements[3]
|
|
93
100
|
end
|
|
94
101
|
|
|
@@ -123,7 +130,10 @@ module ScientificNameClean
|
|
|
123
130
|
start_index = index
|
|
124
131
|
if node_cache[:scientific_name_5].has_key?(index)
|
|
125
132
|
cached = node_cache[:scientific_name_5][index]
|
|
126
|
-
|
|
133
|
+
if cached
|
|
134
|
+
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
|
135
|
+
@index = cached.interval.end
|
|
136
|
+
end
|
|
127
137
|
return cached
|
|
128
138
|
end
|
|
129
139
|
|
|
@@ -177,7 +187,7 @@ module ScientificNameClean
|
|
|
177
187
|
elements[0]
|
|
178
188
|
end
|
|
179
189
|
|
|
180
|
-
def
|
|
190
|
+
def space1
|
|
181
191
|
elements[1]
|
|
182
192
|
end
|
|
183
193
|
|
|
@@ -185,7 +195,7 @@ module ScientificNameClean
|
|
|
185
195
|
elements[2]
|
|
186
196
|
end
|
|
187
197
|
|
|
188
|
-
def
|
|
198
|
+
def space2
|
|
189
199
|
elements[3]
|
|
190
200
|
end
|
|
191
201
|
|
|
@@ -221,7 +231,7 @@ module ScientificNameClean
|
|
|
221
231
|
elements[0]
|
|
222
232
|
end
|
|
223
233
|
|
|
224
|
-
def
|
|
234
|
+
def space1
|
|
225
235
|
elements[1]
|
|
226
236
|
end
|
|
227
237
|
|
|
@@ -229,7 +239,7 @@ module ScientificNameClean
|
|
|
229
239
|
elements[2]
|
|
230
240
|
end
|
|
231
241
|
|
|
232
|
-
def
|
|
242
|
+
def space2
|
|
233
243
|
elements[3]
|
|
234
244
|
end
|
|
235
245
|
|
|
@@ -261,7 +271,10 @@ module ScientificNameClean
|
|
|
261
271
|
start_index = index
|
|
262
272
|
if node_cache[:scientific_name_4].has_key?(index)
|
|
263
273
|
cached = node_cache[:scientific_name_4][index]
|
|
264
|
-
|
|
274
|
+
if cached
|
|
275
|
+
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
|
276
|
+
@index = cached.interval.end
|
|
277
|
+
end
|
|
265
278
|
return cached
|
|
266
279
|
end
|
|
267
280
|
|
|
@@ -310,7 +323,7 @@ module ScientificNameClean
|
|
|
310
323
|
s7 << r11
|
|
311
324
|
if r11
|
|
312
325
|
if has_terminal?('\G[\\?]', true, index)
|
|
313
|
-
r13 =
|
|
326
|
+
r13 = true
|
|
314
327
|
@index += 1
|
|
315
328
|
else
|
|
316
329
|
r13 = nil
|
|
@@ -391,7 +404,10 @@ module ScientificNameClean
|
|
|
391
404
|
start_index = index
|
|
392
405
|
if node_cache[:scientific_name_3].has_key?(index)
|
|
393
406
|
cached = node_cache[:scientific_name_3][index]
|
|
394
|
-
|
|
407
|
+
if cached
|
|
408
|
+
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
|
409
|
+
@index = cached.interval.end
|
|
410
|
+
end
|
|
395
411
|
return cached
|
|
396
412
|
end
|
|
397
413
|
|
|
@@ -472,7 +488,10 @@ module ScientificNameClean
|
|
|
472
488
|
start_index = index
|
|
473
489
|
if node_cache[:scientific_name_2].has_key?(index)
|
|
474
490
|
cached = node_cache[:scientific_name_2][index]
|
|
475
|
-
|
|
491
|
+
if cached
|
|
492
|
+
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
|
493
|
+
@index = cached.interval.end
|
|
494
|
+
end
|
|
476
495
|
return cached
|
|
477
496
|
end
|
|
478
497
|
|
|
@@ -517,7 +536,10 @@ module ScientificNameClean
|
|
|
517
536
|
start_index = index
|
|
518
537
|
if node_cache[:scientific_name_1].has_key?(index)
|
|
519
538
|
cached = node_cache[:scientific_name_1][index]
|
|
520
|
-
|
|
539
|
+
if cached
|
|
540
|
+
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
|
541
|
+
@index = cached.interval.end
|
|
542
|
+
end
|
|
521
543
|
return cached
|
|
522
544
|
end
|
|
523
545
|
|
|
@@ -567,7 +589,10 @@ module ScientificNameClean
|
|
|
567
589
|
start_index = index
|
|
568
590
|
if node_cache[:status_part].has_key?(index)
|
|
569
591
|
cached = node_cache[:status_part][index]
|
|
570
|
-
|
|
592
|
+
if cached
|
|
593
|
+
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
|
594
|
+
@index = cached.interval.end
|
|
595
|
+
end
|
|
571
596
|
return cached
|
|
572
597
|
end
|
|
573
598
|
|
|
@@ -628,7 +653,10 @@ module ScientificNameClean
|
|
|
628
653
|
start_index = index
|
|
629
654
|
if node_cache[:status_word].has_key?(index)
|
|
630
655
|
cached = node_cache[:status_word][index]
|
|
631
|
-
|
|
656
|
+
if cached
|
|
657
|
+
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
|
658
|
+
@index = cached.interval.end
|
|
659
|
+
end
|
|
632
660
|
return cached
|
|
633
661
|
end
|
|
634
662
|
|
|
@@ -637,7 +665,7 @@ module ScientificNameClean
|
|
|
637
665
|
s0 << r1
|
|
638
666
|
if r1
|
|
639
667
|
if has_terminal?('\G[\\.]', true, index)
|
|
640
|
-
r2 =
|
|
668
|
+
r2 = true
|
|
641
669
|
@index += 1
|
|
642
670
|
else
|
|
643
671
|
r2 = nil
|
|
@@ -663,7 +691,7 @@ module ScientificNameClean
|
|
|
663
691
|
elements[0]
|
|
664
692
|
end
|
|
665
693
|
|
|
666
|
-
def
|
|
694
|
+
def space1
|
|
667
695
|
elements[1]
|
|
668
696
|
end
|
|
669
697
|
|
|
@@ -671,11 +699,11 @@ module ScientificNameClean
|
|
|
671
699
|
elements[2]
|
|
672
700
|
end
|
|
673
701
|
|
|
674
|
-
def
|
|
702
|
+
def space2
|
|
675
703
|
elements[3]
|
|
676
704
|
end
|
|
677
705
|
|
|
678
|
-
def
|
|
706
|
+
def space3
|
|
679
707
|
elements[5]
|
|
680
708
|
end
|
|
681
709
|
|
|
@@ -719,7 +747,7 @@ module ScientificNameClean
|
|
|
719
747
|
elements[0]
|
|
720
748
|
end
|
|
721
749
|
|
|
722
|
-
def
|
|
750
|
+
def space1
|
|
723
751
|
elements[1]
|
|
724
752
|
end
|
|
725
753
|
|
|
@@ -727,11 +755,11 @@ module ScientificNameClean
|
|
|
727
755
|
elements[2]
|
|
728
756
|
end
|
|
729
757
|
|
|
730
|
-
def
|
|
758
|
+
def space2
|
|
731
759
|
elements[3]
|
|
732
760
|
end
|
|
733
761
|
|
|
734
|
-
def
|
|
762
|
+
def space3
|
|
735
763
|
elements[5]
|
|
736
764
|
end
|
|
737
765
|
|
|
@@ -767,11 +795,11 @@ module ScientificNameClean
|
|
|
767
795
|
elements[0]
|
|
768
796
|
end
|
|
769
797
|
|
|
770
|
-
def
|
|
798
|
+
def space1
|
|
771
799
|
elements[1]
|
|
772
800
|
end
|
|
773
801
|
|
|
774
|
-
def
|
|
802
|
+
def space2
|
|
775
803
|
elements[3]
|
|
776
804
|
end
|
|
777
805
|
|
|
@@ -815,11 +843,11 @@ module ScientificNameClean
|
|
|
815
843
|
elements[0]
|
|
816
844
|
end
|
|
817
845
|
|
|
818
|
-
def
|
|
846
|
+
def space1
|
|
819
847
|
elements[1]
|
|
820
848
|
end
|
|
821
849
|
|
|
822
|
-
def
|
|
850
|
+
def space2
|
|
823
851
|
elements[3]
|
|
824
852
|
end
|
|
825
853
|
|
|
@@ -854,7 +882,10 @@ module ScientificNameClean
|
|
|
854
882
|
start_index = index
|
|
855
883
|
if node_cache[:multinomial_name].has_key?(index)
|
|
856
884
|
cached = node_cache[:multinomial_name][index]
|
|
857
|
-
|
|
885
|
+
if cached
|
|
886
|
+
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
|
887
|
+
@index = cached.interval.end
|
|
888
|
+
end
|
|
858
889
|
return cached
|
|
859
890
|
end
|
|
860
891
|
|
|
@@ -1087,7 +1118,10 @@ module ScientificNameClean
|
|
|
1087
1118
|
start_index = index
|
|
1088
1119
|
if node_cache[:infraspecies_mult].has_key?(index)
|
|
1089
1120
|
cached = node_cache[:infraspecies_mult][index]
|
|
1090
|
-
|
|
1121
|
+
if cached
|
|
1122
|
+
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
|
1123
|
+
@index = cached.interval.end
|
|
1124
|
+
end
|
|
1091
1125
|
return cached
|
|
1092
1126
|
end
|
|
1093
1127
|
|
|
@@ -1165,7 +1199,10 @@ module ScientificNameClean
|
|
|
1165
1199
|
start_index = index
|
|
1166
1200
|
if node_cache[:infraspecies].has_key?(index)
|
|
1167
1201
|
cached = node_cache[:infraspecies][index]
|
|
1168
|
-
|
|
1202
|
+
if cached
|
|
1203
|
+
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
|
1204
|
+
@index = cached.interval.end
|
|
1205
|
+
end
|
|
1169
1206
|
return cached
|
|
1170
1207
|
end
|
|
1171
1208
|
|
|
@@ -1206,7 +1243,7 @@ module ScientificNameClean
|
|
|
1206
1243
|
r0
|
|
1207
1244
|
end
|
|
1208
1245
|
|
|
1209
|
-
module
|
|
1246
|
+
module InfraspeciesString0
|
|
1210
1247
|
def sel
|
|
1211
1248
|
elements[0]
|
|
1212
1249
|
end
|
|
@@ -1220,7 +1257,7 @@ module ScientificNameClean
|
|
|
1220
1257
|
end
|
|
1221
1258
|
end
|
|
1222
1259
|
|
|
1223
|
-
module
|
|
1260
|
+
module InfraspeciesString1
|
|
1224
1261
|
def value
|
|
1225
1262
|
sel.apply(a)
|
|
1226
1263
|
end
|
|
@@ -1237,14 +1274,14 @@ module ScientificNameClean
|
|
|
1237
1274
|
end
|
|
1238
1275
|
end
|
|
1239
1276
|
|
|
1240
|
-
module
|
|
1277
|
+
module InfraspeciesString2
|
|
1241
1278
|
def species_word
|
|
1242
1279
|
elements[0]
|
|
1243
1280
|
end
|
|
1244
1281
|
|
|
1245
1282
|
end
|
|
1246
1283
|
|
|
1247
|
-
module
|
|
1284
|
+
module InfraspeciesString3
|
|
1248
1285
|
def value
|
|
1249
1286
|
text_value
|
|
1250
1287
|
end
|
|
@@ -1266,7 +1303,10 @@ module ScientificNameClean
|
|
|
1266
1303
|
start_index = index
|
|
1267
1304
|
if node_cache[:infraspecies_string].has_key?(index)
|
|
1268
1305
|
cached = node_cache[:infraspecies_string][index]
|
|
1269
|
-
|
|
1306
|
+
if cached
|
|
1307
|
+
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
|
1308
|
+
@index = cached.interval.end
|
|
1309
|
+
end
|
|
1270
1310
|
return cached
|
|
1271
1311
|
end
|
|
1272
1312
|
|
|
@@ -1284,8 +1324,8 @@ module ScientificNameClean
|
|
|
1284
1324
|
end
|
|
1285
1325
|
if s1.last
|
|
1286
1326
|
r1 = instantiate_node(SyntaxNode,input, i1...index, s1)
|
|
1287
|
-
r1.extend(
|
|
1288
|
-
r1.extend(
|
|
1327
|
+
r1.extend(InfraspeciesString0)
|
|
1328
|
+
r1.extend(InfraspeciesString1)
|
|
1289
1329
|
else
|
|
1290
1330
|
@index = i1
|
|
1291
1331
|
r1 = nil
|
|
@@ -1299,7 +1339,7 @@ module ScientificNameClean
|
|
|
1299
1339
|
if r6
|
|
1300
1340
|
i7 = index
|
|
1301
1341
|
if has_terminal?('\G[\\.]', true, index)
|
|
1302
|
-
r8 =
|
|
1342
|
+
r8 = true
|
|
1303
1343
|
@index += 1
|
|
1304
1344
|
else
|
|
1305
1345
|
r8 = nil
|
|
@@ -1314,8 +1354,8 @@ module ScientificNameClean
|
|
|
1314
1354
|
end
|
|
1315
1355
|
if s5.last
|
|
1316
1356
|
r5 = instantiate_node(SyntaxNode,input, i5...index, s5)
|
|
1317
|
-
r5.extend(
|
|
1318
|
-
r5.extend(
|
|
1357
|
+
r5.extend(InfraspeciesString2)
|
|
1358
|
+
r5.extend(InfraspeciesString3)
|
|
1319
1359
|
else
|
|
1320
1360
|
@index = i5
|
|
1321
1361
|
r5 = nil
|
|
@@ -1349,7 +1389,10 @@ module ScientificNameClean
|
|
|
1349
1389
|
start_index = index
|
|
1350
1390
|
if node_cache[:taxon_concept_rank].has_key?(index)
|
|
1351
1391
|
cached = node_cache[:taxon_concept_rank][index]
|
|
1352
|
-
|
|
1392
|
+
if cached
|
|
1393
|
+
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
|
1394
|
+
@index = cached.interval.end
|
|
1395
|
+
end
|
|
1353
1396
|
return cached
|
|
1354
1397
|
end
|
|
1355
1398
|
|
|
@@ -1408,7 +1451,10 @@ module ScientificNameClean
|
|
|
1408
1451
|
start_index = index
|
|
1409
1452
|
if node_cache[:rank].has_key?(index)
|
|
1410
1453
|
cached = node_cache[:rank][index]
|
|
1411
|
-
|
|
1454
|
+
if cached
|
|
1455
|
+
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
|
1456
|
+
@index = cached.interval.end
|
|
1457
|
+
end
|
|
1412
1458
|
return cached
|
|
1413
1459
|
end
|
|
1414
1460
|
|
|
@@ -1872,7 +1918,10 @@ module ScientificNameClean
|
|
|
1872
1918
|
start_index = index
|
|
1873
1919
|
if node_cache[:rank_forma].has_key?(index)
|
|
1874
1920
|
cached = node_cache[:rank_forma][index]
|
|
1875
|
-
|
|
1921
|
+
if cached
|
|
1922
|
+
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
|
1923
|
+
@index = cached.interval.end
|
|
1924
|
+
end
|
|
1876
1925
|
return cached
|
|
1877
1926
|
end
|
|
1878
1927
|
|
|
@@ -1973,7 +2022,10 @@ module ScientificNameClean
|
|
|
1973
2022
|
start_index = index
|
|
1974
2023
|
if node_cache[:species].has_key?(index)
|
|
1975
2024
|
cached = node_cache[:species][index]
|
|
1976
|
-
|
|
2025
|
+
if cached
|
|
2026
|
+
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
|
2027
|
+
@index = cached.interval.end
|
|
2028
|
+
end
|
|
1977
2029
|
return cached
|
|
1978
2030
|
end
|
|
1979
2031
|
|
|
@@ -2014,8 +2066,8 @@ module ScientificNameClean
|
|
|
2014
2066
|
r0
|
|
2015
2067
|
end
|
|
2016
2068
|
|
|
2017
|
-
module
|
|
2018
|
-
def
|
|
2069
|
+
module SpeciesString0
|
|
2070
|
+
def space_hard1
|
|
2019
2071
|
elements[0]
|
|
2020
2072
|
end
|
|
2021
2073
|
|
|
@@ -2023,19 +2075,19 @@ module ScientificNameClean
|
|
|
2023
2075
|
elements[1]
|
|
2024
2076
|
end
|
|
2025
2077
|
|
|
2026
|
-
def
|
|
2078
|
+
def space_hard2
|
|
2027
2079
|
elements[2]
|
|
2028
2080
|
end
|
|
2029
2081
|
end
|
|
2030
2082
|
|
|
2031
|
-
module
|
|
2083
|
+
module SpeciesString1
|
|
2032
2084
|
def a
|
|
2033
2085
|
elements[0]
|
|
2034
2086
|
end
|
|
2035
2087
|
|
|
2036
2088
|
end
|
|
2037
2089
|
|
|
2038
|
-
module
|
|
2090
|
+
module SpeciesString2
|
|
2039
2091
|
def value
|
|
2040
2092
|
a.value
|
|
2041
2093
|
end
|
|
@@ -2057,7 +2109,7 @@ module ScientificNameClean
|
|
|
2057
2109
|
end
|
|
2058
2110
|
end
|
|
2059
2111
|
|
|
2060
|
-
module
|
|
2112
|
+
module SpeciesString3
|
|
2061
2113
|
def canonical
|
|
2062
2114
|
value
|
|
2063
2115
|
end
|
|
@@ -2079,7 +2131,10 @@ module ScientificNameClean
|
|
|
2079
2131
|
start_index = index
|
|
2080
2132
|
if node_cache[:species_string].has_key?(index)
|
|
2081
2133
|
cached = node_cache[:species_string][index]
|
|
2082
|
-
|
|
2134
|
+
if cached
|
|
2135
|
+
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
|
2136
|
+
@index = cached.interval.end
|
|
2137
|
+
end
|
|
2083
2138
|
return cached
|
|
2084
2139
|
end
|
|
2085
2140
|
|
|
@@ -2102,7 +2157,7 @@ module ScientificNameClean
|
|
|
2102
2157
|
end
|
|
2103
2158
|
if s4.last
|
|
2104
2159
|
r4 = instantiate_node(SyntaxNode,input, i4...index, s4)
|
|
2105
|
-
r4.extend(
|
|
2160
|
+
r4.extend(SpeciesString0)
|
|
2106
2161
|
else
|
|
2107
2162
|
@index = i4
|
|
2108
2163
|
r4 = nil
|
|
@@ -2117,8 +2172,8 @@ module ScientificNameClean
|
|
|
2117
2172
|
end
|
|
2118
2173
|
if s1.last
|
|
2119
2174
|
r1 = instantiate_node(SyntaxNode,input, i1...index, s1)
|
|
2120
|
-
r1.extend(
|
|
2121
|
-
r1.extend(
|
|
2175
|
+
r1.extend(SpeciesString1)
|
|
2176
|
+
r1.extend(SpeciesString2)
|
|
2122
2177
|
else
|
|
2123
2178
|
@index = i1
|
|
2124
2179
|
r1 = nil
|
|
@@ -2127,7 +2182,7 @@ module ScientificNameClean
|
|
|
2127
2182
|
r0 = r1
|
|
2128
2183
|
else
|
|
2129
2184
|
r8 = _nt_species_word
|
|
2130
|
-
r8.extend(
|
|
2185
|
+
r8.extend(SpeciesString3)
|
|
2131
2186
|
if r8
|
|
2132
2187
|
r0 = r8
|
|
2133
2188
|
else
|
|
@@ -2151,7 +2206,7 @@ module ScientificNameClean
|
|
|
2151
2206
|
elements[0]
|
|
2152
2207
|
end
|
|
2153
2208
|
|
|
2154
|
-
def
|
|
2209
|
+
def space1
|
|
2155
2210
|
elements[1]
|
|
2156
2211
|
end
|
|
2157
2212
|
|
|
@@ -2159,7 +2214,7 @@ module ScientificNameClean
|
|
|
2159
2214
|
elements[2]
|
|
2160
2215
|
end
|
|
2161
2216
|
|
|
2162
|
-
def
|
|
2217
|
+
def space2
|
|
2163
2218
|
elements[3]
|
|
2164
2219
|
end
|
|
2165
2220
|
|
|
@@ -2190,7 +2245,10 @@ module ScientificNameClean
|
|
|
2190
2245
|
start_index = index
|
|
2191
2246
|
if node_cache[:infragenus].has_key?(index)
|
|
2192
2247
|
cached = node_cache[:infragenus][index]
|
|
2193
|
-
|
|
2248
|
+
if cached
|
|
2249
|
+
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
|
2250
|
+
@index = cached.interval.end
|
|
2251
|
+
end
|
|
2194
2252
|
return cached
|
|
2195
2253
|
end
|
|
2196
2254
|
|
|
@@ -2228,7 +2286,7 @@ module ScientificNameClean
|
|
|
2228
2286
|
end
|
|
2229
2287
|
|
|
2230
2288
|
module Genus0
|
|
2231
|
-
def
|
|
2289
|
+
def space_hard1
|
|
2232
2290
|
elements[0]
|
|
2233
2291
|
end
|
|
2234
2292
|
|
|
@@ -2236,7 +2294,7 @@ module ScientificNameClean
|
|
|
2236
2294
|
elements[1]
|
|
2237
2295
|
end
|
|
2238
2296
|
|
|
2239
|
-
def
|
|
2297
|
+
def space_hard2
|
|
2240
2298
|
elements[2]
|
|
2241
2299
|
end
|
|
2242
2300
|
|
|
@@ -2274,44 +2332,59 @@ module ScientificNameClean
|
|
|
2274
2332
|
start_index = index
|
|
2275
2333
|
if node_cache[:genus].has_key?(index)
|
|
2276
2334
|
cached = node_cache[:genus][index]
|
|
2277
|
-
|
|
2335
|
+
if cached
|
|
2336
|
+
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
|
2337
|
+
@index = cached.interval.end
|
|
2338
|
+
end
|
|
2278
2339
|
return cached
|
|
2279
2340
|
end
|
|
2280
2341
|
|
|
2281
2342
|
i0, s0 = index, []
|
|
2282
|
-
|
|
2343
|
+
i1 = index
|
|
2344
|
+
r2 = _nt_cap_latin_word_pair
|
|
2345
|
+
if r2
|
|
2346
|
+
r1 = r2
|
|
2347
|
+
else
|
|
2348
|
+
r3 = _nt_cap_latin_word
|
|
2349
|
+
if r3
|
|
2350
|
+
r1 = r3
|
|
2351
|
+
else
|
|
2352
|
+
@index = i1
|
|
2353
|
+
r1 = nil
|
|
2354
|
+
end
|
|
2355
|
+
end
|
|
2283
2356
|
s0 << r1
|
|
2284
2357
|
if r1
|
|
2285
|
-
|
|
2286
|
-
|
|
2287
|
-
|
|
2288
|
-
|
|
2289
|
-
if
|
|
2290
|
-
|
|
2291
|
-
|
|
2292
|
-
if
|
|
2293
|
-
|
|
2294
|
-
|
|
2295
|
-
if
|
|
2296
|
-
|
|
2297
|
-
|
|
2358
|
+
i4 = index
|
|
2359
|
+
i5, s5 = index, []
|
|
2360
|
+
r6 = _nt_space_hard
|
|
2361
|
+
s5 << r6
|
|
2362
|
+
if r6
|
|
2363
|
+
r7 = _nt_author_prefix_word
|
|
2364
|
+
s5 << r7
|
|
2365
|
+
if r7
|
|
2366
|
+
r8 = _nt_space_hard
|
|
2367
|
+
s5 << r8
|
|
2368
|
+
if r8
|
|
2369
|
+
r9 = _nt_author_word
|
|
2370
|
+
s5 << r9
|
|
2298
2371
|
end
|
|
2299
2372
|
end
|
|
2300
2373
|
end
|
|
2301
|
-
if
|
|
2302
|
-
|
|
2303
|
-
|
|
2374
|
+
if s5.last
|
|
2375
|
+
r5 = instantiate_node(SyntaxNode,input, i5...index, s5)
|
|
2376
|
+
r5.extend(Genus0)
|
|
2304
2377
|
else
|
|
2305
|
-
@index =
|
|
2306
|
-
|
|
2378
|
+
@index = i5
|
|
2379
|
+
r5 = nil
|
|
2307
2380
|
end
|
|
2308
|
-
if
|
|
2309
|
-
|
|
2381
|
+
if r5
|
|
2382
|
+
r4 = nil
|
|
2310
2383
|
else
|
|
2311
|
-
@index =
|
|
2312
|
-
|
|
2384
|
+
@index = i4
|
|
2385
|
+
r4 = instantiate_node(SyntaxNode,input, index...index)
|
|
2313
2386
|
end
|
|
2314
|
-
s0 <<
|
|
2387
|
+
s0 << r4
|
|
2315
2388
|
end
|
|
2316
2389
|
if s0.last
|
|
2317
2390
|
r0 = instantiate_node(SyntaxNode,input, i0...index, s0)
|
|
@@ -2367,7 +2440,10 @@ module ScientificNameClean
|
|
|
2367
2440
|
start_index = index
|
|
2368
2441
|
if node_cache[:uninomial_name].has_key?(index)
|
|
2369
2442
|
cached = node_cache[:uninomial_name][index]
|
|
2370
|
-
|
|
2443
|
+
if cached
|
|
2444
|
+
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
|
2445
|
+
@index = cached.interval.end
|
|
2446
|
+
end
|
|
2371
2447
|
return cached
|
|
2372
2448
|
end
|
|
2373
2449
|
|
|
@@ -2408,7 +2484,7 @@ module ScientificNameClean
|
|
|
2408
2484
|
r0
|
|
2409
2485
|
end
|
|
2410
2486
|
|
|
2411
|
-
module
|
|
2487
|
+
module UninomialString0
|
|
2412
2488
|
def canonical
|
|
2413
2489
|
value
|
|
2414
2490
|
end
|
|
@@ -2430,12 +2506,28 @@ module ScientificNameClean
|
|
|
2430
2506
|
start_index = index
|
|
2431
2507
|
if node_cache[:uninomial_string].has_key?(index)
|
|
2432
2508
|
cached = node_cache[:uninomial_string][index]
|
|
2433
|
-
|
|
2509
|
+
if cached
|
|
2510
|
+
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
|
2511
|
+
@index = cached.interval.end
|
|
2512
|
+
end
|
|
2434
2513
|
return cached
|
|
2435
2514
|
end
|
|
2436
2515
|
|
|
2437
|
-
|
|
2438
|
-
|
|
2516
|
+
i0 = index
|
|
2517
|
+
r1 = _nt_cap_latin_word_pair
|
|
2518
|
+
if r1
|
|
2519
|
+
r0 = r1
|
|
2520
|
+
r0.extend(UninomialString0)
|
|
2521
|
+
else
|
|
2522
|
+
r2 = _nt_cap_latin_word
|
|
2523
|
+
if r2
|
|
2524
|
+
r0 = r2
|
|
2525
|
+
r0.extend(UninomialString0)
|
|
2526
|
+
else
|
|
2527
|
+
@index = i0
|
|
2528
|
+
r0 = nil
|
|
2529
|
+
end
|
|
2530
|
+
end
|
|
2439
2531
|
|
|
2440
2532
|
node_cache[:uninomial_string][start_index] = r0
|
|
2441
2533
|
|
|
@@ -2447,7 +2539,7 @@ module ScientificNameClean
|
|
|
2447
2539
|
elements[0]
|
|
2448
2540
|
end
|
|
2449
2541
|
|
|
2450
|
-
def
|
|
2542
|
+
def space1
|
|
2451
2543
|
elements[1]
|
|
2452
2544
|
end
|
|
2453
2545
|
|
|
@@ -2455,7 +2547,7 @@ module ScientificNameClean
|
|
|
2455
2547
|
elements[2]
|
|
2456
2548
|
end
|
|
2457
2549
|
|
|
2458
|
-
def
|
|
2550
|
+
def space2
|
|
2459
2551
|
elements[4]
|
|
2460
2552
|
end
|
|
2461
2553
|
|
|
@@ -2543,7 +2635,10 @@ module ScientificNameClean
|
|
|
2543
2635
|
start_index = index
|
|
2544
2636
|
if node_cache[:authorship].has_key?(index)
|
|
2545
2637
|
cached = node_cache[:authorship][index]
|
|
2546
|
-
|
|
2638
|
+
if cached
|
|
2639
|
+
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
|
2640
|
+
@index = cached.interval.end
|
|
2641
|
+
end
|
|
2547
2642
|
return cached
|
|
2548
2643
|
end
|
|
2549
2644
|
|
|
@@ -2678,7 +2773,7 @@ module ScientificNameClean
|
|
|
2678
2773
|
elements[0]
|
|
2679
2774
|
end
|
|
2680
2775
|
|
|
2681
|
-
def
|
|
2776
|
+
def space1
|
|
2682
2777
|
elements[1]
|
|
2683
2778
|
end
|
|
2684
2779
|
|
|
@@ -2686,7 +2781,7 @@ module ScientificNameClean
|
|
|
2686
2781
|
elements[2]
|
|
2687
2782
|
end
|
|
2688
2783
|
|
|
2689
|
-
def
|
|
2784
|
+
def space2
|
|
2690
2785
|
elements[3]
|
|
2691
2786
|
end
|
|
2692
2787
|
|
|
@@ -2694,11 +2789,11 @@ module ScientificNameClean
|
|
|
2694
2789
|
elements[4]
|
|
2695
2790
|
end
|
|
2696
2791
|
|
|
2697
|
-
def
|
|
2792
|
+
def space3
|
|
2698
2793
|
elements[5]
|
|
2699
2794
|
end
|
|
2700
2795
|
|
|
2701
|
-
def
|
|
2796
|
+
def space4
|
|
2702
2797
|
elements[7]
|
|
2703
2798
|
end
|
|
2704
2799
|
|
|
@@ -2728,7 +2823,7 @@ module ScientificNameClean
|
|
|
2728
2823
|
elements[0]
|
|
2729
2824
|
end
|
|
2730
2825
|
|
|
2731
|
-
def
|
|
2826
|
+
def space1
|
|
2732
2827
|
elements[1]
|
|
2733
2828
|
end
|
|
2734
2829
|
|
|
@@ -2736,7 +2831,7 @@ module ScientificNameClean
|
|
|
2736
2831
|
elements[2]
|
|
2737
2832
|
end
|
|
2738
2833
|
|
|
2739
|
-
def
|
|
2834
|
+
def space2
|
|
2740
2835
|
elements[4]
|
|
2741
2836
|
end
|
|
2742
2837
|
|
|
@@ -2744,7 +2839,7 @@ module ScientificNameClean
|
|
|
2744
2839
|
elements[5]
|
|
2745
2840
|
end
|
|
2746
2841
|
|
|
2747
|
-
def
|
|
2842
|
+
def space3
|
|
2748
2843
|
elements[6]
|
|
2749
2844
|
end
|
|
2750
2845
|
|
|
@@ -2775,7 +2870,7 @@ module ScientificNameClean
|
|
|
2775
2870
|
elements[0]
|
|
2776
2871
|
end
|
|
2777
2872
|
|
|
2778
|
-
def
|
|
2873
|
+
def space1
|
|
2779
2874
|
elements[1]
|
|
2780
2875
|
end
|
|
2781
2876
|
|
|
@@ -2783,7 +2878,7 @@ module ScientificNameClean
|
|
|
2783
2878
|
elements[2]
|
|
2784
2879
|
end
|
|
2785
2880
|
|
|
2786
|
-
def
|
|
2881
|
+
def space2
|
|
2787
2882
|
elements[3]
|
|
2788
2883
|
end
|
|
2789
2884
|
|
|
@@ -2813,7 +2908,7 @@ module ScientificNameClean
|
|
|
2813
2908
|
elements[0]
|
|
2814
2909
|
end
|
|
2815
2910
|
|
|
2816
|
-
def
|
|
2911
|
+
def space1
|
|
2817
2912
|
elements[1]
|
|
2818
2913
|
end
|
|
2819
2914
|
|
|
@@ -2821,7 +2916,7 @@ module ScientificNameClean
|
|
|
2821
2916
|
elements[2]
|
|
2822
2917
|
end
|
|
2823
2918
|
|
|
2824
|
-
def
|
|
2919
|
+
def space2
|
|
2825
2920
|
elements[3]
|
|
2826
2921
|
end
|
|
2827
2922
|
|
|
@@ -2848,7 +2943,10 @@ module ScientificNameClean
|
|
|
2848
2943
|
start_index = index
|
|
2849
2944
|
if node_cache[:basionym_authorship_with_parenthesis].has_key?(index)
|
|
2850
2945
|
cached = node_cache[:basionym_authorship_with_parenthesis][index]
|
|
2851
|
-
|
|
2946
|
+
if cached
|
|
2947
|
+
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
|
2948
|
+
@index = cached.interval.end
|
|
2949
|
+
end
|
|
2852
2950
|
return cached
|
|
2853
2951
|
end
|
|
2854
2952
|
|
|
@@ -2873,7 +2971,7 @@ module ScientificNameClean
|
|
|
2873
2971
|
s1 << r7
|
|
2874
2972
|
if r7
|
|
2875
2973
|
if has_terminal?('\G[,]', true, index)
|
|
2876
|
-
r9 =
|
|
2974
|
+
r9 = true
|
|
2877
2975
|
@index += 1
|
|
2878
2976
|
else
|
|
2879
2977
|
r9 = nil
|
|
@@ -3073,7 +3171,10 @@ module ScientificNameClean
|
|
|
3073
3171
|
start_index = index
|
|
3074
3172
|
if node_cache[:ex_authorship].has_key?(index)
|
|
3075
3173
|
cached = node_cache[:ex_authorship][index]
|
|
3076
|
-
|
|
3174
|
+
if cached
|
|
3175
|
+
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
|
3176
|
+
@index = cached.interval.end
|
|
3177
|
+
end
|
|
3077
3178
|
return cached
|
|
3078
3179
|
end
|
|
3079
3180
|
|
|
@@ -3107,11 +3208,11 @@ module ScientificNameClean
|
|
|
3107
3208
|
elements[0]
|
|
3108
3209
|
end
|
|
3109
3210
|
|
|
3110
|
-
def
|
|
3211
|
+
def space1
|
|
3111
3212
|
elements[1]
|
|
3112
3213
|
end
|
|
3113
3214
|
|
|
3114
|
-
def
|
|
3215
|
+
def space2
|
|
3115
3216
|
elements[3]
|
|
3116
3217
|
end
|
|
3117
3218
|
|
|
@@ -3119,11 +3220,11 @@ module ScientificNameClean
|
|
|
3119
3220
|
elements[4]
|
|
3120
3221
|
end
|
|
3121
3222
|
|
|
3122
|
-
def
|
|
3223
|
+
def space3
|
|
3123
3224
|
elements[6]
|
|
3124
3225
|
end
|
|
3125
3226
|
|
|
3126
|
-
def
|
|
3227
|
+
def space4
|
|
3127
3228
|
elements[8]
|
|
3128
3229
|
end
|
|
3129
3230
|
|
|
@@ -3131,11 +3232,11 @@ module ScientificNameClean
|
|
|
3131
3232
|
elements[9]
|
|
3132
3233
|
end
|
|
3133
3234
|
|
|
3134
|
-
def
|
|
3235
|
+
def space5
|
|
3135
3236
|
elements[10]
|
|
3136
3237
|
end
|
|
3137
3238
|
|
|
3138
|
-
def
|
|
3239
|
+
def space6
|
|
3139
3240
|
elements[12]
|
|
3140
3241
|
end
|
|
3141
3242
|
|
|
@@ -3171,11 +3272,11 @@ module ScientificNameClean
|
|
|
3171
3272
|
elements[0]
|
|
3172
3273
|
end
|
|
3173
3274
|
|
|
3174
|
-
def
|
|
3275
|
+
def space1
|
|
3175
3276
|
elements[1]
|
|
3176
3277
|
end
|
|
3177
3278
|
|
|
3178
|
-
def
|
|
3279
|
+
def space2
|
|
3179
3280
|
elements[3]
|
|
3180
3281
|
end
|
|
3181
3282
|
|
|
@@ -3226,7 +3327,10 @@ module ScientificNameClean
|
|
|
3226
3327
|
start_index = index
|
|
3227
3328
|
if node_cache[:simple_authorship].has_key?(index)
|
|
3228
3329
|
cached = node_cache[:simple_authorship][index]
|
|
3229
|
-
|
|
3330
|
+
if cached
|
|
3331
|
+
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
|
3332
|
+
@index = cached.interval.end
|
|
3333
|
+
end
|
|
3230
3334
|
return cached
|
|
3231
3335
|
end
|
|
3232
3336
|
|
|
@@ -3239,7 +3343,7 @@ module ScientificNameClean
|
|
|
3239
3343
|
s1 << r3
|
|
3240
3344
|
if r3
|
|
3241
3345
|
if has_terminal?('\G[,]', true, index)
|
|
3242
|
-
r5 =
|
|
3346
|
+
r5 = true
|
|
3243
3347
|
@index += 1
|
|
3244
3348
|
else
|
|
3245
3349
|
r5 = nil
|
|
@@ -3263,7 +3367,7 @@ module ScientificNameClean
|
|
|
3263
3367
|
s1 << r7
|
|
3264
3368
|
if r7
|
|
3265
3369
|
if has_terminal?('\G[,]', true, index)
|
|
3266
|
-
r10 =
|
|
3370
|
+
r10 = true
|
|
3267
3371
|
@index += 1
|
|
3268
3372
|
else
|
|
3269
3373
|
r10 = nil
|
|
@@ -3297,7 +3401,7 @@ module ScientificNameClean
|
|
|
3297
3401
|
s1 << r15
|
|
3298
3402
|
if r15
|
|
3299
3403
|
if has_terminal?('\G[,]', true, index)
|
|
3300
|
-
r17 =
|
|
3404
|
+
r17 = true
|
|
3301
3405
|
@index += 1
|
|
3302
3406
|
else
|
|
3303
3407
|
r17 = nil
|
|
@@ -3346,7 +3450,7 @@ module ScientificNameClean
|
|
|
3346
3450
|
s20 << r22
|
|
3347
3451
|
if r22
|
|
3348
3452
|
if has_terminal?('\G[,]', true, index)
|
|
3349
|
-
r24 =
|
|
3453
|
+
r24 = true
|
|
3350
3454
|
@index += 1
|
|
3351
3455
|
else
|
|
3352
3456
|
r24 = nil
|
|
@@ -3399,7 +3503,7 @@ module ScientificNameClean
|
|
|
3399
3503
|
elements[0]
|
|
3400
3504
|
end
|
|
3401
3505
|
|
|
3402
|
-
def
|
|
3506
|
+
def space1
|
|
3403
3507
|
elements[1]
|
|
3404
3508
|
end
|
|
3405
3509
|
|
|
@@ -3407,7 +3511,7 @@ module ScientificNameClean
|
|
|
3407
3511
|
elements[2]
|
|
3408
3512
|
end
|
|
3409
3513
|
|
|
3410
|
-
def
|
|
3514
|
+
def space2
|
|
3411
3515
|
elements[3]
|
|
3412
3516
|
end
|
|
3413
3517
|
|
|
@@ -3434,7 +3538,10 @@ module ScientificNameClean
|
|
|
3434
3538
|
start_index = index
|
|
3435
3539
|
if node_cache[:authors_names].has_key?(index)
|
|
3436
3540
|
cached = node_cache[:authors_names][index]
|
|
3437
|
-
|
|
3541
|
+
if cached
|
|
3542
|
+
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
|
3543
|
+
@index = cached.interval.end
|
|
3544
|
+
end
|
|
3438
3545
|
return cached
|
|
3439
3546
|
end
|
|
3440
3547
|
|
|
@@ -3506,7 +3613,10 @@ module ScientificNameClean
|
|
|
3506
3613
|
start_index = index
|
|
3507
3614
|
if node_cache[:unknown_auth].has_key?(index)
|
|
3508
3615
|
cached = node_cache[:unknown_auth][index]
|
|
3509
|
-
|
|
3616
|
+
if cached
|
|
3617
|
+
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
|
3618
|
+
@index = cached.interval.end
|
|
3619
|
+
end
|
|
3510
3620
|
return cached
|
|
3511
3621
|
end
|
|
3512
3622
|
|
|
@@ -3574,7 +3684,10 @@ module ScientificNameClean
|
|
|
3574
3684
|
start_index = index
|
|
3575
3685
|
if node_cache[:ex_sep].has_key?(index)
|
|
3576
3686
|
cached = node_cache[:ex_sep][index]
|
|
3577
|
-
|
|
3687
|
+
if cached
|
|
3688
|
+
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
|
3689
|
+
@index = cached.interval.end
|
|
3690
|
+
end
|
|
3578
3691
|
return cached
|
|
3579
3692
|
end
|
|
3580
3693
|
|
|
@@ -3608,7 +3721,7 @@ module ScientificNameClean
|
|
|
3608
3721
|
if r1
|
|
3609
3722
|
i4 = index
|
|
3610
3723
|
if has_terminal?('\G[\\s]', true, index)
|
|
3611
|
-
r5 =
|
|
3724
|
+
r5 = true
|
|
3612
3725
|
@index += 1
|
|
3613
3726
|
else
|
|
3614
3727
|
r5 = nil
|
|
@@ -3654,7 +3767,10 @@ module ScientificNameClean
|
|
|
3654
3767
|
start_index = index
|
|
3655
3768
|
if node_cache[:author_separator].has_key?(index)
|
|
3656
3769
|
cached = node_cache[:author_separator][index]
|
|
3657
|
-
|
|
3770
|
+
if cached
|
|
3771
|
+
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
|
3772
|
+
@index = cached.interval.end
|
|
3773
|
+
end
|
|
3658
3774
|
return cached
|
|
3659
3775
|
end
|
|
3660
3776
|
|
|
@@ -3716,7 +3832,7 @@ module ScientificNameClean
|
|
|
3716
3832
|
end
|
|
3717
3833
|
|
|
3718
3834
|
module AuthorName0
|
|
3719
|
-
def
|
|
3835
|
+
def space1
|
|
3720
3836
|
elements[0]
|
|
3721
3837
|
end
|
|
3722
3838
|
|
|
@@ -3724,7 +3840,7 @@ module ScientificNameClean
|
|
|
3724
3840
|
elements[1]
|
|
3725
3841
|
end
|
|
3726
3842
|
|
|
3727
|
-
def
|
|
3843
|
+
def space2
|
|
3728
3844
|
elements[2]
|
|
3729
3845
|
end
|
|
3730
3846
|
|
|
@@ -3732,7 +3848,7 @@ module ScientificNameClean
|
|
|
3732
3848
|
elements[3]
|
|
3733
3849
|
end
|
|
3734
3850
|
|
|
3735
|
-
def
|
|
3851
|
+
def space3
|
|
3736
3852
|
elements[4]
|
|
3737
3853
|
end
|
|
3738
3854
|
end
|
|
@@ -3752,7 +3868,7 @@ module ScientificNameClean
|
|
|
3752
3868
|
end
|
|
3753
3869
|
|
|
3754
3870
|
module AuthorName2
|
|
3755
|
-
def
|
|
3871
|
+
def space1
|
|
3756
3872
|
elements[0]
|
|
3757
3873
|
end
|
|
3758
3874
|
|
|
@@ -3760,7 +3876,7 @@ module ScientificNameClean
|
|
|
3760
3876
|
elements[1]
|
|
3761
3877
|
end
|
|
3762
3878
|
|
|
3763
|
-
def
|
|
3879
|
+
def space2
|
|
3764
3880
|
elements[2]
|
|
3765
3881
|
end
|
|
3766
3882
|
|
|
@@ -3768,7 +3884,7 @@ module ScientificNameClean
|
|
|
3768
3884
|
elements[3]
|
|
3769
3885
|
end
|
|
3770
3886
|
|
|
3771
|
-
def
|
|
3887
|
+
def space3
|
|
3772
3888
|
elements[4]
|
|
3773
3889
|
end
|
|
3774
3890
|
end
|
|
@@ -3791,7 +3907,10 @@ module ScientificNameClean
|
|
|
3791
3907
|
start_index = index
|
|
3792
3908
|
if node_cache[:author_name].has_key?(index)
|
|
3793
3909
|
cached = node_cache[:author_name][index]
|
|
3794
|
-
|
|
3910
|
+
if cached
|
|
3911
|
+
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
|
3912
|
+
@index = cached.interval.end
|
|
3913
|
+
end
|
|
3795
3914
|
return cached
|
|
3796
3915
|
end
|
|
3797
3916
|
|
|
@@ -3938,7 +4057,10 @@ module ScientificNameClean
|
|
|
3938
4057
|
start_index = index
|
|
3939
4058
|
if node_cache[:author_word].has_key?(index)
|
|
3940
4059
|
cached = node_cache[:author_word][index]
|
|
3941
|
-
|
|
4060
|
+
if cached
|
|
4061
|
+
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
|
4062
|
+
@index = cached.interval.end
|
|
4063
|
+
end
|
|
3942
4064
|
return cached
|
|
3943
4065
|
end
|
|
3944
4066
|
|
|
@@ -4119,7 +4241,7 @@ module ScientificNameClean
|
|
|
4119
4241
|
r7 = r19
|
|
4120
4242
|
else
|
|
4121
4243
|
if has_terminal?('\G[A-W]', true, index)
|
|
4122
|
-
r20 =
|
|
4244
|
+
r20 = true
|
|
4123
4245
|
@index += 1
|
|
4124
4246
|
else
|
|
4125
4247
|
r20 = nil
|
|
@@ -4128,7 +4250,7 @@ module ScientificNameClean
|
|
|
4128
4250
|
r7 = r20
|
|
4129
4251
|
else
|
|
4130
4252
|
if has_terminal?('\G[Y-Z]', true, index)
|
|
4131
|
-
r21 =
|
|
4253
|
+
r21 = true
|
|
4132
4254
|
@index += 1
|
|
4133
4255
|
else
|
|
4134
4256
|
r21 = nil
|
|
@@ -4157,7 +4279,7 @@ module ScientificNameClean
|
|
|
4157
4279
|
s22, i22 = [], index
|
|
4158
4280
|
loop do
|
|
4159
4281
|
if has_terminal?('\G[^0-9\\[\\]\\(\\)\\s&,]', true, index)
|
|
4160
|
-
r23 =
|
|
4282
|
+
r23 = true
|
|
4161
4283
|
@index += 1
|
|
4162
4284
|
else
|
|
4163
4285
|
r23 = nil
|
|
@@ -4195,7 +4317,7 @@ module ScientificNameClean
|
|
|
4195
4317
|
s26, i26 = [], index
|
|
4196
4318
|
loop do
|
|
4197
4319
|
if has_terminal?('\G[^0-9\\[\\]\\(\\)\\s&,]', true, index)
|
|
4198
|
-
r27 =
|
|
4320
|
+
r27 = true
|
|
4199
4321
|
@index += 1
|
|
4200
4322
|
else
|
|
4201
4323
|
r27 = nil
|
|
@@ -4264,7 +4386,10 @@ module ScientificNameClean
|
|
|
4264
4386
|
start_index = index
|
|
4265
4387
|
if node_cache[:author_prefix_word].has_key?(index)
|
|
4266
4388
|
cached = node_cache[:author_prefix_word][index]
|
|
4267
|
-
|
|
4389
|
+
if cached
|
|
4390
|
+
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
|
4391
|
+
@index = cached.interval.end
|
|
4392
|
+
end
|
|
4268
4393
|
return cached
|
|
4269
4394
|
end
|
|
4270
4395
|
|
|
@@ -4456,6 +4581,64 @@ module ScientificNameClean
|
|
|
4456
4581
|
r0
|
|
4457
4582
|
end
|
|
4458
4583
|
|
|
4584
|
+
module CapLatinWordPair0
|
|
4585
|
+
def a
|
|
4586
|
+
elements[0]
|
|
4587
|
+
end
|
|
4588
|
+
|
|
4589
|
+
def b
|
|
4590
|
+
elements[2]
|
|
4591
|
+
end
|
|
4592
|
+
end
|
|
4593
|
+
|
|
4594
|
+
module CapLatinWordPair1
|
|
4595
|
+
def value
|
|
4596
|
+
a.value + b.value.downcase
|
|
4597
|
+
end
|
|
4598
|
+
end
|
|
4599
|
+
|
|
4600
|
+
def _nt_cap_latin_word_pair
|
|
4601
|
+
start_index = index
|
|
4602
|
+
if node_cache[:cap_latin_word_pair].has_key?(index)
|
|
4603
|
+
cached = node_cache[:cap_latin_word_pair][index]
|
|
4604
|
+
if cached
|
|
4605
|
+
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
|
4606
|
+
@index = cached.interval.end
|
|
4607
|
+
end
|
|
4608
|
+
return cached
|
|
4609
|
+
end
|
|
4610
|
+
|
|
4611
|
+
i0, s0 = index, []
|
|
4612
|
+
r1 = _nt_cap_latin_word
|
|
4613
|
+
s0 << r1
|
|
4614
|
+
if r1
|
|
4615
|
+
if has_terminal?("-", false, index)
|
|
4616
|
+
r2 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
|
4617
|
+
@index += 1
|
|
4618
|
+
else
|
|
4619
|
+
terminal_parse_failure("-")
|
|
4620
|
+
r2 = nil
|
|
4621
|
+
end
|
|
4622
|
+
s0 << r2
|
|
4623
|
+
if r2
|
|
4624
|
+
r3 = _nt_cap_latin_word
|
|
4625
|
+
s0 << r3
|
|
4626
|
+
end
|
|
4627
|
+
end
|
|
4628
|
+
if s0.last
|
|
4629
|
+
r0 = instantiate_node(SyntaxNode,input, i0...index, s0)
|
|
4630
|
+
r0.extend(CapLatinWordPair0)
|
|
4631
|
+
r0.extend(CapLatinWordPair1)
|
|
4632
|
+
else
|
|
4633
|
+
@index = i0
|
|
4634
|
+
r0 = nil
|
|
4635
|
+
end
|
|
4636
|
+
|
|
4637
|
+
node_cache[:cap_latin_word_pair][start_index] = r0
|
|
4638
|
+
|
|
4639
|
+
r0
|
|
4640
|
+
end
|
|
4641
|
+
|
|
4459
4642
|
module CapLatinWord0
|
|
4460
4643
|
def a
|
|
4461
4644
|
elements[0]
|
|
@@ -4490,6 +4673,22 @@ module ScientificNameClean
|
|
|
4490
4673
|
end
|
|
4491
4674
|
|
|
4492
4675
|
module CapLatinWord4
|
|
4676
|
+
def a
|
|
4677
|
+
elements[0]
|
|
4678
|
+
end
|
|
4679
|
+
|
|
4680
|
+
def b
|
|
4681
|
+
elements[1]
|
|
4682
|
+
end
|
|
4683
|
+
end
|
|
4684
|
+
|
|
4685
|
+
module CapLatinWord5
|
|
4686
|
+
def value
|
|
4687
|
+
a.text_value[0..0] + 'e' + b.value
|
|
4688
|
+
end
|
|
4689
|
+
end
|
|
4690
|
+
|
|
4691
|
+
module CapLatinWord6
|
|
4493
4692
|
def value
|
|
4494
4693
|
text_value
|
|
4495
4694
|
end
|
|
@@ -4499,7 +4698,10 @@ module ScientificNameClean
|
|
|
4499
4698
|
start_index = index
|
|
4500
4699
|
if node_cache[:cap_latin_word].has_key?(index)
|
|
4501
4700
|
cached = node_cache[:cap_latin_word][index]
|
|
4502
|
-
|
|
4701
|
+
if cached
|
|
4702
|
+
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
|
4703
|
+
@index = cached.interval.end
|
|
4704
|
+
end
|
|
4503
4705
|
return cached
|
|
4504
4706
|
end
|
|
4505
4707
|
|
|
@@ -4507,7 +4709,7 @@ module ScientificNameClean
|
|
|
4507
4709
|
i1, s1 = index, []
|
|
4508
4710
|
i2 = index
|
|
4509
4711
|
if has_terminal?('\G[A-Z]', true, index)
|
|
4510
|
-
r3 =
|
|
4712
|
+
r3 = true
|
|
4511
4713
|
@index += 1
|
|
4512
4714
|
else
|
|
4513
4715
|
r3 = nil
|
|
@@ -4552,7 +4754,7 @@ module ScientificNameClean
|
|
|
4552
4754
|
i7, s7 = index, []
|
|
4553
4755
|
i8 = index
|
|
4554
4756
|
if has_terminal?('\G[A-Z]', true, index)
|
|
4555
|
-
r9 =
|
|
4757
|
+
r9 = true
|
|
4556
4758
|
@index += 1
|
|
4557
4759
|
else
|
|
4558
4760
|
r9 = nil
|
|
@@ -4584,218 +4786,261 @@ module ScientificNameClean
|
|
|
4584
4786
|
if r7
|
|
4585
4787
|
r0 = r7
|
|
4586
4788
|
else
|
|
4587
|
-
i12 = index
|
|
4588
|
-
|
|
4589
|
-
|
|
4789
|
+
i12, s12 = index, []
|
|
4790
|
+
i13 = index
|
|
4791
|
+
if has_terminal?("AE", false, index)
|
|
4792
|
+
r14 = instantiate_node(SyntaxNode,input, index...(index + 2))
|
|
4590
4793
|
@index += 2
|
|
4591
4794
|
else
|
|
4592
|
-
terminal_parse_failure("
|
|
4593
|
-
|
|
4795
|
+
terminal_parse_failure("AE")
|
|
4796
|
+
r14 = nil
|
|
4797
|
+
end
|
|
4798
|
+
if r14
|
|
4799
|
+
r13 = r14
|
|
4800
|
+
else
|
|
4801
|
+
if has_terminal?("OE", false, index)
|
|
4802
|
+
r15 = instantiate_node(SyntaxNode,input, index...(index + 2))
|
|
4803
|
+
@index += 2
|
|
4804
|
+
else
|
|
4805
|
+
terminal_parse_failure("OE")
|
|
4806
|
+
r15 = nil
|
|
4807
|
+
end
|
|
4808
|
+
if r15
|
|
4809
|
+
r13 = r15
|
|
4810
|
+
else
|
|
4811
|
+
@index = i13
|
|
4812
|
+
r13 = nil
|
|
4813
|
+
end
|
|
4594
4814
|
end
|
|
4815
|
+
s12 << r13
|
|
4595
4816
|
if r13
|
|
4596
|
-
|
|
4817
|
+
r16 = _nt_latin_word
|
|
4818
|
+
s12 << r16
|
|
4819
|
+
end
|
|
4820
|
+
if s12.last
|
|
4821
|
+
r12 = instantiate_node(SyntaxNode,input, i12...index, s12)
|
|
4597
4822
|
r12.extend(CapLatinWord4)
|
|
4823
|
+
r12.extend(CapLatinWord5)
|
|
4598
4824
|
else
|
|
4599
|
-
|
|
4600
|
-
|
|
4825
|
+
@index = i12
|
|
4826
|
+
r12 = nil
|
|
4827
|
+
end
|
|
4828
|
+
if r12
|
|
4829
|
+
r0 = r12
|
|
4830
|
+
else
|
|
4831
|
+
i17 = index
|
|
4832
|
+
if has_terminal?("Ca", false, index)
|
|
4833
|
+
r18 = instantiate_node(SyntaxNode,input, index...(index + 2))
|
|
4601
4834
|
@index += 2
|
|
4602
4835
|
else
|
|
4603
|
-
terminal_parse_failure("
|
|
4604
|
-
|
|
4836
|
+
terminal_parse_failure("Ca")
|
|
4837
|
+
r18 = nil
|
|
4605
4838
|
end
|
|
4606
|
-
if
|
|
4607
|
-
|
|
4608
|
-
|
|
4839
|
+
if r18
|
|
4840
|
+
r17 = r18
|
|
4841
|
+
r17.extend(CapLatinWord6)
|
|
4609
4842
|
else
|
|
4610
|
-
if has_terminal?("
|
|
4611
|
-
|
|
4843
|
+
if has_terminal?("Ea", false, index)
|
|
4844
|
+
r19 = instantiate_node(SyntaxNode,input, index...(index + 2))
|
|
4612
4845
|
@index += 2
|
|
4613
4846
|
else
|
|
4614
|
-
terminal_parse_failure("
|
|
4615
|
-
|
|
4847
|
+
terminal_parse_failure("Ea")
|
|
4848
|
+
r19 = nil
|
|
4616
4849
|
end
|
|
4617
|
-
if
|
|
4618
|
-
|
|
4619
|
-
|
|
4850
|
+
if r19
|
|
4851
|
+
r17 = r19
|
|
4852
|
+
r17.extend(CapLatinWord6)
|
|
4620
4853
|
else
|
|
4621
|
-
if has_terminal?("
|
|
4622
|
-
|
|
4854
|
+
if has_terminal?("Ge", false, index)
|
|
4855
|
+
r20 = instantiate_node(SyntaxNode,input, index...(index + 2))
|
|
4623
4856
|
@index += 2
|
|
4624
4857
|
else
|
|
4625
|
-
terminal_parse_failure("
|
|
4626
|
-
|
|
4858
|
+
terminal_parse_failure("Ge")
|
|
4859
|
+
r20 = nil
|
|
4627
4860
|
end
|
|
4628
|
-
if
|
|
4629
|
-
|
|
4630
|
-
|
|
4861
|
+
if r20
|
|
4862
|
+
r17 = r20
|
|
4863
|
+
r17.extend(CapLatinWord6)
|
|
4631
4864
|
else
|
|
4632
|
-
if has_terminal?("
|
|
4633
|
-
|
|
4865
|
+
if has_terminal?("Ia", false, index)
|
|
4866
|
+
r21 = instantiate_node(SyntaxNode,input, index...(index + 2))
|
|
4634
4867
|
@index += 2
|
|
4635
4868
|
else
|
|
4636
|
-
terminal_parse_failure("
|
|
4637
|
-
|
|
4869
|
+
terminal_parse_failure("Ia")
|
|
4870
|
+
r21 = nil
|
|
4638
4871
|
end
|
|
4639
|
-
if
|
|
4640
|
-
|
|
4641
|
-
|
|
4872
|
+
if r21
|
|
4873
|
+
r17 = r21
|
|
4874
|
+
r17.extend(CapLatinWord6)
|
|
4642
4875
|
else
|
|
4643
4876
|
if has_terminal?("Io", false, index)
|
|
4644
|
-
|
|
4877
|
+
r22 = instantiate_node(SyntaxNode,input, index...(index + 2))
|
|
4645
4878
|
@index += 2
|
|
4646
4879
|
else
|
|
4647
4880
|
terminal_parse_failure("Io")
|
|
4648
|
-
|
|
4881
|
+
r22 = nil
|
|
4649
4882
|
end
|
|
4650
|
-
if
|
|
4651
|
-
|
|
4652
|
-
|
|
4883
|
+
if r22
|
|
4884
|
+
r17 = r22
|
|
4885
|
+
r17.extend(CapLatinWord6)
|
|
4653
4886
|
else
|
|
4654
|
-
if has_terminal?("
|
|
4655
|
-
|
|
4887
|
+
if has_terminal?("Io", false, index)
|
|
4888
|
+
r23 = instantiate_node(SyntaxNode,input, index...(index + 2))
|
|
4656
4889
|
@index += 2
|
|
4657
4890
|
else
|
|
4658
|
-
terminal_parse_failure("
|
|
4659
|
-
|
|
4891
|
+
terminal_parse_failure("Io")
|
|
4892
|
+
r23 = nil
|
|
4660
4893
|
end
|
|
4661
|
-
if
|
|
4662
|
-
|
|
4663
|
-
|
|
4894
|
+
if r23
|
|
4895
|
+
r17 = r23
|
|
4896
|
+
r17.extend(CapLatinWord6)
|
|
4664
4897
|
else
|
|
4665
|
-
if has_terminal?("
|
|
4666
|
-
|
|
4898
|
+
if has_terminal?("Ix", false, index)
|
|
4899
|
+
r24 = instantiate_node(SyntaxNode,input, index...(index + 2))
|
|
4667
4900
|
@index += 2
|
|
4668
4901
|
else
|
|
4669
|
-
terminal_parse_failure("
|
|
4670
|
-
|
|
4902
|
+
terminal_parse_failure("Ix")
|
|
4903
|
+
r24 = nil
|
|
4671
4904
|
end
|
|
4672
|
-
if
|
|
4673
|
-
|
|
4674
|
-
|
|
4905
|
+
if r24
|
|
4906
|
+
r17 = r24
|
|
4907
|
+
r17.extend(CapLatinWord6)
|
|
4675
4908
|
else
|
|
4676
|
-
if has_terminal?("
|
|
4677
|
-
|
|
4909
|
+
if has_terminal?("Lo", false, index)
|
|
4910
|
+
r25 = instantiate_node(SyntaxNode,input, index...(index + 2))
|
|
4678
4911
|
@index += 2
|
|
4679
4912
|
else
|
|
4680
|
-
terminal_parse_failure("
|
|
4681
|
-
|
|
4913
|
+
terminal_parse_failure("Lo")
|
|
4914
|
+
r25 = nil
|
|
4682
4915
|
end
|
|
4683
|
-
if
|
|
4684
|
-
|
|
4685
|
-
|
|
4916
|
+
if r25
|
|
4917
|
+
r17 = r25
|
|
4918
|
+
r17.extend(CapLatinWord6)
|
|
4686
4919
|
else
|
|
4687
|
-
if has_terminal?("
|
|
4688
|
-
|
|
4920
|
+
if has_terminal?("Oa", false, index)
|
|
4921
|
+
r26 = instantiate_node(SyntaxNode,input, index...(index + 2))
|
|
4689
4922
|
@index += 2
|
|
4690
4923
|
else
|
|
4691
|
-
terminal_parse_failure("
|
|
4692
|
-
|
|
4924
|
+
terminal_parse_failure("Oa")
|
|
4925
|
+
r26 = nil
|
|
4693
4926
|
end
|
|
4694
|
-
if
|
|
4695
|
-
|
|
4696
|
-
|
|
4927
|
+
if r26
|
|
4928
|
+
r17 = r26
|
|
4929
|
+
r17.extend(CapLatinWord6)
|
|
4697
4930
|
else
|
|
4698
|
-
if has_terminal?("
|
|
4699
|
-
|
|
4931
|
+
if has_terminal?("Ra", false, index)
|
|
4932
|
+
r27 = instantiate_node(SyntaxNode,input, index...(index + 2))
|
|
4700
4933
|
@index += 2
|
|
4701
4934
|
else
|
|
4702
|
-
terminal_parse_failure("
|
|
4703
|
-
|
|
4935
|
+
terminal_parse_failure("Ra")
|
|
4936
|
+
r27 = nil
|
|
4704
4937
|
end
|
|
4705
|
-
if
|
|
4706
|
-
|
|
4707
|
-
|
|
4938
|
+
if r27
|
|
4939
|
+
r17 = r27
|
|
4940
|
+
r17.extend(CapLatinWord6)
|
|
4708
4941
|
else
|
|
4709
|
-
if has_terminal?("
|
|
4710
|
-
|
|
4942
|
+
if has_terminal?("Ty", false, index)
|
|
4943
|
+
r28 = instantiate_node(SyntaxNode,input, index...(index + 2))
|
|
4711
4944
|
@index += 2
|
|
4712
4945
|
else
|
|
4713
|
-
terminal_parse_failure("
|
|
4714
|
-
|
|
4946
|
+
terminal_parse_failure("Ty")
|
|
4947
|
+
r28 = nil
|
|
4715
4948
|
end
|
|
4716
|
-
if
|
|
4717
|
-
|
|
4718
|
-
|
|
4949
|
+
if r28
|
|
4950
|
+
r17 = r28
|
|
4951
|
+
r17.extend(CapLatinWord6)
|
|
4719
4952
|
else
|
|
4720
|
-
if has_terminal?("
|
|
4721
|
-
|
|
4953
|
+
if has_terminal?("Ua", false, index)
|
|
4954
|
+
r29 = instantiate_node(SyntaxNode,input, index...(index + 2))
|
|
4722
4955
|
@index += 2
|
|
4723
4956
|
else
|
|
4724
|
-
terminal_parse_failure("
|
|
4725
|
-
|
|
4957
|
+
terminal_parse_failure("Ua")
|
|
4958
|
+
r29 = nil
|
|
4726
4959
|
end
|
|
4727
|
-
if
|
|
4728
|
-
|
|
4729
|
-
|
|
4960
|
+
if r29
|
|
4961
|
+
r17 = r29
|
|
4962
|
+
r17.extend(CapLatinWord6)
|
|
4730
4963
|
else
|
|
4731
|
-
if has_terminal?("
|
|
4732
|
-
|
|
4964
|
+
if has_terminal?("Aa", false, index)
|
|
4965
|
+
r30 = instantiate_node(SyntaxNode,input, index...(index + 2))
|
|
4733
4966
|
@index += 2
|
|
4734
4967
|
else
|
|
4735
|
-
terminal_parse_failure("
|
|
4736
|
-
|
|
4968
|
+
terminal_parse_failure("Aa")
|
|
4969
|
+
r30 = nil
|
|
4737
4970
|
end
|
|
4738
|
-
if
|
|
4739
|
-
|
|
4740
|
-
|
|
4971
|
+
if r30
|
|
4972
|
+
r17 = r30
|
|
4973
|
+
r17.extend(CapLatinWord6)
|
|
4741
4974
|
else
|
|
4742
|
-
if has_terminal?("
|
|
4743
|
-
|
|
4975
|
+
if has_terminal?("Ja", false, index)
|
|
4976
|
+
r31 = instantiate_node(SyntaxNode,input, index...(index + 2))
|
|
4744
4977
|
@index += 2
|
|
4745
4978
|
else
|
|
4746
|
-
terminal_parse_failure("
|
|
4747
|
-
|
|
4979
|
+
terminal_parse_failure("Ja")
|
|
4980
|
+
r31 = nil
|
|
4748
4981
|
end
|
|
4749
|
-
if
|
|
4750
|
-
|
|
4751
|
-
|
|
4982
|
+
if r31
|
|
4983
|
+
r17 = r31
|
|
4984
|
+
r17.extend(CapLatinWord6)
|
|
4752
4985
|
else
|
|
4753
|
-
if has_terminal?("
|
|
4754
|
-
|
|
4986
|
+
if has_terminal?("Zu", false, index)
|
|
4987
|
+
r32 = instantiate_node(SyntaxNode,input, index...(index + 2))
|
|
4755
4988
|
@index += 2
|
|
4756
4989
|
else
|
|
4757
|
-
terminal_parse_failure("
|
|
4758
|
-
|
|
4990
|
+
terminal_parse_failure("Zu")
|
|
4991
|
+
r32 = nil
|
|
4759
4992
|
end
|
|
4760
|
-
if
|
|
4761
|
-
|
|
4762
|
-
|
|
4993
|
+
if r32
|
|
4994
|
+
r17 = r32
|
|
4995
|
+
r17.extend(CapLatinWord6)
|
|
4763
4996
|
else
|
|
4764
|
-
if has_terminal?("
|
|
4765
|
-
|
|
4997
|
+
if has_terminal?("La", false, index)
|
|
4998
|
+
r33 = instantiate_node(SyntaxNode,input, index...(index + 2))
|
|
4766
4999
|
@index += 2
|
|
4767
5000
|
else
|
|
4768
|
-
terminal_parse_failure("
|
|
4769
|
-
|
|
5001
|
+
terminal_parse_failure("La")
|
|
5002
|
+
r33 = nil
|
|
4770
5003
|
end
|
|
4771
|
-
if
|
|
4772
|
-
|
|
4773
|
-
|
|
5004
|
+
if r33
|
|
5005
|
+
r17 = r33
|
|
5006
|
+
r17.extend(CapLatinWord6)
|
|
4774
5007
|
else
|
|
4775
|
-
if has_terminal?("
|
|
4776
|
-
|
|
5008
|
+
if has_terminal?("Qu", false, index)
|
|
5009
|
+
r34 = instantiate_node(SyntaxNode,input, index...(index + 2))
|
|
4777
5010
|
@index += 2
|
|
4778
5011
|
else
|
|
4779
|
-
terminal_parse_failure("
|
|
4780
|
-
|
|
5012
|
+
terminal_parse_failure("Qu")
|
|
5013
|
+
r34 = nil
|
|
4781
5014
|
end
|
|
4782
|
-
if
|
|
4783
|
-
|
|
4784
|
-
|
|
5015
|
+
if r34
|
|
5016
|
+
r17 = r34
|
|
5017
|
+
r17.extend(CapLatinWord6)
|
|
4785
5018
|
else
|
|
4786
|
-
if has_terminal?("
|
|
4787
|
-
|
|
5019
|
+
if has_terminal?("As", false, index)
|
|
5020
|
+
r35 = instantiate_node(SyntaxNode,input, index...(index + 2))
|
|
4788
5021
|
@index += 2
|
|
4789
5022
|
else
|
|
4790
|
-
terminal_parse_failure("
|
|
4791
|
-
|
|
5023
|
+
terminal_parse_failure("As")
|
|
5024
|
+
r35 = nil
|
|
4792
5025
|
end
|
|
4793
|
-
if
|
|
4794
|
-
|
|
4795
|
-
|
|
5026
|
+
if r35
|
|
5027
|
+
r17 = r35
|
|
5028
|
+
r17.extend(CapLatinWord6)
|
|
4796
5029
|
else
|
|
4797
|
-
|
|
4798
|
-
|
|
5030
|
+
if has_terminal?("Ba", false, index)
|
|
5031
|
+
r36 = instantiate_node(SyntaxNode,input, index...(index + 2))
|
|
5032
|
+
@index += 2
|
|
5033
|
+
else
|
|
5034
|
+
terminal_parse_failure("Ba")
|
|
5035
|
+
r36 = nil
|
|
5036
|
+
end
|
|
5037
|
+
if r36
|
|
5038
|
+
r17 = r36
|
|
5039
|
+
r17.extend(CapLatinWord6)
|
|
5040
|
+
else
|
|
5041
|
+
@index = i17
|
|
5042
|
+
r17 = nil
|
|
5043
|
+
end
|
|
4799
5044
|
end
|
|
4800
5045
|
end
|
|
4801
5046
|
end
|
|
@@ -4814,12 +5059,12 @@ module ScientificNameClean
|
|
|
4814
5059
|
end
|
|
4815
5060
|
end
|
|
4816
5061
|
end
|
|
4817
|
-
|
|
4818
|
-
|
|
4819
|
-
|
|
4820
|
-
|
|
4821
|
-
|
|
4822
|
-
|
|
5062
|
+
if r17
|
|
5063
|
+
r0 = r17
|
|
5064
|
+
else
|
|
5065
|
+
@index = i0
|
|
5066
|
+
r0 = nil
|
|
5067
|
+
end
|
|
4823
5068
|
end
|
|
4824
5069
|
end
|
|
4825
5070
|
end
|
|
@@ -4941,7 +5186,10 @@ module ScientificNameClean
|
|
|
4941
5186
|
start_index = index
|
|
4942
5187
|
if node_cache[:species_word_hybrid].has_key?(index)
|
|
4943
5188
|
cached = node_cache[:species_word_hybrid][index]
|
|
4944
|
-
|
|
5189
|
+
if cached
|
|
5190
|
+
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
|
5191
|
+
@index = cached.interval.end
|
|
5192
|
+
end
|
|
4945
5193
|
return cached
|
|
4946
5194
|
end
|
|
4947
5195
|
|
|
@@ -5042,7 +5290,10 @@ module ScientificNameClean
|
|
|
5042
5290
|
start_index = index
|
|
5043
5291
|
if node_cache[:species_prefix].has_key?(index)
|
|
5044
5292
|
cached = node_cache[:species_prefix][index]
|
|
5045
|
-
|
|
5293
|
+
if cached
|
|
5294
|
+
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
|
5295
|
+
@index = cached.interval.end
|
|
5296
|
+
end
|
|
5046
5297
|
return cached
|
|
5047
5298
|
end
|
|
5048
5299
|
|
|
@@ -5128,7 +5379,10 @@ module ScientificNameClean
|
|
|
5128
5379
|
start_index = index
|
|
5129
5380
|
if node_cache[:species_word].has_key?(index)
|
|
5130
5381
|
cached = node_cache[:species_word][index]
|
|
5131
|
-
|
|
5382
|
+
if cached
|
|
5383
|
+
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
|
5384
|
+
@index = cached.interval.end
|
|
5385
|
+
end
|
|
5132
5386
|
return cached
|
|
5133
5387
|
end
|
|
5134
5388
|
|
|
@@ -5137,7 +5391,7 @@ module ScientificNameClean
|
|
|
5137
5391
|
s2, i2 = [], index
|
|
5138
5392
|
loop do
|
|
5139
5393
|
if has_terminal?('\G[0-9]', true, index)
|
|
5140
|
-
r3 =
|
|
5394
|
+
r3 = true
|
|
5141
5395
|
@index += 1
|
|
5142
5396
|
else
|
|
5143
5397
|
r3 = nil
|
|
@@ -5211,23 +5465,10 @@ module ScientificNameClean
|
|
|
5211
5465
|
|
|
5212
5466
|
module LatinWord1
|
|
5213
5467
|
def value
|
|
5214
|
-
a.text_value
|
|
5215
|
-
|
|
5216
|
-
|
|
5217
|
-
|
|
5218
|
-
module LatinWord2
|
|
5219
|
-
def a
|
|
5220
|
-
elements[0]
|
|
5221
|
-
end
|
|
5222
|
-
|
|
5223
|
-
def b
|
|
5224
|
-
elements[1]
|
|
5225
|
-
end
|
|
5226
|
-
end
|
|
5227
|
-
|
|
5228
|
-
module LatinWord3
|
|
5229
|
-
def value
|
|
5230
|
-
a.value + b.value
|
|
5468
|
+
l = a.text_value
|
|
5469
|
+
l = 'ae' if l == 'æ'
|
|
5470
|
+
l = 'oe' if l == 'œ'
|
|
5471
|
+
l + b.value
|
|
5231
5472
|
end
|
|
5232
5473
|
end
|
|
5233
5474
|
|
|
@@ -5235,165 +5476,51 @@ module ScientificNameClean
|
|
|
5235
5476
|
start_index = index
|
|
5236
5477
|
if node_cache[:latin_word].has_key?(index)
|
|
5237
5478
|
cached = node_cache[:latin_word][index]
|
|
5238
|
-
|
|
5479
|
+
if cached
|
|
5480
|
+
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
|
5481
|
+
@index = cached.interval.end
|
|
5482
|
+
end
|
|
5239
5483
|
return cached
|
|
5240
5484
|
end
|
|
5241
5485
|
|
|
5242
|
-
i0 = index
|
|
5243
|
-
|
|
5244
|
-
|
|
5245
|
-
r2 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
|
5486
|
+
i0, s0 = index, []
|
|
5487
|
+
if has_terminal?('\G[a-zëæœ]', true, index)
|
|
5488
|
+
r1 = true
|
|
5246
5489
|
@index += 1
|
|
5247
5490
|
else
|
|
5248
|
-
r2 = nil
|
|
5249
|
-
end
|
|
5250
|
-
s1 << r2
|
|
5251
|
-
if r2
|
|
5252
|
-
r3 = _nt_full_name_letters
|
|
5253
|
-
s1 << r3
|
|
5254
|
-
end
|
|
5255
|
-
if s1.last
|
|
5256
|
-
r1 = instantiate_node(SyntaxNode,input, i1...index, s1)
|
|
5257
|
-
r1.extend(LatinWord0)
|
|
5258
|
-
r1.extend(LatinWord1)
|
|
5259
|
-
else
|
|
5260
|
-
@index = i1
|
|
5261
5491
|
r1 = nil
|
|
5262
5492
|
end
|
|
5493
|
+
s0 << r1
|
|
5263
5494
|
if r1
|
|
5264
|
-
|
|
5265
|
-
|
|
5266
|
-
i4, s4 = index, []
|
|
5267
|
-
r5 = _nt_digraph
|
|
5268
|
-
s4 << r5
|
|
5269
|
-
if r5
|
|
5270
|
-
r6 = _nt_full_name_letters
|
|
5271
|
-
s4 << r6
|
|
5272
|
-
end
|
|
5273
|
-
if s4.last
|
|
5274
|
-
r4 = instantiate_node(SyntaxNode,input, i4...index, s4)
|
|
5275
|
-
r4.extend(LatinWord2)
|
|
5276
|
-
r4.extend(LatinWord3)
|
|
5277
|
-
else
|
|
5278
|
-
@index = i4
|
|
5279
|
-
r4 = nil
|
|
5280
|
-
end
|
|
5281
|
-
if r4
|
|
5282
|
-
r0 = r4
|
|
5283
|
-
else
|
|
5284
|
-
@index = i0
|
|
5285
|
-
r0 = nil
|
|
5286
|
-
end
|
|
5287
|
-
end
|
|
5288
|
-
|
|
5289
|
-
node_cache[:latin_word][start_index] = r0
|
|
5290
|
-
|
|
5291
|
-
r0
|
|
5292
|
-
end
|
|
5293
|
-
|
|
5294
|
-
module FullNameLetters0
|
|
5295
|
-
def a
|
|
5296
|
-
elements[0]
|
|
5297
|
-
end
|
|
5298
|
-
|
|
5299
|
-
def b
|
|
5300
|
-
elements[1]
|
|
5301
|
-
end
|
|
5302
|
-
end
|
|
5303
|
-
|
|
5304
|
-
module FullNameLetters1
|
|
5305
|
-
def value
|
|
5306
|
-
a.value + b.value
|
|
5307
|
-
end
|
|
5308
|
-
end
|
|
5309
|
-
|
|
5310
|
-
module FullNameLetters2
|
|
5311
|
-
def a
|
|
5312
|
-
elements[0]
|
|
5313
|
-
end
|
|
5314
|
-
|
|
5315
|
-
def b
|
|
5316
|
-
elements[1]
|
|
5317
|
-
end
|
|
5318
|
-
|
|
5319
|
-
def c
|
|
5320
|
-
elements[2]
|
|
5321
|
-
end
|
|
5322
|
-
end
|
|
5323
|
-
|
|
5324
|
-
module FullNameLetters3
|
|
5325
|
-
def value
|
|
5326
|
-
a.value + b.value + c.value
|
|
5327
|
-
end
|
|
5328
|
-
end
|
|
5329
|
-
|
|
5330
|
-
def _nt_full_name_letters
|
|
5331
|
-
start_index = index
|
|
5332
|
-
if node_cache[:full_name_letters].has_key?(index)
|
|
5333
|
-
cached = node_cache[:full_name_letters][index]
|
|
5334
|
-
@index = cached.interval.end if cached
|
|
5335
|
-
return cached
|
|
5336
|
-
end
|
|
5337
|
-
|
|
5338
|
-
i0 = index
|
|
5339
|
-
i1, s1 = index, []
|
|
5340
|
-
r2 = _nt_digraph
|
|
5341
|
-
s1 << r2
|
|
5342
|
-
if r2
|
|
5343
|
-
r3 = _nt_full_name_letters
|
|
5344
|
-
s1 << r3
|
|
5345
|
-
end
|
|
5346
|
-
if s1.last
|
|
5347
|
-
r1 = instantiate_node(SyntaxNode,input, i1...index, s1)
|
|
5348
|
-
r1.extend(FullNameLetters0)
|
|
5349
|
-
r1.extend(FullNameLetters1)
|
|
5350
|
-
else
|
|
5351
|
-
@index = i1
|
|
5352
|
-
r1 = nil
|
|
5495
|
+
r2 = _nt_valid_name_letters
|
|
5496
|
+
s0 << r2
|
|
5353
5497
|
end
|
|
5354
|
-
if
|
|
5355
|
-
r0 =
|
|
5498
|
+
if s0.last
|
|
5499
|
+
r0 = instantiate_node(SyntaxNode,input, i0...index, s0)
|
|
5500
|
+
r0.extend(LatinWord0)
|
|
5501
|
+
r0.extend(LatinWord1)
|
|
5356
5502
|
else
|
|
5357
|
-
|
|
5358
|
-
|
|
5359
|
-
s4 << r5
|
|
5360
|
-
if r5
|
|
5361
|
-
r6 = _nt_digraph
|
|
5362
|
-
s4 << r6
|
|
5363
|
-
if r6
|
|
5364
|
-
r7 = _nt_full_name_letters
|
|
5365
|
-
s4 << r7
|
|
5366
|
-
end
|
|
5367
|
-
end
|
|
5368
|
-
if s4.last
|
|
5369
|
-
r4 = instantiate_node(SyntaxNode,input, i4...index, s4)
|
|
5370
|
-
r4.extend(FullNameLetters2)
|
|
5371
|
-
r4.extend(FullNameLetters3)
|
|
5372
|
-
else
|
|
5373
|
-
@index = i4
|
|
5374
|
-
r4 = nil
|
|
5375
|
-
end
|
|
5376
|
-
if r4
|
|
5377
|
-
r0 = r4
|
|
5378
|
-
else
|
|
5379
|
-
r8 = _nt_valid_name_letters
|
|
5380
|
-
if r8
|
|
5381
|
-
r0 = r8
|
|
5382
|
-
else
|
|
5383
|
-
@index = i0
|
|
5384
|
-
r0 = nil
|
|
5385
|
-
end
|
|
5386
|
-
end
|
|
5503
|
+
@index = i0
|
|
5504
|
+
r0 = nil
|
|
5387
5505
|
end
|
|
5388
5506
|
|
|
5389
|
-
node_cache[:
|
|
5507
|
+
node_cache[:latin_word][start_index] = r0
|
|
5390
5508
|
|
|
5391
5509
|
r0
|
|
5392
5510
|
end
|
|
5393
5511
|
|
|
5394
5512
|
module ValidNameLetters0
|
|
5395
5513
|
def value
|
|
5396
|
-
|
|
5514
|
+
res = ''
|
|
5515
|
+
text_value.split('').each do |l|
|
|
5516
|
+
l = 'ae' if l == 'æ'
|
|
5517
|
+
l = 'oe' if l == 'œ'
|
|
5518
|
+
# not sure if we should normalize ë as well. It is legal in botanical code, but it
|
|
5519
|
+
# might be beneficial to normalize it for the reconsiliation purposes
|
|
5520
|
+
# l = 'e' if l == 'ë'
|
|
5521
|
+
res << l
|
|
5522
|
+
end
|
|
5523
|
+
res
|
|
5397
5524
|
end
|
|
5398
5525
|
end
|
|
5399
5526
|
|
|
@@ -5401,14 +5528,17 @@ module ScientificNameClean
|
|
|
5401
5528
|
start_index = index
|
|
5402
5529
|
if node_cache[:valid_name_letters].has_key?(index)
|
|
5403
5530
|
cached = node_cache[:valid_name_letters][index]
|
|
5404
|
-
|
|
5531
|
+
if cached
|
|
5532
|
+
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
|
5533
|
+
@index = cached.interval.end
|
|
5534
|
+
end
|
|
5405
5535
|
return cached
|
|
5406
5536
|
end
|
|
5407
5537
|
|
|
5408
5538
|
s0, i0 = [], index
|
|
5409
5539
|
loop do
|
|
5410
|
-
if has_terminal?('\G[a-z
|
|
5411
|
-
r1 =
|
|
5540
|
+
if has_terminal?('\G[a-z\\-ëæœ]', true, index)
|
|
5541
|
+
r1 = true
|
|
5412
5542
|
@index += 1
|
|
5413
5543
|
else
|
|
5414
5544
|
r1 = nil
|
|
@@ -5448,7 +5578,10 @@ module ScientificNameClean
|
|
|
5448
5578
|
start_index = index
|
|
5449
5579
|
if node_cache[:cap_digraph].has_key?(index)
|
|
5450
5580
|
cached = node_cache[:cap_digraph][index]
|
|
5451
|
-
|
|
5581
|
+
if cached
|
|
5582
|
+
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
|
5583
|
+
@index = cached.interval.end
|
|
5584
|
+
end
|
|
5452
5585
|
return cached
|
|
5453
5586
|
end
|
|
5454
5587
|
|
|
@@ -5485,65 +5618,12 @@ module ScientificNameClean
|
|
|
5485
5618
|
r0
|
|
5486
5619
|
end
|
|
5487
5620
|
|
|
5488
|
-
module Digraph0
|
|
5489
|
-
def value
|
|
5490
|
-
'ae'
|
|
5491
|
-
end
|
|
5492
|
-
end
|
|
5493
|
-
|
|
5494
|
-
module Digraph1
|
|
5495
|
-
def value
|
|
5496
|
-
'oe'
|
|
5497
|
-
end
|
|
5498
|
-
end
|
|
5499
|
-
|
|
5500
|
-
def _nt_digraph
|
|
5501
|
-
start_index = index
|
|
5502
|
-
if node_cache[:digraph].has_key?(index)
|
|
5503
|
-
cached = node_cache[:digraph][index]
|
|
5504
|
-
@index = cached.interval.end if cached
|
|
5505
|
-
return cached
|
|
5506
|
-
end
|
|
5507
|
-
|
|
5508
|
-
i0 = index
|
|
5509
|
-
if has_terminal?("æ", false, index)
|
|
5510
|
-
r1 = instantiate_node(SyntaxNode,input, index...(index + 2))
|
|
5511
|
-
r1.extend(Digraph0)
|
|
5512
|
-
@index += 2
|
|
5513
|
-
else
|
|
5514
|
-
terminal_parse_failure("æ")
|
|
5515
|
-
r1 = nil
|
|
5516
|
-
end
|
|
5517
|
-
if r1
|
|
5518
|
-
r0 = r1
|
|
5519
|
-
else
|
|
5520
|
-
if has_terminal?("œ", false, index)
|
|
5521
|
-
r2 = instantiate_node(SyntaxNode,input, index...(index + 2))
|
|
5522
|
-
r2.extend(Digraph1)
|
|
5523
|
-
@index += 2
|
|
5524
|
-
else
|
|
5525
|
-
terminal_parse_failure("œ")
|
|
5526
|
-
r2 = nil
|
|
5527
|
-
end
|
|
5528
|
-
if r2
|
|
5529
|
-
r0 = r2
|
|
5530
|
-
else
|
|
5531
|
-
@index = i0
|
|
5532
|
-
r0 = nil
|
|
5533
|
-
end
|
|
5534
|
-
end
|
|
5535
|
-
|
|
5536
|
-
node_cache[:digraph][start_index] = r0
|
|
5537
|
-
|
|
5538
|
-
r0
|
|
5539
|
-
end
|
|
5540
|
-
|
|
5541
5621
|
module Year0
|
|
5542
5622
|
def b
|
|
5543
5623
|
elements[0]
|
|
5544
5624
|
end
|
|
5545
5625
|
|
|
5546
|
-
def
|
|
5626
|
+
def space1
|
|
5547
5627
|
elements[1]
|
|
5548
5628
|
end
|
|
5549
5629
|
|
|
@@ -5551,7 +5631,7 @@ module ScientificNameClean
|
|
|
5551
5631
|
elements[2]
|
|
5552
5632
|
end
|
|
5553
5633
|
|
|
5554
|
-
def
|
|
5634
|
+
def space2
|
|
5555
5635
|
elements[3]
|
|
5556
5636
|
end
|
|
5557
5637
|
|
|
@@ -5578,7 +5658,10 @@ module ScientificNameClean
|
|
|
5578
5658
|
start_index = index
|
|
5579
5659
|
if node_cache[:year].has_key?(index)
|
|
5580
5660
|
cached = node_cache[:year][index]
|
|
5581
|
-
|
|
5661
|
+
if cached
|
|
5662
|
+
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
|
5663
|
+
@index = cached.interval.end
|
|
5664
|
+
end
|
|
5582
5665
|
return cached
|
|
5583
5666
|
end
|
|
5584
5667
|
|
|
@@ -5669,7 +5752,10 @@ module ScientificNameClean
|
|
|
5669
5752
|
start_index = index
|
|
5670
5753
|
if node_cache[:year_number_with_character].has_key?(index)
|
|
5671
5754
|
cached = node_cache[:year_number_with_character][index]
|
|
5672
|
-
|
|
5755
|
+
if cached
|
|
5756
|
+
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
|
5757
|
+
@index = cached.interval.end
|
|
5758
|
+
end
|
|
5673
5759
|
return cached
|
|
5674
5760
|
end
|
|
5675
5761
|
|
|
@@ -5678,7 +5764,7 @@ module ScientificNameClean
|
|
|
5678
5764
|
s0 << r1
|
|
5679
5765
|
if r1
|
|
5680
5766
|
if has_terminal?('\G[a-zA-Z]', true, index)
|
|
5681
|
-
r2 =
|
|
5767
|
+
r2 = true
|
|
5682
5768
|
@index += 1
|
|
5683
5769
|
else
|
|
5684
5770
|
r2 = nil
|
|
@@ -5720,13 +5806,16 @@ module ScientificNameClean
|
|
|
5720
5806
|
start_index = index
|
|
5721
5807
|
if node_cache[:year_number].has_key?(index)
|
|
5722
5808
|
cached = node_cache[:year_number][index]
|
|
5723
|
-
|
|
5809
|
+
if cached
|
|
5810
|
+
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
|
5811
|
+
@index = cached.interval.end
|
|
5812
|
+
end
|
|
5724
5813
|
return cached
|
|
5725
5814
|
end
|
|
5726
5815
|
|
|
5727
5816
|
i0, s0 = index, []
|
|
5728
5817
|
if has_terminal?('\G[12]', true, index)
|
|
5729
|
-
r1 =
|
|
5818
|
+
r1 = true
|
|
5730
5819
|
@index += 1
|
|
5731
5820
|
else
|
|
5732
5821
|
r1 = nil
|
|
@@ -5734,7 +5823,7 @@ module ScientificNameClean
|
|
|
5734
5823
|
s0 << r1
|
|
5735
5824
|
if r1
|
|
5736
5825
|
if has_terminal?('\G[7890]', true, index)
|
|
5737
|
-
r2 =
|
|
5826
|
+
r2 = true
|
|
5738
5827
|
@index += 1
|
|
5739
5828
|
else
|
|
5740
5829
|
r2 = nil
|
|
@@ -5742,7 +5831,7 @@ module ScientificNameClean
|
|
|
5742
5831
|
s0 << r2
|
|
5743
5832
|
if r2
|
|
5744
5833
|
if has_terminal?('\G[0-9]', true, index)
|
|
5745
|
-
r3 =
|
|
5834
|
+
r3 = true
|
|
5746
5835
|
@index += 1
|
|
5747
5836
|
else
|
|
5748
5837
|
r3 = nil
|
|
@@ -5750,7 +5839,7 @@ module ScientificNameClean
|
|
|
5750
5839
|
s0 << r3
|
|
5751
5840
|
if r3
|
|
5752
5841
|
if has_terminal?('\G[0-9]', true, index)
|
|
5753
|
-
r5 =
|
|
5842
|
+
r5 = true
|
|
5754
5843
|
@index += 1
|
|
5755
5844
|
else
|
|
5756
5845
|
r5 = nil
|
|
@@ -5763,7 +5852,7 @@ module ScientificNameClean
|
|
|
5763
5852
|
s0 << r4
|
|
5764
5853
|
if r4
|
|
5765
5854
|
if has_terminal?('\G[\\?]', true, index)
|
|
5766
|
-
r7 =
|
|
5855
|
+
r7 = true
|
|
5767
5856
|
@index += 1
|
|
5768
5857
|
else
|
|
5769
5858
|
r7 = nil
|
|
@@ -5796,7 +5885,10 @@ module ScientificNameClean
|
|
|
5796
5885
|
start_index = index
|
|
5797
5886
|
if node_cache[:left_paren].has_key?(index)
|
|
5798
5887
|
cached = node_cache[:left_paren][index]
|
|
5799
|
-
|
|
5888
|
+
if cached
|
|
5889
|
+
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
|
5890
|
+
@index = cached.interval.end
|
|
5891
|
+
end
|
|
5800
5892
|
return cached
|
|
5801
5893
|
end
|
|
5802
5894
|
|
|
@@ -5817,7 +5909,10 @@ module ScientificNameClean
|
|
|
5817
5909
|
start_index = index
|
|
5818
5910
|
if node_cache[:right_paren].has_key?(index)
|
|
5819
5911
|
cached = node_cache[:right_paren][index]
|
|
5820
|
-
|
|
5912
|
+
if cached
|
|
5913
|
+
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
|
5914
|
+
@index = cached.interval.end
|
|
5915
|
+
end
|
|
5821
5916
|
return cached
|
|
5822
5917
|
end
|
|
5823
5918
|
|
|
@@ -5844,7 +5939,10 @@ module ScientificNameClean
|
|
|
5844
5939
|
start_index = index
|
|
5845
5940
|
if node_cache[:hybrid_character].has_key?(index)
|
|
5846
5941
|
cached = node_cache[:hybrid_character][index]
|
|
5847
|
-
|
|
5942
|
+
if cached
|
|
5943
|
+
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
|
5944
|
+
@index = cached.interval.end
|
|
5945
|
+
end
|
|
5848
5946
|
return cached
|
|
5849
5947
|
end
|
|
5850
5948
|
|
|
@@ -5895,7 +5993,7 @@ module ScientificNameClean
|
|
|
5895
5993
|
|
|
5896
5994
|
module MultiplicationSign0
|
|
5897
5995
|
def value
|
|
5898
|
-
|
|
5996
|
+
"×"
|
|
5899
5997
|
end
|
|
5900
5998
|
end
|
|
5901
5999
|
|
|
@@ -5903,17 +6001,39 @@ module ScientificNameClean
|
|
|
5903
6001
|
start_index = index
|
|
5904
6002
|
if node_cache[:multiplication_sign].has_key?(index)
|
|
5905
6003
|
cached = node_cache[:multiplication_sign][index]
|
|
5906
|
-
|
|
6004
|
+
if cached
|
|
6005
|
+
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
|
6006
|
+
@index = cached.interval.end
|
|
6007
|
+
end
|
|
5907
6008
|
return cached
|
|
5908
6009
|
end
|
|
5909
6010
|
|
|
6011
|
+
i0 = index
|
|
5910
6012
|
if has_terminal?("×", false, index)
|
|
5911
|
-
|
|
5912
|
-
r0.extend(MultiplicationSign0)
|
|
6013
|
+
r1 = instantiate_node(SyntaxNode,input, index...(index + 2))
|
|
5913
6014
|
@index += 2
|
|
5914
6015
|
else
|
|
5915
6016
|
terminal_parse_failure("×")
|
|
5916
|
-
|
|
6017
|
+
r1 = nil
|
|
6018
|
+
end
|
|
6019
|
+
if r1
|
|
6020
|
+
r0 = r1
|
|
6021
|
+
r0.extend(MultiplicationSign0)
|
|
6022
|
+
else
|
|
6023
|
+
if has_terminal?("*", false, index)
|
|
6024
|
+
r2 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
|
6025
|
+
@index += 1
|
|
6026
|
+
else
|
|
6027
|
+
terminal_parse_failure("*")
|
|
6028
|
+
r2 = nil
|
|
6029
|
+
end
|
|
6030
|
+
if r2
|
|
6031
|
+
r0 = r2
|
|
6032
|
+
r0.extend(MultiplicationSign0)
|
|
6033
|
+
else
|
|
6034
|
+
@index = i0
|
|
6035
|
+
r0 = nil
|
|
6036
|
+
end
|
|
5917
6037
|
end
|
|
5918
6038
|
|
|
5919
6039
|
node_cache[:multiplication_sign][start_index] = r0
|
|
@@ -5925,14 +6045,17 @@ module ScientificNameClean
|
|
|
5925
6045
|
start_index = index
|
|
5926
6046
|
if node_cache[:space].has_key?(index)
|
|
5927
6047
|
cached = node_cache[:space][index]
|
|
5928
|
-
|
|
6048
|
+
if cached
|
|
6049
|
+
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
|
6050
|
+
@index = cached.interval.end
|
|
6051
|
+
end
|
|
5929
6052
|
return cached
|
|
5930
6053
|
end
|
|
5931
6054
|
|
|
5932
6055
|
s0, i0 = [], index
|
|
5933
6056
|
loop do
|
|
5934
6057
|
if has_terminal?('\G[\\s]', true, index)
|
|
5935
|
-
r1 =
|
|
6058
|
+
r1 = true
|
|
5936
6059
|
@index += 1
|
|
5937
6060
|
else
|
|
5938
6061
|
r1 = nil
|
|
@@ -5954,14 +6077,17 @@ module ScientificNameClean
|
|
|
5954
6077
|
start_index = index
|
|
5955
6078
|
if node_cache[:space_hard].has_key?(index)
|
|
5956
6079
|
cached = node_cache[:space_hard][index]
|
|
5957
|
-
|
|
6080
|
+
if cached
|
|
6081
|
+
cached = SyntaxNode.new(input, index...(index + 1)) if cached == true
|
|
6082
|
+
@index = cached.interval.end
|
|
6083
|
+
end
|
|
5958
6084
|
return cached
|
|
5959
6085
|
end
|
|
5960
6086
|
|
|
5961
6087
|
s0, i0 = [], index
|
|
5962
6088
|
loop do
|
|
5963
6089
|
if has_terminal?('\G[\\s]', true, index)
|
|
5964
|
-
r1 =
|
|
6090
|
+
r1 = true
|
|
5965
6091
|
@index += 1
|
|
5966
6092
|
else
|
|
5967
6093
|
r1 = nil
|
|
@@ -5989,3 +6115,4 @@ end
|
|
|
5989
6115
|
class ScientificNameCleanParser < Treetop::Runtime::CompiledParser
|
|
5990
6116
|
include ScientificNameClean
|
|
5991
6117
|
end
|
|
6118
|
+
|