dimus-biodiversity 0.0.12 → 0.0.13
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +3 -1
- data/bin/nnparse +2 -1
- data/lib/biodiversity/parser.rb +19 -1
- data/lib/biodiversity/parser/scientific_name_canonical.rb +214 -0
- data/lib/biodiversity/parser/scientific_name_canonical.treetop +40 -0
- data/lib/biodiversity/parser/{scientific_name.rb → scientific_name_clean.rb} +1432 -369
- data/lib/biodiversity/parser/{scientific_name.treetop → scientific_name_clean.treetop} +197 -18
- data/lib/biodiversity/parser/scientific_name_dirty.rb +473 -0
- data/lib/biodiversity/parser/scientific_name_dirty.treetop +80 -0
- data/spec/parser/scientific_name.spec.rb +14 -252
- data/spec/parser/scientific_name_canonical.spec.rb +43 -0
- data/spec/parser/scientific_name_clean.spec.rb +393 -0
- data/spec/parser/scientific_name_dirty.spec.rb +90 -0
- metadata +10 -3
@@ -1,5 +1,5 @@
|
|
1
1
|
# encoding: UTF-8
|
2
|
-
grammar
|
2
|
+
grammar ScientificNameClean
|
3
3
|
|
4
4
|
rule composite_scientific_name
|
5
5
|
a:scientific_name space hybrid_separator space b:scientific_name space {
|
@@ -34,6 +34,34 @@ grammar ScientificName
|
|
34
34
|
rule scientific_name
|
35
35
|
name_part_authors_mix
|
36
36
|
/
|
37
|
+
space a:name_part space b:authors_part space c:taxon_concept_rank space d:authors_part space {
|
38
|
+
def value
|
39
|
+
a.value + " " + b.value + " " + c.apply(d)
|
40
|
+
end
|
41
|
+
|
42
|
+
def canonical
|
43
|
+
a.canonical
|
44
|
+
end
|
45
|
+
|
46
|
+
def details
|
47
|
+
a.details.merge(b.details).merge(c.details(d)).merge({:name_part_verbatim => a.text_value, :auth_part_verbatim => (b.text_value + " " + c.text_value + " " + d.text_value).gsub(/\s{2,}/, ' ').strip})
|
48
|
+
end
|
49
|
+
}
|
50
|
+
/
|
51
|
+
space a:name_part space b:taxon_concept_rank space c:authors_part space {
|
52
|
+
def value
|
53
|
+
a.value + " " + b.apply(c)
|
54
|
+
end
|
55
|
+
|
56
|
+
def canonical
|
57
|
+
a.canonical
|
58
|
+
end
|
59
|
+
|
60
|
+
def details
|
61
|
+
a.details.merge(b.details(c)).merge({:name_part_verbatim => a.text_value, :auth_part_verbatim => (b.text_value + " " + c.text_value).gsub(/\s{2,}/, ' ').strip})
|
62
|
+
end
|
63
|
+
}
|
64
|
+
/
|
37
65
|
space a:name_part space b:authors_part space c:status_part space {
|
38
66
|
def value
|
39
67
|
a.value + " " + b.value + " " + c.value
|
@@ -42,7 +70,7 @@ grammar ScientificName
|
|
42
70
|
a.canonical
|
43
71
|
end
|
44
72
|
def details
|
45
|
-
a.details.merge(b.details).merge(c.details)
|
73
|
+
a.details.merge(b.details).merge(c.details).merge({:name_part_verbatim => a.text_value, :auth_part_verbatim => (b.text_value + " " + c.text_value).gsub(/\s{2,}/, ' ').strip})
|
46
74
|
end
|
47
75
|
}
|
48
76
|
/
|
@@ -54,7 +82,7 @@ grammar ScientificName
|
|
54
82
|
a.canonical
|
55
83
|
end
|
56
84
|
def details
|
57
|
-
a.details.merge(b.details)
|
85
|
+
a.details.merge(b.details).merge({:name_part_verbatim => a.text_value, :auth_part_verbatim => b.text_value.gsub(/\s{2,}/, ' ')})
|
58
86
|
end
|
59
87
|
}
|
60
88
|
/
|
@@ -68,7 +96,7 @@ grammar ScientificName
|
|
68
96
|
end
|
69
97
|
|
70
98
|
def details
|
71
|
-
a.details.merge(b.details).merge({:is_valid => false})
|
99
|
+
a.details.merge(b.details).merge({:is_valid => false}).merge({:name_part_verbatim => a.text_value, :auth_part_verbatim => b.text_value.gsub(/\s{2,}/, ' ')})
|
72
100
|
end
|
73
101
|
}
|
74
102
|
/
|
@@ -104,25 +132,25 @@ grammar ScientificName
|
|
104
132
|
rule name_part_authors_mix
|
105
133
|
a:species_name space b:authors_part space c:subspecies_name space d:authors_part {
|
106
134
|
def value
|
107
|
-
a.value + " " + b.value + " " + c.value + " " + d.value
|
135
|
+
(a.value + " " + b.value + " " + c.value + " " + d.value).gsub(/\s+/,' ')
|
108
136
|
end
|
109
137
|
def canonical
|
110
|
-
a.canonical + " " + c.canonical
|
138
|
+
(a.canonical + " " + c.canonical).gsub(/\s+/,' ')
|
111
139
|
end
|
112
140
|
def details
|
113
|
-
a.details.merge(c.details).merge({:species_authors=>b.details, :subspecies_authors => d.details})
|
141
|
+
a.details.merge(c.details).merge({:species_authors=>b.details, :subspecies_authors => d.details}).merge({:name_part_verbatim => a.text_value, :auth_part_verbatim => (b.text_value + " " + c.text_value + " " + d.text_value).gsub(/\s{2,}/, ' ')})
|
114
142
|
end
|
115
143
|
}
|
116
144
|
/
|
117
145
|
a:species_name space b:authors_part space c:subspecies_name {
|
118
146
|
def value
|
119
|
-
a.value + " " + b.value + " " + c.value
|
147
|
+
(a.value + " " + b.value + " " + c.value).gsub(/\s+/,' ')
|
120
148
|
end
|
121
149
|
def canonical
|
122
|
-
a.canonical + " " + c.canonical
|
150
|
+
(a.canonical + " " + c.canonical).gsub(/\s+/,' ')
|
123
151
|
end
|
124
152
|
def details
|
125
|
-
a.details.merge(c.details).merge({:species_authors=>b.details})
|
153
|
+
a.details.merge(c.details).merge({:species_authors=>b.details}).merge({:name_part_verbatim => a.text_value, :auth_part_verbatim => (b.text_value + " " + c.text_value).gsub(/\s{2,}/, ' ')})
|
126
154
|
end
|
127
155
|
}
|
128
156
|
end
|
@@ -180,7 +208,7 @@ grammar ScientificName
|
|
180
208
|
end
|
181
209
|
|
182
210
|
rule original_authors_names_full
|
183
|
-
|
211
|
+
left_bracket space a:authors_names space right_bracket space [,]? space b:year {
|
184
212
|
def value
|
185
213
|
"(" + a.value + " " + b.value + ")"
|
186
214
|
end
|
@@ -189,7 +217,25 @@ grammar ScientificName
|
|
189
217
|
end
|
190
218
|
}
|
191
219
|
/
|
192
|
-
|
220
|
+
left_bracket space a:authors_names_full space right_bracket {
|
221
|
+
def value
|
222
|
+
"(" + a.value + ")"
|
223
|
+
end
|
224
|
+
def details
|
225
|
+
{:orig_authors => a.details[:authors]}
|
226
|
+
end
|
227
|
+
}
|
228
|
+
/
|
229
|
+
"[" space a:authors_names_full space "]" {
|
230
|
+
def value
|
231
|
+
"(" + a.value + ")"
|
232
|
+
end
|
233
|
+
def details
|
234
|
+
{:orig_authors => a.details[:authors]}
|
235
|
+
end
|
236
|
+
}
|
237
|
+
/
|
238
|
+
left_bracket space a:unknown_auth space right_bracket {
|
193
239
|
def value
|
194
240
|
"(" + a.value + ")"
|
195
241
|
end
|
@@ -197,10 +243,19 @@ grammar ScientificName
|
|
197
243
|
{:orig_authors => a.details[:authors]}
|
198
244
|
end
|
199
245
|
}
|
246
|
+
/
|
247
|
+
left_bracket space "?" space right_bracket {
|
248
|
+
def value
|
249
|
+
"(?)"
|
250
|
+
end
|
251
|
+
def details
|
252
|
+
{:orig_authors => "unknown"}
|
253
|
+
end
|
254
|
+
}
|
200
255
|
end
|
201
256
|
|
202
257
|
rule original_authors_revised_name
|
203
|
-
|
258
|
+
left_bracket space a:authors_revised_name space right_bracket {
|
204
259
|
def value
|
205
260
|
"(" + a.value + ")"
|
206
261
|
end
|
@@ -233,6 +288,19 @@ grammar ScientificName
|
|
233
288
|
}
|
234
289
|
/
|
235
290
|
authors_names
|
291
|
+
/
|
292
|
+
unknown_auth
|
293
|
+
end
|
294
|
+
|
295
|
+
rule unknown_auth
|
296
|
+
("auct."/"hort."/"anon."/"ht.") {
|
297
|
+
def value
|
298
|
+
text_value
|
299
|
+
end
|
300
|
+
def details
|
301
|
+
{:authors => "unknown"}
|
302
|
+
end
|
303
|
+
}
|
236
304
|
end
|
237
305
|
|
238
306
|
rule ex_sep
|
@@ -282,7 +350,14 @@ grammar ScientificName
|
|
282
350
|
end
|
283
351
|
|
284
352
|
rule author_word
|
285
|
-
"A S. Xu"
|
353
|
+
"A S. Xu" {
|
354
|
+
def value
|
355
|
+
text_value.strip
|
356
|
+
end
|
357
|
+
def details
|
358
|
+
{:authors => {:names => [value]}}
|
359
|
+
end
|
360
|
+
}
|
286
361
|
/
|
287
362
|
("anon."/"f."/"bis"/"arg."/"da"/"der"/"den"/"de"/"du"/"la"/"ter"/"van"/"et al.\{\?\}"/"et al.") {
|
288
363
|
def value
|
@@ -409,7 +484,7 @@ grammar ScientificName
|
|
409
484
|
end
|
410
485
|
|
411
486
|
rule rank
|
412
|
-
("
|
487
|
+
("morph."/"f.sp."/"B"/"ssp."/"mut."/"pseudovar."/"sect."/"ser."/"var."/"subvar."/ "[var.]" /"subsp."/"subf."/"race"/"α"
|
413
488
|
/"ββ"/"β"/"γ"/"δ"/"ε"/"φ"/"θ"/"μ"/"a."/"b."/"c."/"d."/"e."/"g."/"k."/"****"/"**"/"*")
|
414
489
|
{
|
415
490
|
def value
|
@@ -425,6 +500,26 @@ grammar ScientificName
|
|
425
500
|
{:subspecies => [{:rank => text_value, :value => (a.value rescue nil)}]}
|
426
501
|
end
|
427
502
|
}
|
503
|
+
/
|
504
|
+
rank_forma
|
505
|
+
end
|
506
|
+
|
507
|
+
rule rank_forma
|
508
|
+
("forma"/"form."/"fo."/"f.")
|
509
|
+
{
|
510
|
+
def value
|
511
|
+
"f."
|
512
|
+
end
|
513
|
+
def apply(a)
|
514
|
+
" " + value + " " + a.value
|
515
|
+
end
|
516
|
+
def canonical(a)
|
517
|
+
" " + a.value
|
518
|
+
end
|
519
|
+
def details(a = nil)
|
520
|
+
{:subspecies => [{:rank => value, :value => (a.value rescue nil)}]}
|
521
|
+
end
|
522
|
+
}
|
428
523
|
end
|
429
524
|
|
430
525
|
rule species_name
|
@@ -501,8 +596,42 @@ grammar ScientificName
|
|
501
596
|
}
|
502
597
|
end
|
503
598
|
|
599
|
+
rule taxon_concept_rank
|
600
|
+
"sec." {
|
601
|
+
def value
|
602
|
+
"sec."
|
603
|
+
end
|
604
|
+
def apply(a)
|
605
|
+
" " + value + " " + a.value
|
606
|
+
end
|
607
|
+
def details(a = nil)
|
608
|
+
{:taxon_concept => a.details}
|
609
|
+
end
|
610
|
+
}
|
611
|
+
end
|
612
|
+
|
613
|
+
# "subsect."/"subtrib."/"subgen."/"trib."/
|
614
|
+
rule genus_rank
|
615
|
+
("subsect."/"subtrib."/"subgen."/"trib.")
|
616
|
+
{
|
617
|
+
def value
|
618
|
+
text_value.strip
|
619
|
+
end
|
620
|
+
def apply(a)
|
621
|
+
" " + text_value + " " + a.value
|
622
|
+
end
|
623
|
+
def canonical(a)
|
624
|
+
" " + a.value
|
625
|
+
end
|
626
|
+
def details(a = nil)
|
627
|
+
{:subgenus => [{:rank => text_value, :value => (a.value rescue nil)}]}
|
628
|
+
end
|
629
|
+
}
|
630
|
+
end
|
631
|
+
|
632
|
+
|
504
633
|
rule cap_latin_word
|
505
|
-
a:[A-
|
634
|
+
a:[A-Zή] b:latin_word "?" {
|
506
635
|
def value
|
507
636
|
a.text_value + b.value
|
508
637
|
end
|
@@ -515,10 +644,38 @@ grammar ScientificName
|
|
515
644
|
{:uninomial => value}
|
516
645
|
end
|
517
646
|
}
|
647
|
+
/
|
648
|
+
a:[A-Zή] b:latin_word {
|
649
|
+
def value
|
650
|
+
a.text_value + b.value
|
651
|
+
end
|
652
|
+
|
653
|
+
def canonical
|
654
|
+
value
|
655
|
+
end
|
656
|
+
|
657
|
+
def details
|
658
|
+
{:uninomial => value}
|
659
|
+
end
|
660
|
+
}
|
661
|
+
/
|
662
|
+
("Ca"/"Ea"/"Ge"/"Ia"/"Io"/"Io"/"Ix"/"Lo"/"Oa"/"Ra"/"Ty"/"Ua"/"Aa"/"Ja"/"Zu"/"La"/"Qu"/"As"/"Ba") {
|
663
|
+
def value
|
664
|
+
text_value
|
665
|
+
end
|
666
|
+
|
667
|
+
def canonical
|
668
|
+
value
|
669
|
+
end
|
670
|
+
|
671
|
+
def details
|
672
|
+
{:uninomial => value}
|
673
|
+
end
|
674
|
+
}
|
518
675
|
end
|
519
676
|
|
520
677
|
rule latin_word
|
521
|
-
a:[a-
|
678
|
+
a:[a-zëüäöïé] b:full_name_letters {
|
522
679
|
def value
|
523
680
|
a.text_value + b.value
|
524
681
|
end
|
@@ -560,7 +717,7 @@ grammar ScientificName
|
|
560
717
|
end
|
561
718
|
|
562
719
|
rule valid_name_letters
|
563
|
-
[a-z
|
720
|
+
[a-z\-ëüäöïé]+ {
|
564
721
|
def value
|
565
722
|
text_value
|
566
723
|
end
|
@@ -618,6 +775,28 @@ grammar ScientificName
|
|
618
775
|
end
|
619
776
|
}
|
620
777
|
end
|
778
|
+
|
779
|
+
# Next two rules only for ( (author) )
|
780
|
+
# doesn't touch parentheses nested inside another pair, like (bla-bla-bla1 (bla-bla-bla2))
|
781
|
+
|
782
|
+
rule left_bracket
|
783
|
+
"( ("/"("
|
784
|
+
{
|
785
|
+
def value
|
786
|
+
"("
|
787
|
+
end
|
788
|
+
}
|
789
|
+
end
|
790
|
+
|
791
|
+
rule right_bracket
|
792
|
+
") )"/")"
|
793
|
+
{
|
794
|
+
def value
|
795
|
+
")"
|
796
|
+
end
|
797
|
+
}
|
798
|
+
end
|
799
|
+
|
621
800
|
|
622
801
|
rule space
|
623
802
|
[\s]* {
|
@@ -0,0 +1,473 @@
|
|
1
|
+
module ScientificNameDirty
|
2
|
+
include Treetop::Runtime
|
3
|
+
|
4
|
+
def root
|
5
|
+
@root || :composite_scientific_name
|
6
|
+
end
|
7
|
+
|
8
|
+
include ScientificNameClean
|
9
|
+
|
10
|
+
def _nt_composite_scientific_name
|
11
|
+
start_index = index
|
12
|
+
if node_cache[:composite_scientific_name].has_key?(index)
|
13
|
+
cached = node_cache[:composite_scientific_name][index]
|
14
|
+
@index = cached.interval.end if cached
|
15
|
+
return cached
|
16
|
+
end
|
17
|
+
|
18
|
+
r0 = super
|
19
|
+
|
20
|
+
node_cache[:composite_scientific_name][start_index] = r0
|
21
|
+
|
22
|
+
return r0
|
23
|
+
end
|
24
|
+
|
25
|
+
module Year0
|
26
|
+
def a
|
27
|
+
elements[0]
|
28
|
+
end
|
29
|
+
|
30
|
+
def space
|
31
|
+
elements[1]
|
32
|
+
end
|
33
|
+
|
34
|
+
def b
|
35
|
+
elements[2]
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
module Year1
|
40
|
+
|
41
|
+
def value
|
42
|
+
a.text_value + " " + b.text_value
|
43
|
+
end
|
44
|
+
def details
|
45
|
+
{:ambiguous_year => value}
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
module Year2
|
50
|
+
def a
|
51
|
+
elements[0]
|
52
|
+
end
|
53
|
+
|
54
|
+
def page_number
|
55
|
+
elements[1]
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
module Year3
|
60
|
+
|
61
|
+
def value
|
62
|
+
a.text_value
|
63
|
+
end
|
64
|
+
def details
|
65
|
+
{:year => value}
|
66
|
+
end
|
67
|
+
end
|
68
|
+
|
69
|
+
def _nt_year
|
70
|
+
start_index = index
|
71
|
+
if node_cache[:year].has_key?(index)
|
72
|
+
cached = node_cache[:year][index]
|
73
|
+
@index = cached.interval.end if cached
|
74
|
+
return cached
|
75
|
+
end
|
76
|
+
|
77
|
+
i0 = index
|
78
|
+
i1, s1 = index, []
|
79
|
+
s2, i2 = [], index
|
80
|
+
loop do
|
81
|
+
if input.index(Regexp.new('[\\d]'), index) == index
|
82
|
+
r3 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
83
|
+
@index += 1
|
84
|
+
else
|
85
|
+
r3 = nil
|
86
|
+
end
|
87
|
+
if r3
|
88
|
+
s2 << r3
|
89
|
+
else
|
90
|
+
break
|
91
|
+
end
|
92
|
+
end
|
93
|
+
if s2.empty?
|
94
|
+
self.index = i2
|
95
|
+
r2 = nil
|
96
|
+
else
|
97
|
+
r2 = instantiate_node(SyntaxNode,input, i2...index, s2)
|
98
|
+
end
|
99
|
+
s1 << r2
|
100
|
+
if r2
|
101
|
+
r4 = _nt_space
|
102
|
+
s1 << r4
|
103
|
+
if r4
|
104
|
+
r5 = _nt_approximate_year
|
105
|
+
s1 << r5
|
106
|
+
end
|
107
|
+
end
|
108
|
+
if s1.last
|
109
|
+
r1 = instantiate_node(SyntaxNode,input, i1...index, s1)
|
110
|
+
r1.extend(Year0)
|
111
|
+
r1.extend(Year1)
|
112
|
+
else
|
113
|
+
self.index = i1
|
114
|
+
r1 = nil
|
115
|
+
end
|
116
|
+
if r1
|
117
|
+
r0 = r1
|
118
|
+
else
|
119
|
+
i6, s6 = index, []
|
120
|
+
s7, i7 = [], index
|
121
|
+
loop do
|
122
|
+
if input.index(Regexp.new('[\\d]'), index) == index
|
123
|
+
r8 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
124
|
+
@index += 1
|
125
|
+
else
|
126
|
+
r8 = nil
|
127
|
+
end
|
128
|
+
if r8
|
129
|
+
s7 << r8
|
130
|
+
else
|
131
|
+
break
|
132
|
+
end
|
133
|
+
end
|
134
|
+
if s7.empty?
|
135
|
+
self.index = i7
|
136
|
+
r7 = nil
|
137
|
+
else
|
138
|
+
r7 = instantiate_node(SyntaxNode,input, i7...index, s7)
|
139
|
+
end
|
140
|
+
s6 << r7
|
141
|
+
if r7
|
142
|
+
r9 = _nt_page_number
|
143
|
+
s6 << r9
|
144
|
+
end
|
145
|
+
if s6.last
|
146
|
+
r6 = instantiate_node(SyntaxNode,input, i6...index, s6)
|
147
|
+
r6.extend(Year2)
|
148
|
+
r6.extend(Year3)
|
149
|
+
else
|
150
|
+
self.index = i6
|
151
|
+
r6 = nil
|
152
|
+
end
|
153
|
+
if r6
|
154
|
+
r0 = r6
|
155
|
+
else
|
156
|
+
r10 = _nt_double_year
|
157
|
+
if r10
|
158
|
+
r0 = r10
|
159
|
+
else
|
160
|
+
r11 = _nt_approximate_year
|
161
|
+
if r11
|
162
|
+
r0 = r11
|
163
|
+
else
|
164
|
+
r12 = super
|
165
|
+
if r12
|
166
|
+
r0 = r12
|
167
|
+
else
|
168
|
+
self.index = i0
|
169
|
+
r0 = nil
|
170
|
+
end
|
171
|
+
end
|
172
|
+
end
|
173
|
+
end
|
174
|
+
end
|
175
|
+
|
176
|
+
node_cache[:year][start_index] = r0
|
177
|
+
|
178
|
+
return r0
|
179
|
+
end
|
180
|
+
|
181
|
+
module ApproximateYear0
|
182
|
+
end
|
183
|
+
|
184
|
+
module ApproximateYear1
|
185
|
+
def space
|
186
|
+
elements[1]
|
187
|
+
end
|
188
|
+
|
189
|
+
def a
|
190
|
+
elements[2]
|
191
|
+
end
|
192
|
+
|
193
|
+
def space
|
194
|
+
elements[3]
|
195
|
+
end
|
196
|
+
|
197
|
+
end
|
198
|
+
|
199
|
+
module ApproximateYear2
|
200
|
+
|
201
|
+
def value
|
202
|
+
"(" + a.text_value + ")"
|
203
|
+
end
|
204
|
+
def details
|
205
|
+
{:approximate_year => value}
|
206
|
+
end
|
207
|
+
end
|
208
|
+
|
209
|
+
def _nt_approximate_year
|
210
|
+
start_index = index
|
211
|
+
if node_cache[:approximate_year].has_key?(index)
|
212
|
+
cached = node_cache[:approximate_year][index]
|
213
|
+
@index = cached.interval.end if cached
|
214
|
+
return cached
|
215
|
+
end
|
216
|
+
|
217
|
+
i0, s0 = index, []
|
218
|
+
if input.index("[", index) == index
|
219
|
+
r1 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
220
|
+
@index += 1
|
221
|
+
else
|
222
|
+
terminal_parse_failure("[")
|
223
|
+
r1 = nil
|
224
|
+
end
|
225
|
+
s0 << r1
|
226
|
+
if r1
|
227
|
+
r2 = _nt_space
|
228
|
+
s0 << r2
|
229
|
+
if r2
|
230
|
+
i3, s3 = index, []
|
231
|
+
if input.index(Regexp.new('[\\d]'), index) == index
|
232
|
+
r4 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
233
|
+
@index += 1
|
234
|
+
else
|
235
|
+
r4 = nil
|
236
|
+
end
|
237
|
+
s3 << r4
|
238
|
+
if r4
|
239
|
+
if input.index(Regexp.new('[\\d]'), index) == index
|
240
|
+
r5 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
241
|
+
@index += 1
|
242
|
+
else
|
243
|
+
r5 = nil
|
244
|
+
end
|
245
|
+
s3 << r5
|
246
|
+
if r5
|
247
|
+
if input.index(Regexp.new('[\\d]'), index) == index
|
248
|
+
r6 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
249
|
+
@index += 1
|
250
|
+
else
|
251
|
+
r6 = nil
|
252
|
+
end
|
253
|
+
s3 << r6
|
254
|
+
if r6
|
255
|
+
s7, i7 = [], index
|
256
|
+
loop do
|
257
|
+
if input.index(Regexp.new('[\\d\\?]'), index) == index
|
258
|
+
r8 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
259
|
+
@index += 1
|
260
|
+
else
|
261
|
+
r8 = nil
|
262
|
+
end
|
263
|
+
if r8
|
264
|
+
s7 << r8
|
265
|
+
else
|
266
|
+
break
|
267
|
+
end
|
268
|
+
end
|
269
|
+
if s7.empty?
|
270
|
+
self.index = i7
|
271
|
+
r7 = nil
|
272
|
+
else
|
273
|
+
r7 = instantiate_node(SyntaxNode,input, i7...index, s7)
|
274
|
+
end
|
275
|
+
s3 << r7
|
276
|
+
end
|
277
|
+
end
|
278
|
+
end
|
279
|
+
if s3.last
|
280
|
+
r3 = instantiate_node(SyntaxNode,input, i3...index, s3)
|
281
|
+
r3.extend(ApproximateYear0)
|
282
|
+
else
|
283
|
+
self.index = i3
|
284
|
+
r3 = nil
|
285
|
+
end
|
286
|
+
s0 << r3
|
287
|
+
if r3
|
288
|
+
r9 = _nt_space
|
289
|
+
s0 << r9
|
290
|
+
if r9
|
291
|
+
s10, i10 = [], index
|
292
|
+
loop do
|
293
|
+
if input.index("]", index) == index
|
294
|
+
r11 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
295
|
+
@index += 1
|
296
|
+
else
|
297
|
+
terminal_parse_failure("]")
|
298
|
+
r11 = nil
|
299
|
+
end
|
300
|
+
if r11
|
301
|
+
s10 << r11
|
302
|
+
else
|
303
|
+
break
|
304
|
+
end
|
305
|
+
end
|
306
|
+
if s10.empty?
|
307
|
+
self.index = i10
|
308
|
+
r10 = nil
|
309
|
+
else
|
310
|
+
r10 = instantiate_node(SyntaxNode,input, i10...index, s10)
|
311
|
+
end
|
312
|
+
s0 << r10
|
313
|
+
end
|
314
|
+
end
|
315
|
+
end
|
316
|
+
end
|
317
|
+
if s0.last
|
318
|
+
r0 = instantiate_node(SyntaxNode,input, i0...index, s0)
|
319
|
+
r0.extend(ApproximateYear1)
|
320
|
+
r0.extend(ApproximateYear2)
|
321
|
+
else
|
322
|
+
self.index = i0
|
323
|
+
r0 = nil
|
324
|
+
end
|
325
|
+
|
326
|
+
node_cache[:approximate_year][start_index] = r0
|
327
|
+
|
328
|
+
return r0
|
329
|
+
end
|
330
|
+
|
331
|
+
module DoubleYear0
|
332
|
+
end
|
333
|
+
|
334
|
+
module DoubleYear1
|
335
|
+
|
336
|
+
def value
|
337
|
+
text_value
|
338
|
+
end
|
339
|
+
def details
|
340
|
+
{:year => value}
|
341
|
+
end
|
342
|
+
end
|
343
|
+
|
344
|
+
def _nt_double_year
|
345
|
+
start_index = index
|
346
|
+
if node_cache[:double_year].has_key?(index)
|
347
|
+
cached = node_cache[:double_year][index]
|
348
|
+
@index = cached.interval.end if cached
|
349
|
+
return cached
|
350
|
+
end
|
351
|
+
|
352
|
+
i0, s0 = index, []
|
353
|
+
if input.index(Regexp.new('[0-9]'), index) == index
|
354
|
+
r1 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
355
|
+
@index += 1
|
356
|
+
else
|
357
|
+
r1 = nil
|
358
|
+
end
|
359
|
+
s0 << r1
|
360
|
+
if r1
|
361
|
+
s2, i2 = [], index
|
362
|
+
loop do
|
363
|
+
if input.index(Regexp.new('[0-9A-Za-z\\?\\-]'), index) == index
|
364
|
+
r3 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
365
|
+
@index += 1
|
366
|
+
else
|
367
|
+
r3 = nil
|
368
|
+
end
|
369
|
+
if r3
|
370
|
+
s2 << r3
|
371
|
+
else
|
372
|
+
break
|
373
|
+
end
|
374
|
+
end
|
375
|
+
if s2.empty?
|
376
|
+
self.index = i2
|
377
|
+
r2 = nil
|
378
|
+
else
|
379
|
+
r2 = instantiate_node(SyntaxNode,input, i2...index, s2)
|
380
|
+
end
|
381
|
+
s0 << r2
|
382
|
+
end
|
383
|
+
if s0.last
|
384
|
+
r0 = instantiate_node(SyntaxNode,input, i0...index, s0)
|
385
|
+
r0.extend(DoubleYear0)
|
386
|
+
r0.extend(DoubleYear1)
|
387
|
+
else
|
388
|
+
self.index = i0
|
389
|
+
r0 = nil
|
390
|
+
end
|
391
|
+
|
392
|
+
node_cache[:double_year][start_index] = r0
|
393
|
+
|
394
|
+
return r0
|
395
|
+
end
|
396
|
+
|
397
|
+
module PageNumber0
|
398
|
+
def space
|
399
|
+
elements[1]
|
400
|
+
end
|
401
|
+
|
402
|
+
end
|
403
|
+
|
404
|
+
module PageNumber1
|
405
|
+
|
406
|
+
def value
|
407
|
+
end
|
408
|
+
end
|
409
|
+
|
410
|
+
def _nt_page_number
|
411
|
+
start_index = index
|
412
|
+
if node_cache[:page_number].has_key?(index)
|
413
|
+
cached = node_cache[:page_number][index]
|
414
|
+
@index = cached.interval.end if cached
|
415
|
+
return cached
|
416
|
+
end
|
417
|
+
|
418
|
+
i0, s0 = index, []
|
419
|
+
if input.index(":", index) == index
|
420
|
+
r1 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
421
|
+
@index += 1
|
422
|
+
else
|
423
|
+
terminal_parse_failure(":")
|
424
|
+
r1 = nil
|
425
|
+
end
|
426
|
+
s0 << r1
|
427
|
+
if r1
|
428
|
+
r2 = _nt_space
|
429
|
+
s0 << r2
|
430
|
+
if r2
|
431
|
+
s3, i3 = [], index
|
432
|
+
loop do
|
433
|
+
if input.index(Regexp.new('[\\d]'), index) == index
|
434
|
+
r4 = instantiate_node(SyntaxNode,input, index...(index + 1))
|
435
|
+
@index += 1
|
436
|
+
else
|
437
|
+
r4 = nil
|
438
|
+
end
|
439
|
+
if r4
|
440
|
+
s3 << r4
|
441
|
+
else
|
442
|
+
break
|
443
|
+
end
|
444
|
+
end
|
445
|
+
if s3.empty?
|
446
|
+
self.index = i3
|
447
|
+
r3 = nil
|
448
|
+
else
|
449
|
+
r3 = instantiate_node(SyntaxNode,input, i3...index, s3)
|
450
|
+
end
|
451
|
+
s0 << r3
|
452
|
+
end
|
453
|
+
end
|
454
|
+
if s0.last
|
455
|
+
r0 = instantiate_node(SyntaxNode,input, i0...index, s0)
|
456
|
+
r0.extend(PageNumber0)
|
457
|
+
r0.extend(PageNumber1)
|
458
|
+
else
|
459
|
+
self.index = i0
|
460
|
+
r0 = nil
|
461
|
+
end
|
462
|
+
|
463
|
+
node_cache[:page_number][start_index] = r0
|
464
|
+
|
465
|
+
return r0
|
466
|
+
end
|
467
|
+
|
468
|
+
end
|
469
|
+
|
470
|
+
class ScientificNameDirtyParser < Treetop::Runtime::CompiledParser
|
471
|
+
include ScientificNameDirty
|
472
|
+
end
|
473
|
+
|