peggy 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/Copy of builder.rb +363 -0
- data/lib/Copy of peg.rb +135 -0
- data/lib/ast.rb +86 -0
- data/lib/builder.rb +359 -0
- data/lib/parser.rb +203 -0
- data/lib/peg.rb +68 -0
- data/lib/peggy.rb +50 -0
- data/test/test_builder.rb +72 -0
- data/test/test_parser.rb +119 -0
- data/test/test_peg.rb +54 -0
- data/test/test_peggy.rb +66 -0
- data/test/tests.rb +6 -0
- metadata +61 -0
data/lib/ast.rb
ADDED
@@ -0,0 +1,86 @@
|
|
1
|
+
module Peggy
|
2
|
+
# A node of the parse tree. Children form a singly linked list: +first+ is the
# first child and every child's +next+ is its following sibling.
class Node
  attr_accessor :name, :first, :next, :parent, :from, :to

  # name:: label of the node (optional, so an anonymous node can be created).
  def initialize(name = nil)
    @name = name
  end

  # Append +child+ after the current last child and make this node its parent.
  # Returns self so that appends can be chained.
  def <<(child)
    child.parent = self
    tail = last
    if tail
      tail.next = child
    else
      # Must go through the writer: a bare `first = child` would only
      # create a local variable and the child would be lost.
      self.first = child
    end
    self
  end

  # Yield each direct child in insertion order. Without a block an
  # Enumerator over the children is returned.
  def each
    return enum_for(:each) unless block_given?
    child = first
    while child
      yield child
      child = child.next
    end
  end

  # Render this node and all of its descendants, one node per line, indenting
  # every level by two spaces. +tabs+ is the indentation of this node; the
  # string passed in is not modified.
  def format(tabs = '')
    result = "#{tabs}#{self}\n"
    child_tabs = "#{tabs}  "
    each { |child| result << child.format(child_tabs) }
    result
  end

  # The last direct child, or nil when the node has no children.
  def last
    node = first
    node = node.next while node && node.next
    node
  end

  # With +source+ given, return the slice of it covered by this node;
  # otherwise a short description in the form name[from...to].
  def to_s(source = nil)
    source ? source[from...to] : "#{name}[#{from}...#{to}]"
  end
end
|
47
|
+
|
48
|
+
# Tree of Node objects assembled from a parser's memoized parse results.
#
# The results table is expected to look like the one Parser#match? fills in:
# source index => { production => end index, :found_order => [productions] }.
#
# NOTE(review): the original implementation could not run (undefined locals,
# wrong hash level for :found_order, recursion on a parent that was not set
# yet). The reconstruction below assumes that productions found at the same
# index are nested, outermost last in :found_order, and that siblings start
# at the first populated index at or after the previous node's end. Memo
# entries left behind by alternatives that later failed are not filtered out.
# Confirm against the intended design.
class AST
  # Synthetic node under which the top level matches are attached.
  attr_reader :root

  # results:: parse results table; when nil only an empty root is created.
  # index::   source index to start building from.
  def initialize(results = nil, index = 0)
    @root = Node.new :root
    return unless results
    # Highest populated source index; used to stop the forward scan.
    @last_index = results.keys.grep(Integer).max
    build_one results, index, @root
  end

  # Build a tree from the parse_results of +parser+, starting at +index+.
  def self.build(parser, index = 0)
    new parser.parse_results, index
  end

  # Indented multi-line dump of the whole tree.
  def to_s
    @root.format ''
  end

  private

  # Attach the productions found at (or after) +index+ below +parent+,
  # nesting them from the outermost to the innermost, then continue with the
  # siblings that follow each of them.
  def build_one(results, index, parent)
    index = next_row_index results, index, parent.to
    return unless index
    row = results[index]
    top = parent
    row[:found_order].reverse_each do |name|
      node = Node.new name
      node.from = index
      node.to = row[name]
      # Link first: build_rest needs node.parent to be set.
      top << node
      # A zero width match would recurse on the same index forever.
      build_rest results, node if node.to > index
      top = node
    end
  end

  # Continue building the siblings that follow +previous+.
  def build_rest(results, previous)
    build_one results, previous.to, previous.parent
  end

  # First index >= +index+ (and below +limit+, when given) that recorded at
  # least one successful production, or nil when there is none. Hash#fetch is
  # used so that a results hash with an auto-creating default block is not
  # grown while scanning.
  def next_row_index(results, index, limit)
    return nil unless @last_index
    while index <= @last_index && (limit.nil? || index < limit)
      row = results.fetch(index, nil)
      return index if row && row[:found_order]
      index += 1
    end
    nil
  end
end
|
86
|
+
end # Peggy
|
data/lib/builder.rb
ADDED
@@ -0,0 +1,359 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'parser'
|
3
|
+
# require File.join(File.dirname(__FILE__), 'parser')
|
4
|
+
|
5
|
+
module Peggy
|
6
|
+
|
7
|
+
# Base syntax element class.
|
8
|
+
class Element
  class << self
    # Factory: forwards all arguments to the constructor and returns the new element.
    def build(*args)
      new(*args)
    end
  end

  # Test whether this element matches at +index+. Subclasses return the index
  # following the match, or NO_MATCH. The base implementation always raises.
  def match(parser, index)
    raise "Must override match"
  end

  # Hook through which matchers pass their result; returns +index+ unchanged.
  def report(index)
    # puts "#{to_s} #{index}"
    index
  end
end
|
25
|
+
|
26
|
+
# An element that matches a sequence of elements. All must match for the sequence to match.
|
27
|
+
class Sequence < Element
  # Append a child element; the child list is created on first use.
  # Returns the child list.
  def add(element)
    @list ||= []
    @list << element
  end

  # Synonym for add(element).
  alias_method :<<, :add

  # Child at position +index+.
  def [](index)
    @list[index]
  end

  # Iterate over the children in the order they were added.
  def each(&blk)
    @list.each(&blk)
  end

  # Match every child in turn, feeding each child the end index of the one
  # before it. Returns NO_MATCH as soon as a child fails, otherwise the end
  # index of the last child. Raises if no child was ever added.
  def match(parser, index)
    raise "no children added to sequence" unless @list
    cursor = index
    each do |child|
      cursor = child.match(parser, cursor)
      return NO_MATCH unless cursor
    end
    report(cursor)
  end
end
|
58
|
+
|
59
|
+
# An element which matches any one of its children. The children are tested in order. The first
|
60
|
+
# to match wins.
|
61
|
+
class Alternatives < Sequence
  # Try the children in order at the same +index+; the first one that matches
  # wins and its end index is returned. Returns NO_MATCH when none matches.
  # Raises if no child was ever added.
  def match(parser, index)
    raise "no children added to alternate" unless @list
    each do |option|
      hit = option.match(parser, index)
      return report(hit) if hit
    end
    report(NO_MATCH)
  end
end
|
73
|
+
|
74
|
+
# An element which tries its single child multiple times. It is greedy, meaning it will continue
|
75
|
+
# to match as long as possible, unless the range specifies a maximum number of matches.
|
76
|
+
class Multiple < Element
  # Upper bound used by the open ended repetitions.
  MANY = 32767

  # Range holding the minimum and maximum number of repetitions.
  attr_accessor :range

  # The element that is matched repeatedly.
  attr_accessor :child

  # range:: allowed number of repetitions.
  def initialize(range)
    @range = range
  end

  # Synonym for child=(element).
  alias_method :<<, :child=

  # Match the child repeatedly, each attempt starting where the previous one
  # ended, until it fails or the upper bound of the range is reached. Returns
  # the end index of the last successful attempt when the number of matches
  # lies within the range, NO_MATCH otherwise.
  def match(parser, index)
    raise "multiple element child not set" unless child
    raise "multiple element range not set" unless range
    matches = 0
    cursor = index
    until matches >= range.last
      step = child.match(parser, cursor)
      break unless step
      cursor = step
      matches += 1
    end
    outcome = range === matches ? cursor : NO_MATCH
    report(outcome)
  end
end
|
108
|
+
|
109
|
+
# Matcher of 0 or more times.
|
110
|
+
class AnyNumber < Multiple
  # Fix the repetition range at zero up to MANY matches.
  def initialize
    super(0..MANY)
  end
end
|
115
|
+
|
116
|
+
# Matcher of 1 or more times.
|
117
|
+
class AtLeastOne < Multiple
  # Fix the repetition range at one up to MANY matches.
  def initialize
    super(1..MANY)
  end
end
|
122
|
+
|
123
|
+
# Matcher of 0 or 1 time.
|
124
|
+
class Optional < Multiple
  # Fix the repetition range at zero or one match.
  def initialize
    super(0..1)
  end
end
|
129
|
+
|
130
|
+
# An element which tries its single child but does not advance the index if found.
|
131
|
+
# If not found, however, it returns NO_MATCH. Used for a positive semantic predicate.
|
132
|
+
class Positive < Element
  # The element that is looked ahead for.
  attr_accessor :child

  # Synonym for child=(element).
  alias_method :<<, :child=

  # Look ahead: try the child at +index+ without consuming anything. Returns
  # the unchanged +index+ when the child matches, NO_MATCH when it does not.
  def match(parser, index)
    raise "positive element child not set" unless child
    return NO_MATCH unless child.match(parser, index)
    index
  end
end
|
147
|
+
|
148
|
+
# An element which tries its single child but does not advance the index if not found.
|
149
|
+
# If found, however, it returns NO_MATCH. Used for a negative semantic predicate.
|
150
|
+
class Negative < Positive
  # Negative look ahead: returns NO_MATCH when the child matches at +index+,
  # and the unchanged +index+ when it does not.
  def match(parser, index)
    raise "negative element child not set" unless child
    return NO_MATCH if child.match(parser, index)
    index
  end
end
|
157
|
+
|
158
|
+
# Match another production in the grammar.
|
159
|
+
class Reference < Element
  # The name (a Symbol, or nil when not set yet) of the production to look up and match.
  attr_reader :name

  # name:: production name; may be omitted and assigned later through name=.
  def initialize(name = nil)
    self.name = name
  end

  # Set the name of the production to match. The value is converted to a
  # Symbol; nil is stored as is so that the documented default of the
  # constructor no longer fails on nil.to_sym.
  def name=(value)
    @name = value.nil? ? nil : value.to_sym
  end

  # Match the referenced production through the parser so the result is
  # memoized. Returns whatever parser.match? returns: the end index, or
  # NO_MATCH. Raises if the name was never set.
  def match(parser, index)
    raise "reference name not set" unless name
    parser.match? name, index
  end

  # The production name as a String (empty when the name is not set).
  def to_s
    name.to_s
  end
end
|
184
|
+
|
185
|
+
# Matcher of a grammar production. The one and only child defines the production.
|
186
|
+
class Production < Reference
  # The element that defines this production.
  attr_accessor :child

  # name::  production name, handed to the Reference constructor.
  # child:: defining element; may be assigned later.
  def initialize(name = nil, child = nil)
    super(name)
    @child = child
  end

  # Synonym for child=(element).
  alias_method :<<, :child=

  # Match the defining element once at +index+. Returns its end index or
  # NO_MATCH. Raises when either the name or the child is missing.
  def match(parser, index)
    raise "production name not set" unless name
    raise "production child not set" unless child
    outcome = @child.match(parser, index)
    report(outcome)
  end
end
|
207
|
+
|
208
|
+
# Matcher of a literal string or regular expression.
|
209
|
+
class Literal < Element
  # Value to match: a String or a Regexp.
  attr_reader :value

  # value:: the literal to match; stored as given.
  def initialize(value = nil)
    @value = value
  end

  # Set the value to match. A Regexp is anchored to the beginning of the
  # text it is applied to; any other value is stored as given.
  def value=(literal)
    literal = anchor_regexp(literal) if literal.is_a?(Regexp)
    @value = literal
  end

  # Match the literal through parser.literal?. Returns the end index when it
  # matches, NO_MATCH otherwise.
  def match(parser, index)
    report parser.literal?(value, index)
  end

  # The inspected value, e.g. "\"if\"" or "/\\d+/".
  def to_s
    value.inspect
  end

  private

  # Return +re+ itself when its source already starts with \A, otherwise a
  # new Regexp with the same options whose source is wrapped in \A(...).
  # The original setter called an undefined `correct_regexp`; this mirrors
  # Parser#correct_regexp! locally so the element does not need a parser.
  def anchor_regexp(re)
    source = re.source
    source[0..1] == '\\A' ? re : Regexp.new("\\A(#{source})", re.options)
  end
end
|
235
|
+
|
236
|
+
# Parser builder. The built in methods create syntax elements. Any other
|
237
|
+
# method called on this object create references to production, or actual
|
238
|
+
# productions, if called at the top level.
|
239
|
+
# Todo: Change to a class and separate from Parser.
|
240
|
+
class Builder < Parser
  # Productions built so far, keyed by name (nil until the first one is defined).
  attr_reader :productions
  # Element currently being filled in, or nil at the top level.
  attr_reader :parent

  # A new builder starts in building mode; parse? switches it to parsing mode.
  def initialize
    @building = true
  end

  # Look up a production by name. Returns nil when it is unknown or when no
  # production has been defined yet.
  def [](index)
    productions && productions[index]
  end

  # While building: inside a production any unknown method adds a Reference
  # with that name to the current parent; at the top level an unknown method
  # called with a block defines a production of that name whose content is
  # declared by the block.
  # While parsing: an unknown method whose name is a known production matches
  # that production at the index given as first argument.
  # Everything else falls through to the default method_missing.
  def method_missing(name, *args)
    if @building
      if @parent
        @parent << Reference.new(name)
      elsif block_given?
        @productions ||= {}
        prod = Production.new name
        @parent = prod
        begin
          yield
        ensure
          # Leave the builder at the top level even when the block raises.
          @parent = nil
        end
        @productions[name] = prod
      else
        super
      end
    else
      prod = @productions && @productions[name]
      return super unless prod
      # puts "matching #{name} at #{args.first}"
      prod.match self, args.first
    end
  end

  # Add an Alternatives element to the parent.
  def one(&blk)
    build_piece Alternatives, blk
  end
  # Synonym for one().
  alias_method :alt, :one

  # With exactly one argument this is the parser's eof production applied to
  # that index; otherwise it is treated like any other production name.
  def eof(*args)
    if args.length == 1
      super args.first
    else
      method_missing :eof, *args
    end
  end

  # Add a Sequence element to the parent.
  def each(&blk)
    build_piece Sequence, blk
  end
  # Synonym for each().
  alias_method :seq, :each

  # Add a Literal element to the parent. Several values are wrapped in an
  # Alternatives element holding one Literal per value.
  def lit(*values)
    if values.size == 1
      build_piece Literal, nil, values.first
    else
      one { values.each { |v| build_piece Literal, nil, v } }
    end
  end

  # Add an AnyNumber element to the parent.
  def many(&blk)
    build_piece AnyNumber, blk
  end

  # Add an Optional element to the parent.
  def opt(&blk)
    build_piece Optional, blk
  end

  # Add an AtLeastOne element to the parent.
  def some(&blk)
    build_piece AtLeastOne, blk
  end

  # Add a Negative (look ahead must fail) element to the parent.
  def neg(&blk)
    build_piece Negative, blk
  end

  # Add a Positive (look ahead must succeed) element to the parent.
  def pos(&blk)
    build_piece Positive, blk
  end

  # Leave building mode and parse +goal+ starting at +index+. The source must
  # have been assigned to source_text beforehand.
  def parse?(goal, index = 0)
    @building = nil
    # Parser#parse? takes (goal, source, index): pass nil for the source so
    # that index is not mistaken for it.
    super goal, nil, index
  end

  private

  # Productions defined on this builder answer respond_to?.
  def respond_to_missing?(name, include_private = false)
    (!@productions.nil? && @productions.key?(name)) || super
  end

  # Add an object of klass to the parent and run +blk+ (if any) with that
  # object as the current parent. If value is given it is passed to the klass
  # constructor. Returns the new element. Raises when used outside of a
  # production, where there is no parent to add to.
  def build_piece(klass, blk = nil, value = nil)
    raise "#{klass.name} must be declared inside a production" unless @parent
    # puts "building #{klass.name} with #{value.inspect}"
    elem = value ? klass.new(value) : klass.new
    @parent << elem
    if blk
      outer = @parent
      @parent = elem
      begin
        blk.call
      ensure
        # Restore the enclosing parent even when the block raises.
        @parent = outer
      end
    end
    elem
  end
end # Builder
|
358
|
+
|
359
|
+
end # Peggy
|
data/lib/parser.rb
ADDED
@@ -0,0 +1,203 @@
|
|
1
|
+
require 'pp'
|
2
|
+
|
3
|
+
# Peggy is a packrat parsing engine. Packrat parsers memoize every production so that
|
4
|
+
# parses can happen in linear time. No production needs to be processed more than once for
|
5
|
+
# a given position of the source. See http://pdos.csail.mit.edu/~baford/packrat/ for
|
6
|
+
# more details.
|
7
|
+
#
|
8
|
+
# Peggy also incorporates Parsing Expression Grammar (PEG) as proposed by Bryan Ford,
|
9
|
+
# as one of several input grammars. PEG is a formalized grammar specification needing
|
10
|
+
# no separate lexer/scanner step. See http://pdos.csail.mit.edu/~baford/packrat/popl04/
|
11
|
+
#
|
12
|
+
# As good as packrat parsers are, they have a few limitations. They cannot handle left
|
13
|
+
# recursion of a production, meaning a production cannot reference itself as the first
|
14
|
+
# element in a sequence. Also, memoizing of production results means that memory consumption
|
15
|
+
# increases with the size of the source being parsed. This is not usually a concern, except
|
16
|
+
# when attempting to parse multi-megabyte source files, such as a huge XML database.
|
17
|
+
module Peggy
|
18
|
+
|
19
|
+
# Result of every matcher when nothing matched.
NO_MATCH = false
# Placeholder stored in the parse results while a production is being
# evaluated; finding it again means the production re-entered itself at the
# same index (left recursion).
IN_USE = true
|
23
|
+
|
24
|
+
# Packrat parser class. Note all methods have a trailing exclamation (!) or question
|
25
|
+
# mark (?), or have long names with underscores (_). This is because productions are
|
26
|
+
# methods and we need to avoid name collisions. To use this class you must subclass
|
27
|
+
# Parser and provide your productions as methods. Your productions must call match?
|
28
|
+
# or one of the protected convenience routines to perform parsing. Productions must
|
29
|
+
# never call another production directly, or results will not get memoized and you
|
30
|
+
# will slow down your parse considerably, and possibly risk getting into an infinite
|
31
|
+
# recursion (until the stack blows its top). Note, as a convenience in writing
|
32
|
+
# productions, you can call any match? function multiple times, passing each returned
|
33
|
+
# index, such as in a sequence, without checking the results of each production.
|
34
|
+
class Parser
  # Tells the parser to print intermediate results if set.
  attr_accessor :debug_flag

  # The source to parse; can be set prior to calling parse?().
  attr_accessor :source_text

  # The results of the parse: a hash keyed by source index whose values are
  # hashes mapping production symbols to end indexes (or NO_MATCH), plus a
  # :found_order list of the productions that matched at that index.
  attr_reader :parse_results

  # The productions to ignore (skipped before every match).
  attr_accessor :ignore_productions

  # Return a range (or character) of the source_text.
  def [](range)
    raise "source_text not set" if source_text.nil?
    source_text[range]
  end

  # Invoke the parser on the production +goal+. Pass the source here or
  # assign source_text beforehand. If +index+ is given, parsing starts there.
  # Returns the end index of the match or NO_MATCH.
  #
  # For compatibility with callers that pass (goal, index), an Integer in the
  # +source+ position is taken as the start index, not as the source.
  def parse?(goal, source = nil, index = 0)
    if source.is_a?(Integer)
      index = source if index == 0
      source = nil
    end
    # Must go through the writer: a bare `source_text = source` would only
    # create a local variable and the attribute would stay unset.
    self.source_text = source unless source.nil?
    # Hash of automatic hashes
    @parse_results = Hash.new { |h1, k1| h1[k1] = {} }
    @keys = nil
    index = match? goal, index
    pp parse_results if debug_flag
    index
  end

  # Queries the parse results for a hierarchy of production matches. This can
  # only be called after parse_results have been set by a parse.
  #
  # NOTE(review): this relies on a find? method that is not defined in the
  # visible source, so it raises NoMethodError as soon as an argument is
  # given; the documented "array of index ranges" result is not produced yet.
  def query?(*args)
    raise "You must first call parse!" unless parse_results
    @keys = @parse_results.keys.sort unless @keys
    index = 0
    args.each do |arg|
      index = find? arg, index
    end
  end

  # Try to match a production from the given index. Returns the end index if
  # found or the start index if not found.
  def allow?(goal, index)
    return NO_MATCH if index == NO_MATCH # allow users to not check results of a sequence
    found = match? goal, index
    found == NO_MATCH ? index : found
  end

  # Try to match a production from the given index, then backtrack. Returns
  # index if found or NO_MATCH if not.
  def check?(goal, index)
    return NO_MATCH if index == NO_MATCH # allow users to not check results of a sequence
    found = match? goal, index
    found == NO_MATCH ? NO_MATCH : index
  end

  # Try not to match a production from the given index, then backtrack.
  # Returns index if not found or NO_MATCH if found.
  def dissallow?(goal, index)
    return NO_MATCH if index == NO_MATCH # allow users to not check results of a sequence
    found = match? goal, index
    found == NO_MATCH ? index : NO_MATCH
  end

  # Special production that only matches the end of source_text. Note, this
  # method does not end in (?) or (!) because it is meant to be used as a
  # normal production.
  def eof(index)
    return NO_MATCH if index == NO_MATCH # allow users to not check results of a sequence
    index >= source_text.length ? index : NO_MATCH
  end

  # Match a production from the given index, memoizing the outcome per index
  # and production. Returns the end index if found or NO_MATCH if not found.
  # Raises when the production re-enters itself at the same index.
  def match?(goal, index)
    return NO_MATCH if index == NO_MATCH # allow users to not check results of a sequence
    index = ignore? index unless @ignoring
    goal = goal.to_sym
    position = parse_results[index]
    found = position.fetch(goal) do
      # Marks the production as being evaluated so that a left recursion is
      # detected instead of overflowing the stack.
      position[goal] = IN_USE
      if (result = send goal, index)
        (position[:found_order] ||= []) << goal
      end
      position[goal] = result
    end
    # Checked before the debug output, which would otherwise fail on the range index...true.
    raise "Parser cannot handle infinite (left) recursions. Please rewrite usage of '#{goal}'." if found == IN_USE
    puts "found #{goal} at #{index}...#{found} #{source_text[index...found].inspect}" if found && debug_flag
    found
  end

  # Match tokens that should be ignored. Used by match?(). Returns the end
  # index if found or the start index if not found. Subclasses should override
  # this method if they wish to ignore other text, such as comments.
  def ignore?(index)
    return NO_MATCH if index == NO_MATCH # allow users to not check results of a sequence
    return index if @ignoring || ignore_productions.nil?
    @ignoring = true
    begin
      ignore_productions.each do |prod|
        index = allow? prod, index
      end
    ensure
      # Reset even when a production raises; otherwise ignoring would stay
      # switched off for the rest of the parser's life.
      @ignoring = nil
    end
    index
  end

  # Match a literal string or regular expression from the given index.
  # Returns the end index if found or NO_MATCH if not found. Raises for any
  # other kind of value.
  def literal?(value, index)
    return NO_MATCH if index == NO_MATCH # allow users to not check results of a sequence
    case value
    when String
      string? value, index
    when Regexp
      regexp? value, index
    else
      raise "Unknown literal: #{value.inspect}"
    end
  end

  # Match a string from the given index. Returns the end index if found or
  # NO_MATCH if not found.
  def string?(value, index)
    return NO_MATCH if index == NO_MATCH # allow users to not check results of a sequence
    value = value.to_s
    index = ignore? index unless @ignoring
    i2 = index + value.length
    # puts source_text[index...i2].inspect + ' ' + value.inspect
    source_text[index...i2] == value ? i2 : NO_MATCH
  end

  # Match a regular expression from the given index. Returns the end index if
  # found or NO_MATCH if not found.
  def regexp?(value, index)
    return NO_MATCH if index == NO_MATCH # allow users to not check results of a sequence
    value = correct_regexp! value
    index = ignore? index unless @ignoring
    found = value.match source_text[index..-1]
    # puts "#{value.inspect} ~= #{found[0].inspect}" if found
    found ? found.end(0) + index : NO_MATCH
  end

  # Make sure a regular expression only matches at the beginning of the text
  # it is applied to (the source from the given index on). Returns +re+
  # itself when its source already starts with \A.
  def correct_regexp!(re)
    source = re.source
    source[0..1] == '\\A' ? re : Regexp.new("\\A(#{source})", re.options)
  end

  protected

  # Build and return @index, a hash mapping each production that matched to
  # the list of source ranges (start...end) where it matched.
  #
  # NOTE(review): the original body was an unfinished sketch (`new Hash`,
  # iterators without blocks, an undefined `prod`). Collecting one range per
  # successful match is an assumption about the intent; confirm before
  # relying on it.
  def index_results!
    raise "You must first call parse!" unless parse_results
    @index = Hash.new { |h, k| h[k] = [] }
    parse_results.each_pair do |index, prod_map|
      order = prod_map[:found_order]
      next unless order # nothing matched at this index
      order.reverse_each do |prod|
        @index[prod] << (index...prod_map[prod])
      end
    end
    @index
  end
end # Parser
|
202
|
+
|
203
|
+
end # Peggy
|