peggy 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/Copy of builder.rb +363 -0
- data/lib/Copy of peg.rb +135 -0
- data/lib/ast.rb +86 -0
- data/lib/builder.rb +359 -0
- data/lib/parser.rb +203 -0
- data/lib/peg.rb +68 -0
- data/lib/peggy.rb +50 -0
- data/test/test_builder.rb +72 -0
- data/test/test_parser.rb +119 -0
- data/test/test_peg.rb +54 -0
- data/test/test_peggy.rb +66 -0
- data/test/tests.rb +6 -0
- metadata +61 -0
data/lib/ast.rb
ADDED
@@ -0,0 +1,86 @@
|
|
1
|
+
module Peggy
|
2
|
+
# A node of the syntax tree. Children form a singly linked list: +first+ is
# the first child and each child's +next+ is its following sibling.
class Node
  # name::   name given to the node at construction
  # first::  first child, or nil when the node has no children
  # next::   following sibling, or nil
  # parent:: node this one was appended to with <<
  # from::   start index, used as the inclusive bound in to_s
  # to::     end index, used as the exclusive bound in to_s
  attr_accessor :name, :first, :next, :parent, :from, :to

  # Create a childless node with the given name.
  def initialize name
    self.name = name
  end

  # Append child after the current last child and make this node its parent.
  # Returns self so appends can be chained.
  def << child
    child.parent = self
    tail = last
    if tail
      tail.next = child
    else
      # The explicit receiver matters: a bare `first = child` would only
      # create a local variable and the child would be lost.
      self.first = child
    end
    self
  end

  # Yield each direct child in order. Without a block an Enumerator is returned.
  def each
    return enum_for(:each) unless block_given?
    child = first
    while child
      yield child
      child = child.next
    end
  end

  # Render this node and its descendants, one node per line. tabs is the
  # indentation prefix of this node; every level below adds two spaces.
  # The caller's string is not modified.
  def format tabs
    result = "#{tabs}#{self}\n"
    child_tabs = "#{tabs}  "
    each { |child| result << child.format(child_tabs) }
    result
  end

  # Return the last direct child, or nil when there are no children.
  def last
    node = first
    node = node.next while node && node.next
    node
  end

  # With a source, return the slice source[from...to]; otherwise return a
  # description of the form "name[from...to]".
  def to_s source=nil
    source ? source[from...to] : "#{name}[#{from}...#{to}]"
  end
end
|
47
|
+
|
48
|
+
# Builds a tree of Node objects from parse results. The results are expected
# to be a hash keyed by source index whose values are hashes mapping a
# production name to its end index, plus a :found_order list of the names
# that matched at that index (the shape written by Parser#match?).
class AST

  # Root node; matched productions hang below it.
  attr_reader :root

  # Create the tree. Without results only the empty root is created.
  # results:: parse results hash as described above (optional)
  # index::   index to start building from
  def initialize results = nil, index = 0
    @root = Node.new :root
    build_one results, index, @root if results
  end

  # Build a tree from the parse_results of a parser, starting at index.
  def self::build parser, index = 0
    new parser.parse_results, index
  end

  # Indented listing of the whole tree.
  def to_s
    @root.format ''
  end

  private

  # Add to parent every production found from index up to (not including)
  # limit. Positions with no usable match are skipped one index at a time.
  # When limit is omitted the largest recorded end index is used.
  def build_one results, index, parent, limit = nil
    limit ||= last_index(results)
    while index < limit
      # key? keeps a hash with an auto-creating default from growing
      row = results.key?(index) ? results[index] : nil
      names = chain_at row, index, limit
      if names.empty?
        index += 1
      else
        index = build_chain results, row, names, index, parent
      end
    end
    parent
  end

  # Create one node for names.first under parent, nest the remaining names
  # inside it, then fill in whatever follows. Returns the node's end index.
  def build_chain results, row, names, index, parent
    node = Node.new names.first
    node.from = index
    node.to = row[names.first]
    parent << node
    inner = names[1..-1]
    # With nothing nested at this index, resume one past it so the same row
    # is not visited again.
    resume = inner.empty? ? index + 1 : build_chain(results, row, inner, index, node)
    build_rest results, node, resume
    node.to
  end

  # Collect the children of previous that start at or after index.
  def build_rest results, previous, index
    build_one results, index, previous, previous.to
  end

  # Names matched at index, outermost first. :found_order lists matches in
  # completion order, so it is walked backwards. Entries that are not a
  # non-empty match, or that do not fit inside the enclosing span, are
  # skipped.
  # NOTE(review): the results also hold matches the parser later abandoned;
  # they cannot be told apart here, so the nesting is a best effort.
  def chain_at row, index, limit
    order = row && row[:found_order]
    return [] unless order
    chain = []
    order.reverse_each do |name|
      stop = row[name]
      next unless stop.is_a?(Integer) && stop > index && stop <= limit
      chain << name
      limit = stop
    end
    chain
  end

  # Largest end index recorded anywhere in results, or 0 when there is none.
  def last_index results
    results.values.flat_map { |row| row.values.grep(Integer) }.max || 0
  end
end
|
86
|
+
end # Peggy
|
data/lib/builder.rb
ADDED
@@ -0,0 +1,359 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'parser'
|
3
|
+
# require File.join(File.dirname(__FILE__), 'parser')
|
4
|
+
|
5
|
+
module Peggy
|
6
|
+
|
7
|
+
# Base syntax element class.
|
8
|
+
class Element
  # Factory: hands every argument to the constructor and returns the new element.
  def self.build(*args)
    new(*args)
  end

  # Test whether this element matches at index. Implementations return the
  # index following the match, or NO_MATCH. This base version always raises,
  # so subclasses have to provide their own.
  def match(parser, index)
    raise "Must override match"
  end

  # Hook through which match results are passed; returns index unchanged.
  def report(index)
    # puts "#{to_s} #{index}"
    index
  end
end
|
25
|
+
|
26
|
+
# An element that matches a sequence of elements. All must match for the sequence to match.
|
27
|
+
class Sequence < Element
  # Append a child element; the child list is created on first use.
  def add(element)
    @list ||= []
    @list << element
  end

  # seq << element is the same as seq.add(element)
  alias_method :<<, :add

  # Child at the given position.
  def [](index)
    @list[index]
  end

  # Iterate over the children in the order they were added.
  def each(&blk)
    @list.each(&blk)
  end

  # Match the children one after another, each starting where the previous
  # one ended. Returns NO_MATCH as soon as a child fails, otherwise the end
  # index of the last child. Raises if no child was ever added.
  def match(parser, index)
    raise "no children added to sequence" unless @list
    position = index
    each do |element|
      position = element.match(parser, position)
      return NO_MATCH unless position
    end
    report(position)
  end
end
|
58
|
+
|
59
|
+
# An element which matches any one of its children. The children are tested in order. The first
|
60
|
+
# to match wins.
|
61
|
+
class Alternatives < Sequence
  # Try the children in order from the same index. The end index of the
  # first child that matches is returned; if none does the result is
  # NO_MATCH. Raises if no child was ever added.
  def match(parser, index)
    raise "no children added to alternate" unless @list
    each do |element|
      ending = element.match(parser, index)
      return report(ending) if ending
    end
    report(NO_MATCH)
  end
end
|
73
|
+
|
74
|
+
# An element which tries its single child multiple times. It is greedy, meaning it will continue
|
75
|
+
# to match as long as possible, unless the range specifies a maximum number of matches.
|
76
|
+
class Multiple < Element
  # Stand-in for "no upper limit"
  MANY = 32767

  # range:: minimum and maximum number of tries
  # child:: the single element that is matched repeatedly
  attr_accessor :range, :child

  # Remember the range of allowed repetitions.
  def initialize(range)
    @range = range
  end

  # multiple << element is the same as multiple.child = element
  alias_method :<<, :child=

  # Match the child again and again, each time from where the previous match
  # ended, until it fails or the end of the range is reached. If the number
  # of matches lies within the range the end index of the last match is
  # returned, otherwise NO_MATCH. Raises when child or range is missing.
  def match(parser, index)
    raise "multiple element child not set" unless child
    raise "multiple element range not set" unless range
    matched = 0
    position = index
    until matched >= range.last
      ending = child.match(parser, position)
      break unless ending
      position = ending
      matched += 1
    end
    report(range === matched ? position : NO_MATCH)
  end
end
|
108
|
+
|
109
|
+
# Matcher of 0 or more times.
|
110
|
+
class AnyNumber < Multiple
  # Repetition range is fixed at zero up to MANY.
  def initialize
    super(0..MANY)
  end
end
|
115
|
+
|
116
|
+
# Matcher of 1 or more times.
|
117
|
+
class AtLeastOne < Multiple
  # Repetition range is fixed at one up to MANY.
  def initialize
    super(1..MANY)
  end
end
|
122
|
+
|
123
|
+
# Matcher of 0 or 1 time.
|
124
|
+
class Optional < Multiple
  # Repetition range is fixed at zero or one.
  def initialize
    super(0..1)
  end
end
|
129
|
+
|
130
|
+
# An element which tries its single child but does not advance the index if found.
|
131
|
+
# If not found, however, it returns NO_MATCH. Used for a positive semantic predicate.
|
132
|
+
class Positive < Element
  # The element that is looked ahead for
  attr_accessor :child

  # positive << element is the same as positive.child = element
  alias_method :<<, :child=

  # Try the child once at index. When it matches, the index that was passed
  # in is returned, so nothing is consumed; when it does not, NO_MATCH is
  # returned. Raises when no child is set.
  def match(parser, index)
    raise "positive element child not set" unless child
    return NO_MATCH unless child.match(parser, index)
    index
  end
end
|
147
|
+
|
148
|
+
# An element which tries its single child but does not advance the index if not found.
|
149
|
+
# If found, however, it returns NO_MATCH. Used for a negative semantic predicate.
|
150
|
+
class Negative < Positive
  # Try the child once at index. When it matches, NO_MATCH is returned; when
  # it does not, the index that was passed in is returned unchanged. Raises
  # when no child is set.
  def match(parser, index)
    raise "negative element child not set" unless child
    return NO_MATCH if child.match(parser, index)
    index
  end
end
|
157
|
+
|
158
|
+
# Match another production in the grammar.
|
159
|
+
class Reference < Element
  # The name of the production to lookup and match (a Symbol, or nil while unset).
  attr_reader :name

  # Init the name. The name may be left out and assigned later with name=.
  def initialize name=nil
    self.name = name
  end

  # Set the name of production to match. The value is stored as a Symbol;
  # nil is kept as nil so that a reference can be created without a name.
  def name= value
    @name = value.nil? ? nil : value.to_sym
  end

  # Match the entire production from the parser grammar by asking the parser
  # to match it by name. If it matches the end index is returned. If not,
  # NO_MATCH is returned. Raises when the name has not been set.
  def match parser, index
    raise "reference name not set" unless name
    parser.match? name, index
  end

  # The production name as a String (empty while the name is unset).
  def to_s
    name.to_s
  end
end
|
184
|
+
|
185
|
+
# Matcher of a grammar production. The one and only child defines the production.
|
186
|
+
class Production < Reference
  # The element that defines what this production matches
  attr_accessor :child

  # Store the name (handled by the superclass) and the defining child.
  def initialize(name = nil, child = nil)
    super(name)
    @child = child
  end

  # production << element is the same as production.child = element
  alias_method :<<, :child=

  # Match the defining child once at index and pass its result through
  # report: the end index on success, NO_MATCH otherwise. Raises when the
  # name or the child is missing.
  def match(parser, index)
    raise "production name not set" unless name
    raise "production child not set" unless child
    outcome = @child.match(parser, index)
    report(outcome)
  end
end
|
207
|
+
|
208
|
+
# Matcher of a literal string or regular expression.
|
209
|
+
class Literal < Element
  # Value to match.
  attr_reader :value

  # Init the value. The value is stored exactly as given.
  def initialize value=nil
    @value = value
  end

  # Set the value to match. A regular expression is anchored so that it can
  # only match at the beginning of the text it is applied to.
  def value= literal
    literal = correct_regexp literal if literal.is_a? Regexp
    @value = literal
  end

  # Match the literal value by handing it to parser.literal?. If it matches
  # the end index is returned. If not, NO_MATCH is returned.
  def match parser, index
    report parser.literal?(value, index)
  end

  # The value in inspect form, e.g. "\"abc\"" or "/\\d+/".
  def to_s
    value.inspect
  end

  private

  # Return re unchanged if its source already starts with \A, otherwise a
  # new Regexp with the same options whose source is wrapped in \A(?:...).
  # The group is non-capturing so numbered groups in re keep their numbers.
  def correct_regexp re
    source = re.source
    source[0, 2] == '\\A' ? re : Regexp.new("\\A(?:#{source})", re.options)
  end
end
|
235
|
+
|
236
|
+
# Parser builder. The built in methods create syntax elements. Any other
|
237
|
+
# method called on this object create references to production, or actual
|
238
|
+
# productions, if called at the top level.
|
239
|
+
# Todo: Change to a class and separate from Parser.
|
240
|
+
class Builder < Parser
  # Productions to build (name => Production); nil until the first one is defined
  attr_reader :productions
  # Current parent being built
  attr_reader :parent

  # Start out in building mode; parse? switches to matching mode.
  def initialize
    @building = true
  end

  # Reference a production by its name index.
  def [] index
    productions[index]
  end

  # While building: inside a production a call to an unknown method adds a
  # Reference with that name to the current parent; at the top level a call
  # with a block defines a Production with that name and runs the block to
  # fill it in.
  # After parse? has been called: the call is treated as a request to match
  # the production of that name at the index given as first argument.
  # Anything else goes to the default method_missing.
  def method_missing name, *args
    if @building
      if @parent
        @parent << Reference.new(name)
      elsif block_given?
        @productions ||= {}
        prod = Production.new name
        @parent = prod
        begin
          yield
        ensure
          # leave the top level usable even if the block raised
          @parent = nil
        end
        @productions[name] = prod
      else
        super
      end
    else
      prod = @productions && @productions[name]
      super unless prod
      # puts "matching #{name} at #{args.first}"
      prod.match self, args.first
    end
  end

  # Report the productions that method_missing can match once building is over.
  def respond_to_missing? name, include_private = false
    (!@building && @productions && @productions.key?(name)) || super
  end

  # Add an Alternatives element to the parent.
  def one &blk
    build_piece Alternatives, blk
  end
  # Synonym for one().
  alias :alt :one

  # With exactly one argument this is the parser's eof test at that index.
  # With any other number of arguments it is handled like an unknown method,
  # i.e. as a reference to (or a match of) a production named eof.
  def eof *args
    if args.length == 1
      super args.first
    else
      method_missing :eof, *args
    end
  end

  # Add an Sequence element to the parent.
  def each &blk
    build_piece Sequence, blk
  end
  # Synonym for each()
  alias :seq :each

  # Add an Literal element to the parent. Several values are added as
  # alternatives, one Literal per value.
  def lit *values
    if values.size == 1
      build_piece Literal, nil, values.first
    else
      one { values.each { |v| build_piece Literal, nil, v } }
    end
  end

  # Add an AnyNumber element to the parent.
  def many &blk
    build_piece AnyNumber, blk
  end

  # Add an Optional element to the parent.
  def opt &blk
    build_piece Optional, blk
  end

  # Add an AtLeastOne element to the parent.
  def some &blk
    build_piece AtLeastOne, blk
  end

  # Add a Negative element to the parent.
  def neg &blk
    build_piece Negative, blk
  end

  # Add a Positive element to the parent.
  def pos &blk
    build_piece Positive, blk
  end

  # Leave building mode and run the parser on goal.
  # An Integer second argument is the start index; it is passed on in the
  # parser's index position with no source, because a bare super would put
  # it into the parser's source parameter. Any other second argument is
  # forwarded unchanged in the parser's second position, as before.
  def parse? goal, index=0
    @building = nil
    if index.is_a?(Integer)
      super goal, nil, index
    else
      super goal, index
    end
  end

  private

  # Add an object of klass to the parent and yield to its block. If
  # value is specified it is passed to the klass constructor. While the
  # block runs the new object is the current parent; the previous parent is
  # restored afterwards, also when the block raises.
  def build_piece klass, blk=nil, value=nil
    # puts "building #{klass.name} with #{value.inspect}"
    elem = value ? klass.new(value) : klass.new
    @parent << elem
    if blk
      outer = @parent
      @parent = elem
      begin
        blk.call
      ensure
        @parent = outer
      end
      outer
    end
  end

end # Builder
|
358
|
+
|
359
|
+
end # Peggy
|
data/lib/parser.rb
ADDED
@@ -0,0 +1,203 @@
|
|
1
|
+
require 'pp'
|
2
|
+
|
3
|
+
# Peggy is a packrat parsing engine. Packrat parsers memoize every production so that
|
4
|
+
# parses can happen in linear time. No production needs to be processed more than once for
|
5
|
+
# a given position of the source. See http://pdos.csail.mit.edu/~baford/packrat/ for
|
6
|
+
# more details.
|
7
|
+
#
|
8
|
+
# Peggy also incorporates Parsing Expression Grammar (PEG) as proposed by Bryan Ford,
|
9
|
+
# as one of several input grammars. PEG is a formalized grammar specification needing
|
10
|
+
# no separate lexer/scanner step. See http://pdos.csail.mit.edu/~baford/packrat/popl04/
|
11
|
+
#
|
12
|
+
# As good as packrat parsers are, they have a few limitations. They cannot handle left
|
13
|
+
# recursion of a production, meaning a production cannot reference itself as the first
|
14
|
+
# element in a sequence. Also memoizing of production results means that memory consumption
|
15
|
+
# increases with the size of the source being parsed. This is not usually a concern, except
|
16
|
+
# when attempting to parse multi-megabyte source files, such as a huge XML database.
|
17
|
+
module Peggy
|
18
|
+
|
19
|
+
# Returned when a production did not match
|
20
|
+
NO_MATCH = false
|
21
|
+
# Used to prevent infinite (left) recursions
|
22
|
+
IN_USE = true
|
23
|
+
|
24
|
+
# Packrat parser class. Note all methods have a trailing exclamation (!) or question
|
25
|
+
# mark (?), or have long names with underscores (_). This is because productions are
|
26
|
+
# methods and we need to avoid name collisions. To use this class you must subclass
|
27
|
+
# Parser and provide your productions as methods. Your productions must call match?
|
28
|
+
# or one of the protected convenience routines to perform parsing. Productions must
|
29
|
+
# never call another production directly, or results will not get memoized and you
|
30
|
+
# will slow down your parse considerably, and possibly risk getting into an infinite
|
31
|
+
# recursion (until the stack blows its top). Note, as a convenience in writing
|
32
|
+
# productions, you can call any match? function multiple times, passing each returned
|
33
|
+
# index, such as in a sequence, without checking the results of each production.
|
34
|
+
class Parser

  # Tells parser to print intermediate results if set.
  attr_accessor :debug_flag

  # The source to parse, can be set prior to calling parse?().
  attr_accessor :source_text

  # The results of the parse. A hash (keys of indexes) of hashes (keys of production
  # symbols and values of end indexes, plus :found_order listing the productions
  # that matched at that index in the order they were found).
  attr_reader :parse_results

  # The productions to ignore.
  attr_accessor :ignore_productions

  # Return a range (or character) of the source_text.
  def [] range
    raise "source_text not set" if source_text.nil?
    source_text[range]
  end

  # Invokes the parser on the given production goal. Returns the end index of
  # the match or NO_MATCH.
  # goal::   name of the production to match
  # source:: text to parse; when nil the current source_text is kept
  # index::  start index; characters before it are ignored
  # An Integer in the source position is taken as the start index, so that a
  # call of the form parse?(goal, index) works.
  def parse? goal, source = nil, index = 0
    index, source = source, nil if source.is_a?(Integer)
    # The explicit receiver is needed: a bare `source_text = source` would
    # only assign a local variable and leave the attribute untouched.
    self.source_text = source unless source.nil?
    # Hash of automatic hashes
    @parse_results = Hash.new { |h1, k1| h1[k1] = {} }
    @keys = nil
    index = match? goal, index
    pp parse_results if debug_flag
    index
  end

  # Queries the parse results for a hierarchy of production matches. This can
  # only be called after parse_results have been set by a parse.
  # NOTE(review): this method is unfinished. It relies on find?, which is not
  # defined in this class, so any call with arguments raises NoMethodError.
  def query? *args
    raise "You must first call parse!" unless parse_results
    @keys = @parse_results.keys.sort unless @keys
    index = 0
    args.each do |arg|
      index = find? arg, index
    end
  end

  # Try to match a production from the given index. Returns the end index if found
  # or start index if not found.
  def allow? goal, index
    return NO_MATCH if index == NO_MATCH # allow users to not check results of a sequence
    found = match? goal, index
    found == NO_MATCH ? index : found
  end

  # Try to match a production from the given index then backtrack. Returns index if
  # found or NO_MATCH if not.
  def check? goal, index
    return NO_MATCH if index == NO_MATCH # allow users to not check results of a sequence
    found = match? goal, index
    found == NO_MATCH ? NO_MATCH : index
  end

  # Try not to match a production from the given index then backtrack. Returns index
  # if not found or NO_MATCH if found.
  def dissallow? goal, index
    return NO_MATCH if index == NO_MATCH # allow users to not check results of a sequence
    found = match? goal, index
    found == NO_MATCH ? index : NO_MATCH
  end

  # Special production that only matches the end of source_text. Note, this function
  # does not end in (?) or (!) because it is meant be used as a normal production.
  def eof index
    return NO_MATCH if index == NO_MATCH # allow users to not check results of a sequence
    index >= source_text.length ? index : NO_MATCH
  end

  # Match a production from the given index. Returns the end index if found or NO_MATCH
  # if not found. The result for each (index, goal) pair is stored in
  # parse_results and reused when the same pair is asked for again.
  # Raises when the production is requested again at the same index while it
  # is still being evaluated (left recursion).
  def match? goal, index
    return NO_MATCH if index == NO_MATCH # allow users to not check results of a sequence
    index = ignore? index unless @ignoring
    goal = goal.to_sym
    position = parse_results[index]
    found = position.fetch(goal) do
      # marker that lets a nested request for the same goal be detected
      position[goal] = IN_USE
      if (result = send goal, index)
        (position[:found_order] ||= []) << goal
      end
      position[goal] = result
    end
    # Checked before the debug output, which would otherwise try to slice
    # the source with the IN_USE marker as the end of the range.
    raise "Parser cannot handle infinite (left) recursions. Please rewrite usage of '#{goal}'." if found == IN_USE
    puts "found #{goal} at #{index}...#{found} #{source_text[index...found].inspect}" if found && debug_flag
    found
  end

  # Match tokens that should be ignored. Used by match?(). Returns end index if found
  # or start index if not found. Subclasses should override this method if they wish
  # to ignore other text, such as comments.
  def ignore? index
    return NO_MATCH if index == NO_MATCH # allow users to not check results of a sequence
    return index if @ignoring || ignore_productions.nil?
    @ignoring = true
    begin
      ignore_productions.each do |prod|
        index = allow? prod, index
      end
    ensure
      # always cleared, so an error in an ignore production does not switch
      # ignoring off for the rest of the parser's life
      @ignoring = nil
    end
    index
  end

  # Match a literal string or regular expression from the given index. Returns
  # the end index if found or NO_MATCH if not found. Raises for any other
  # kind of value.
  def literal? value, index
    return NO_MATCH if index == NO_MATCH # allow users to not check results of a sequence
    case value
    when String
      string? value, index
    when Regexp
      regexp? value, index
    else
      raise "Unknown literal: #{value.inspect}"
    end
  end

  # Match a string from the given index. Returns the end index if found
  # or NO_MATCH if not found.
  def string? value, index
    return NO_MATCH if index == NO_MATCH # allow users to not check results of a sequence
    value = value.to_s
    index = ignore? index unless @ignoring
    i2 = index + value.length
    # puts source_text[index...i2].inspect + ' ' + value.inspect
    source_text[index...i2] == value ? i2 : NO_MATCH
  end

  # Match a regular expression from the given index. Returns the end index
  # if found or NO_MATCH if not found.
  def regexp? value, index
    return NO_MATCH if index == NO_MATCH # allow users to not check results of a sequence
    value = correct_regexp! value
    index = ignore? index unless @ignoring
    found = value.match source_text[index..-1]
    # puts "#{value.inspect} ~= #{found[0].inspect}" if found
    found ? found.end(0) + index : NO_MATCH
  end

  # Make sure regular expressions match the beginning of the string, actually from
  # the string from the given index. A regular expression whose source already
  # starts with \A is returned as is; otherwise a new one is built around a
  # non-capturing group, so numbered groups and backreferences in the
  # original keep their meaning.
  def correct_regexp! re
    source = re.source
    source[0..1] == '\\A' ? re : Regexp.new("\\A(?:#{source})", re.options)
  end

  protected

  # Build @index, a hash from production name to the list of index ranges
  # (start...end) at which that production was found, and return it.
  # NOTE(review): the original body was unfinished; the ranges-per-production
  # shape is presumed from the description of query? and should be confirmed.
  def index_results!
    raise "You must first call parse!" unless parse_results
    @index = Hash.new { |h, k| h[k] = [] }
    parse_results.each_pair do |index, prod_map|
      prod_map.fetch(:found_order, []).each do |prod|
        @index[prod] << (index...prod_map[prod])
      end
    end
    @index
  end
end # Parser
|
202
|
+
|
203
|
+
end # Peggy
|