peggy 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/Copy of builder.rb +363 -0
- data/lib/Copy of peg.rb +135 -0
- data/lib/ast.rb +86 -0
- data/lib/builder.rb +359 -0
- data/lib/parser.rb +203 -0
- data/lib/peg.rb +68 -0
- data/lib/peggy.rb +50 -0
- data/test/test_builder.rb +72 -0
- data/test/test_parser.rb +119 -0
- data/test/test_peg.rb +54 -0
- data/test/test_peggy.rb +66 -0
- data/test/tests.rb +6 -0
- metadata +61 -0
@@ -0,0 +1,363 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'parser'
|
3
|
+
# require File.join(File.dirname(__FILE__), 'parser')
|
4
|
+
|
5
|
+
module Peggy
|
6
|
+
|
7
|
+
# Abstract base class shared by every syntax element of a grammar.
class Element
  # Factory method: instantiates the receiving class, forwarding all
  # arguments to +new+.
  def self.build(*args)
    new(*args)
  end

  # Test whether this element matches at +index+ using +parser+.
  # The documented contract for subclasses is to return the index following
  # the match, or NO_MATCH when there is none. This base implementation
  # always raises, because it must be overridden.
  def match(parser, index)
    raise "Must override match"
  end

  # Hook that match results are passed through; it hands +index+ back
  # untouched. The commented line is a disabled trace statement.
  def report(index)
    # puts "#{to_s} #{index}"
    index
  end
end
|
25
|
+
|
26
|
+
# Element holding an ordered list of children. The sequence matches only
# when every child matches, each one starting where the previous one ended.
class Sequence < Element
  # Append a child element, creating the child list on first use.
  # Returns the child list.
  def add(element)
    @list ||= []
    @list.push(element)
  end

  # Synonym for add(element).
  alias_method :<<, :add

  # Fetch a child by its position in the list.
  def [](index)
    @list[index]
  end

  # Iterate over the children in insertion order.
  def each(&blk)
    @list.each(&blk)
  end

  # Match the children one after another, feeding each child the index
  # returned by its predecessor. Returns NO_MATCH as soon as a child fails,
  # otherwise the end index of the last child. Raises if no child was added.
  def match(parser, index)
    raise "no children added to sequence" unless @list
    position = index
    each do |child|
      position = child.match(parser, position)
      return NO_MATCH unless position
    end
    report(position)
  end
end
|
58
|
+
|
59
|
+
# Ordered choice: the children are tried in the order they were added and
# the first one that matches decides the result.
class Alternatives < Sequence
  # Try each child at the same +index+. Returns the end index produced by
  # the first matching child, or NO_MATCH when none of them match.
  # Raises if no child was added.
  def match(parser, index)
    raise "no children added to alternate" unless @list
    each do |choice|
      result = choice.match(parser, index)
      return report(result) if result
    end
    report(NO_MATCH)
  end
end
|
73
|
+
|
74
|
+
# An element which tries its single child multiple times. It is greedy,
# meaning it keeps matching as long as possible, unless the range specifies
# a maximum number of matches.
class Multiple < Element
  # A big number, used as the upper bound for "unbounded" repetition.
  MANY = 32767
  # Range holding the minimum and maximum number of repetitions.
  attr_accessor :range
  # The single child.
  attr_accessor :child

  # Init the range.
  def initialize(range)
    @range = range
  end

  # Synonym for child=(element).
  alias :'<<' :'child='

  # Match the child repeatedly, each attempt starting where the previous
  # match ended. Returns the end index of the last match when the number of
  # matches lies within the range, otherwise NO_MATCH.
  # Raises if the child or the range has not been set.
  def match(parser, index)
    raise "multiple element child not set" unless child
    raise "multiple element range not set" unless range
    # Range#last ignores end-exclusion: for 0...3 it is 3, although only up
    # to 2 repetitions are allowed. Looping to 3 would over-consume and then
    # fail the range check, so derive the real upper bound here.
    limit = range.exclude_end? ? range.last - 1 : range.last
    count = 0
    while count < limit
      found = child.match(parser, index)
      break unless found
      index = found
      count += 1
    end
    report(range === count ? index : NO_MATCH)
  end
end
|
108
|
+
|
109
|
+
# Repetition accepting anywhere from zero up to MANY matches of the child.
class AnyNumber < Multiple
  # Fix the repetition range to 0..MANY.
  def initialize
    super(0..MANY)
  end
end
|
115
|
+
|
116
|
+
# Repetition requiring at least one and at most MANY matches of the child.
class AtLeastOne < Multiple
  # Fix the repetition range to 1..MANY.
  def initialize
    super(1..MANY)
  end
end
|
122
|
+
|
123
|
+
# Repetition accepting the child either zero times or exactly once.
class Optional < Multiple
  # Fix the repetition range to 0..1.
  def initialize
    super(0..1)
  end
end
|
129
|
+
|
130
|
+
# Positive predicate: succeeds when its single child matches, but never
# advances the index. Fails with NO_MATCH when the child does not match.
class Positive < Element
  # The single child.
  attr_accessor :child

  # Synonym for child=(element).
  alias_method :<<, :child=

  # Try the child once at +index+. Returns the original +index+ when the
  # child matches and NO_MATCH otherwise. Raises if the child is not set.
  def match(parser, index)
    raise "positive element child not set" unless child
    return NO_MATCH unless child.match(parser, index)
    index
  end
end
|
147
|
+
|
148
|
+
# Negative predicate: succeeds, without advancing the index, when its single
# child does NOT match. Fails with NO_MATCH when the child matches.
class Negative < Positive
  # Try the child once at +index+. Returns NO_MATCH when the child matches
  # and the original +index+ otherwise. Raises if the child is not set.
  def match(parser, index)
    raise "negative element child not set" unless child
    if child.match(parser, index)
      NO_MATCH
    else
      index
    end
  end
end
|
157
|
+
|
158
|
+
# Match another production in the grammar, looked up by name.
class Reference < Element
  # The name of the production to look up and match (a Symbol, or nil when
  # it has not been set yet).
  attr_reader :name

  # Init the name. The name may be omitted and assigned later via name=.
  def initialize(name = nil)
    self.name = name
  end

  # Set the name of the production to match. The value is converted with
  # to_sym; nil is stored as nil so that the default constructor argument
  # works and #match can report the missing name itself.
  def name=(value)
    @name = value.nil? ? nil : value.to_sym
  end

  # Match the entire production from the parser grammar by delegating to
  # parser.match?. Returns whatever the parser returns (documented as the
  # end index on success, NO_MATCH otherwise). Raises if the name is not set.
  def match(parser, index)
    raise "reference name not set" unless name
    parser.match?(name, index)
  end

  # The production name as a String (empty when the name is not set).
  def to_s
    name.to_s
  end
end
|
184
|
+
|
185
|
+
# A named grammar production. Its one and only child is the definition
# that gets matched.
class Production < Reference
  # The production definition.
  attr_accessor :child

  # Init the name (handled by Reference) and the child definition.
  def initialize(name = nil, child = nil)
    super(name)
    @child = child
  end

  # Synonym for child=(element).
  alias_method :<<, :child=

  # Match the production once by matching its child at +index+. Returns the
  # child's result (end index on a match, NO_MATCH otherwise).
  # Raises if either the name or the child has not been set.
  def match(parser, index)
    raise "production name not set" unless name
    raise "production child not set" unless child
    outcome = @child.match(parser, index)
    report(outcome)
  end
end
|
207
|
+
|
208
|
+
# Matcher of a literal string or regular expression.
class Literal < Element
  # Value to match.
  attr_reader :value

  # Init the value. The value is stored as given; unlike value=, no regular
  # expression correction is applied here.
  def initialize(value = nil)
    @value = value
  end

  # Set the value to match. Regular expressions are passed through
  # correct_regexp first, so that they check at the beginning of the string.
  # NOTE(review): correct_regexp is not defined in Literal or Element within
  # this file - confirm where it is expected to come from.
  def value=(literal)
    @value = literal.is_a?(Regexp) ? correct_regexp(literal) : literal
  end

  # Match the literal value by delegating to parser.literal?. Returns the
  # parser's result (documented as the end index on a match, NO_MATCH
  # otherwise).
  def match(parser, index)
    report(parser.literal?(value, index))
  end

  # The inspected form of the value, e.g. "\"abc\"" or "/abc/".
  def to_s
    value.inspect
  end
end
|
235
|
+
|
236
|
+
# Parser builder. The built-in methods create syntax elements. Any other
# method called on this object creates a reference to a production, or an
# actual production if called at the top level with a block.
# Todo: Change to a class and separate from Parser.
class Builder < Parser
  # Productions built so far, keyed by the name they were defined with.
  attr_reader :productions
  # Element currently being populated (nil at the top level).
  attr_reader :parent

  def initialize
    reset!
  end

  # Switch back to building mode and discard all productions.
  def reset!
    @building = true
    @productions = {}
  end

  # Reference a production by its name index.
  def [](index)
    productions[index]
  end

  # While building: inside a production (a parent is set) any unknown method
  # adds a Reference with that name to the parent; at the top level an
  # unknown method with a block defines a Production of that name, the block
  # supplying its child. Otherwise the call falls through to super.
  #
  # After building (see parse?): an unknown method whose name is a known
  # production matches that production at the index given as first argument.
  def method_missing(name, *args)
    if @building
      if @parent
        @parent << Reference.new(name)
      elsif block_given?
        prod = Production.new(name)
        @parent = prod
        begin
          yield
        ensure
          # Always return to the top level, even when the block raises, so a
          # failed definition cannot turn later productions into references
          # hanging off a stale parent.
          @parent = nil
        end
        @productions[name] = prod
      else
        super
      end
    else
      prod = @productions[name]
      super unless prod
      # puts "matching #{name} at #{args.first}"
      prod.match(self, args.first)
    end
  end

  # Add an Alternatives element to the parent.
  def one(&blk)
    build_piece(Alternatives, blk)
  end
  # Synonym for one().
  alias :alt :one

  # With exactly one argument this defers to the superclass eof; any other
  # argument count is treated like an unknown method, i.e. as a reference to
  # (or a match of) a production named :eof.
  def eof(*args)
    if args.length == 1
      super(args.first)
    else
      method_missing(:eof, *args)
    end
  end

  # Add a Sequence element to the parent.
  def each(&blk)
    build_piece(Sequence, blk)
  end
  # Synonym for each().
  alias :seq :each

  # Add a Literal element to the parent. When the number of values is not
  # exactly one, the literals are wrapped in an Alternatives element.
  def lit(*values)
    if values.size == 1
      build_piece(Literal, nil, values.first)
    else
      one { values.each { |v| build_piece(Literal, nil, v) } }
    end
  end

  # Add an AnyNumber element to the parent.
  def many(&blk)
    build_piece(AnyNumber, blk)
  end

  # Add an Optional element to the parent.
  def opt(&blk)
    build_piece(Optional, blk)
  end

  # Add an AtLeastOne element to the parent.
  def some(&blk)
    build_piece(AtLeastOne, blk)
  end

  # Add a Negative (predicate) element to the parent.
  def neg(&blk)
    build_piece(Negative, blk)
  end

  # Add a Positive (predicate) element to the parent.
  def pos(&blk)
    build_piece(Positive, blk)
  end

  # Leave building mode, then parse via the superclass implementation.
  def parse?(goal, index = 0)
    @building = nil
    super
  end

  private

  # Add an object of klass to the parent and, when blk is given, call it
  # with the new object installed as the current parent. If value is
  # specified it is passed to the klass constructor.
  def build_piece(klass, blk = nil, value = nil)
    # puts "building #{klass.name} with #{value.inspect}"
    elem = value ? klass.new(value) : klass.new
    @parent << elem
    return unless blk

    saved = @parent
    @parent = elem
    begin
      blk.call
    ensure
      # Restore the enclosing parent even if the block raises.
      @parent = saved
    end
    saved
  end

end # Builder
|
362
|
+
|
363
|
+
end # Peggy
|
data/lib/Copy of peg.rb
ADDED
@@ -0,0 +1,135 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'builder'
|
3
|
+
# require File.join(File.dirname(__FILE__), 'builder')
|
4
|
+
|
5
|
+
module Peggy
|
6
|
+
|
7
|
+
# Implements the Parser Expression Grammar (PEG), one of several grammars supported.
|
8
|
+
class PEG < Parser
|
9
|
+
|
10
|
+
# Builder that defines, at construction time, the productions of the PEG
# notation itself (one production per call in #build).
class PEGCompiler < Builder

  # Run the Builder initialisation, then define the PEG productions.
  def initialize
    super
    build
  end

  private

  # Define every production. A top-level call with a block defines a
  # production of that name; names used inside a block become references.
  def build
    # Hierarchical syntax
    grammar { each { spacing; some { definition }; eof } }
    definition { each { identifier; left_arrow; expression } }
    expression { each { sequence; many { each { slash; sequence } } } }
    sequence { many { prefix } }
    prefix { each { opt { one { peek; disallow } }; suffix } }
    suffix { each { primary; opt { one { question; star; plus } } } }
    primary {
      one {
        each { identifier; neg { left_arrow } }
        each { lparen; expression; rparen }
        literal
        klass
        dot
      }
    }

    # Lexical syntax
    identifier { each { ident_start; many { ident_cont }; spacing } }
    ident_start { lit(/[a-zA-Z_]/) }
    ident_cont { one { ident_start; lit(/[0-9]/) } }
    literal {
      one {
        each { quote; many { each { neg { quote }; char } }; quote; spacing }
        each { quotes; many { each { neg { quotes }; char } }; quotes; spacing }
      }
    }
    klass { each { lit('['); many { each { neg { lit(']') }; range } }; lit(']'); spacing } }
    range { one { each { char; lit('-'); char }; char } }
    char { lit(/\\([nrt'"\[\]\\]|[0-2][0-7][0-7]|[0-7][0-7]?)|[^\\]/) }
    # Disabled element-by-element spelling of char, kept for reference:
    #char{one{
    #  each{lit '\\'; one{
    #    quote
    #    quotes
    #    lit /[nrt\[\]\\]/
    #    lit /[0-2][0-7][0-7]/
    #    lit /[0-7][0-7]?/
    #  }}
    #  lit /[^\\]/
    #}}

    # Tokens: a punctuation literal followed by optional spacing
    left_arrow { each { lit('<-'); spacing } }
    slash { each { lit('/'); spacing } }
    peek { each { lit('&'); spacing } }
    disallow { each { lit('!'); spacing } }
    question { each { lit('?'); spacing } }
    star { each { lit('*'); spacing } }
    plus { each { lit('+'); spacing } }
    lparen { each { lit('('); spacing } }
    rparen { each { lit(')'); spacing } }
    dot { each { lit('.'); spacing } }
    quote { lit("'") }
    quotes { lit('"') }

    # Whitespace, comments and line ends
    spacing { many { one { space; comment } } }
    comment { each { lit('#'); many { each { neg { eol }; lit(/./) } }; eol } }
    space { one { lit(' '); lit("\t"); eol } }
    eol { one { lit("\r\n"); lit("\n"); lit("\r") } }
  end
end # PEGCompiler
|
70
|
+
|
71
|
+
# Compile the PEG grammar in +text+ into productions on this object.
#
# The text is parsed with a fresh PEGCompiler (goal :grammar). For every
# 'definition' node of the resulting AST a production named after the
# definition's identifier is created, built from its sequence(s): a single
# sequence is used directly, several are wrapped in an ordered choice.
# Raises "Invalid grammar" when the text cannot be parsed.
#
# NOTE(review): reset!, one, each, pos, neg, opt, many and some are the
# Builder DSL methods - confirm this class actually inherits them.
def compile? text
  reset!
  compiler = PEGCompiler.new
  raise "Invalid grammar" unless compiler.parse! :grammar, text
  ast = compiler.ast
  ast.find_each 'definition' do |definition|
    # send with a block: the unknown name defines a production.
    send definition.identifier.to_sym do
      if definition.count('sequence') == 1
        build_sequence definition.expression.sequence
      else
        one do
          definition.find_each 'sequence' do |sequence|
            build_sequence sequence
          end
        end
      end
    end
  end
end

private

# Build the element(s) for one 'sequence' node: a single prefix is built
# directly, several prefixes are wrapped in a Sequence element.
def build_sequence sequence
  if sequence.count('prefix') == 1
    build_prefix sequence.prefix
  else
    each do
      sequence.find_each 'prefix' do |prefix|
        build_prefix prefix
      end
    end
  end
end

# Build the element for one 'prefix' node. A peek marker wraps the suffix in
# a positive predicate, a disallow marker in a negative predicate.
# (presumably prefix.peek / prefix.disallow are nil when the marker is
# absent, like the other AST accessors used here - verify against ast.rb)
def build_prefix prefix
  if prefix.peek
    pos do
      build_suffix prefix.suffix
    end
  elsif prefix.disallow
    neg do
      build_suffix prefix.suffix
    end
  else
    build_suffix prefix.suffix
  end
end

# Build the element for one 'suffix' node: '?' makes the primary optional,
# '*' repeats it any number of times, '+' at least once; with no marker the
# primary is built as is.
# TODO: build_primary is not defined anywhere in this file yet.
def build_suffix suffix
  if suffix.question
    opt do
      build_primary suffix.primary
    end
  elsif suffix.star
    many do
      build_primary suffix.primary
    end
  elsif suffix.plus
    some do
      build_primary suffix.primary
    end
  else
    build_primary suffix.primary
  end
end
|
133
|
+
|
134
|
+
end #PEG
|
135
|
+
end # Peggy
|