peggy 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/Copy of builder.rb +363 -0
- data/lib/Copy of peg.rb +135 -0
- data/lib/ast.rb +86 -0
- data/lib/builder.rb +359 -0
- data/lib/parser.rb +203 -0
- data/lib/peg.rb +68 -0
- data/lib/peggy.rb +50 -0
- data/test/test_builder.rb +72 -0
- data/test/test_parser.rb +119 -0
- data/test/test_peg.rb +54 -0
- data/test/test_peggy.rb +66 -0
- data/test/tests.rb +6 -0
- metadata +61 -0
@@ -0,0 +1,363 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'parser'
|
3
|
+
# require File.join(File.dirname(__FILE__), 'parser')
|
4
|
+
|
5
|
+
module Peggy
|
6
|
+
|
7
|
+
# Base syntax element class. Concrete grammar elements subclass it and
# override #match.
class Element
  # Create an element. All arguments, and the block if one is given, are
  # forwarded unchanged to +new+.
  def self.build(*args, &blk)
    new(*args, &blk)
  end

  # Test to see if there is a match of this element at the current index.
  # Returns the index following the match if one is found, or NO_MATCH if not.
  # The base implementation always raises a RuntimeError; subclasses must
  # override it.
  def match(parser, index)
    raise "Must override match"
  end

  # Pass-through hook for match results: returns +index+ unchanged. The
  # commented-out line below can be re-enabled to trace matching.
  def report(index)
    # puts "#{to_s} #{index}"
    index
  end
end
|
25
|
+
|
26
|
+
# An element that matches a sequence of elements. All must match for the
# sequence to match.
class Sequence < Element
  # Add a child element. The child list is created lazily on the first add.
  def add(element)
    @list = [] unless @list
    @list << element
  end

  # Synonym for add(element)
  alias :'<<' :add

  # Reference a child by index. Returns nil when no child has been added yet
  # instead of failing on the missing list.
  def [](index)
    @list ? @list[index] : nil
  end

  # Child iterator. Iterates over nothing when no child has been added yet.
  def each(&blk)
    (@list || []).each(&blk)
  end

  # Match each child in sequence, feeding the end index of one child to the
  # next. If any child fails this returns NO_MATCH. If all succeed this
  # returns the end index of the last.
  # Raises a RuntimeError when no child was ever added.
  def match(parser, index)
    raise "no children added to sequence" unless @list
    each do |element|
      index = element.match parser, index
      return NO_MATCH unless index
    end
    report index
  end
end
|
58
|
+
|
59
|
+
# Ordered choice: an element that succeeds as soon as one of its children
# matches. Children are attempted in the order they were added.
class Alternatives < Sequence
  # Try every child at the same starting index and stop at the first one that
  # matches. The reported result is that child's end index, or NO_MATCH when
  # no child matches. Raises a RuntimeError if no child was ever added.
  def match(parser, index)
    raise "no children added to alternate" unless @list
    winner = nil
    each do |candidate|
      winner = candidate.match(parser, index)
      break if winner
    end
    winner ? report(winner) : report(NO_MATCH)
  end
end
|
73
|
+
|
74
|
+
# An element which tries its single child multiple times. It is greedy,
# meaning it will continue to match as long as possible, unless the range
# specifies a maximum number of matches.
class Multiple < Element
  # A big number, used as the upper bound for "unlimited" repetition.
  MANY = 32767
  # The minimum and maximum number of tries
  attr_accessor :range
  # The single child
  attr_accessor :child

  # Init the range
  def initialize(range)
    @range = range
  end

  # synonym for child=(element)
  alias :'<<' :'child='

  # Matches the child multiple times. The range specifies the least and most
  # number of matches. If the number of matches is less than the minimum of
  # the range then NO_MATCH is returned. If equal or more than the minimum
  # then the end index of the last match is returned.
  # Raises a RuntimeError when the child or the range has not been set.
  def match(parser, index)
    raise "multiple element child not set" unless child
    raise "multiple element range not set" unless range
    # An exclusive range (a...b) allows at most b - 1 repetitions. Looping up
    # to range.last would consume one repetition too many and then fail the
    # range check below, so honour the excluded end here.
    limit = range.last
    limit -= 1 if range.respond_to?(:exclude_end?) && range.exclude_end?
    count = 0
    while count < limit
      found = child.match parser, index
      break unless found
      index = found
      count += 1
    end
    report(range === count ? index : NO_MATCH)
  end
end
|
108
|
+
|
109
|
+
# Repetition matcher accepting zero up to MANY occurrences of its child.
class AnyNumber < Multiple
  # Fixes the repetition range at 0..MANY.
  def initialize
    super(0..MANY)
  end
end
|
115
|
+
|
116
|
+
# Repetition matcher accepting one up to MANY occurrences of its child.
class AtLeastOne < Multiple
  # Fixes the repetition range at 1..MANY.
  def initialize
    super(1..MANY)
  end
end
|
122
|
+
|
123
|
+
# Repetition matcher accepting at most a single occurrence of its child.
class Optional < Multiple
  # Fixes the repetition range at 0..1.
  def initialize
    super(0..1)
  end
end
|
129
|
+
|
130
|
+
# Positive predicate: probes its single child without consuming input.
# When the child matches, the starting index is handed back unchanged;
# when it does not, the result is NO_MATCH.
class Positive < Element
  # The single child
  attr_accessor :child

  # synonym for child=(element)
  alias :'<<' :'child='

  # Probe the child once at +index+. Returns +index+ itself on success and
  # NO_MATCH on failure. Raises a RuntimeError if the child is not set.
  def match(parser, index)
    raise "positive element child not set" unless child
    return NO_MATCH unless child.match(parser, index)
    index
  end
end
|
147
|
+
|
148
|
+
# Negative predicate: probes its single child without consuming input.
# When the child does NOT match, the starting index is handed back unchanged;
# when it does match, the result is NO_MATCH.
class Negative < Positive
  # Probe the child once at +index+. Returns NO_MATCH if the child matches
  # and +index+ itself otherwise. Raises a RuntimeError if the child is not set.
  def match(parser, index)
    raise "negative element child not set" unless child
    return NO_MATCH if child.match(parser, index)
    index
  end
end
|
157
|
+
|
158
|
+
# Match another production in the grammar.
class Reference < Element
  # The name of the production to lookup and match (a Symbol, or nil if unset).
  attr_reader :name

  # Init the name. The name may be omitted and assigned later with name=.
  def initialize(name = nil)
    self.name = name
  end

  # Set the name of production to match. The value is converted to a Symbol;
  # nil is stored as-is so that the documented default of +initialize+ works
  # instead of failing on nil.to_sym.
  def name=(value)
    @name = value.nil? ? nil : value.to_sym
  end

  # Match the entire production from the parser grammar by delegating to
  # parser.match?. If it matches the end index is returned. If not, NO_MATCH
  # is returned. Raises a RuntimeError when no name has been set.
  def match(parser, index)
    raise "reference name not set" unless name
    parser.match? name, index
  end

  # The production name as a String (empty when no name is set). to_s must
  # return a String for string interpolation to use it.
  def to_s
    name.to_s
  end
end
|
184
|
+
|
185
|
+
# A named grammar production. Its one and only child is the element tree
# that defines what the production matches.
class Production < Reference
  # The production definition.
  attr_accessor :child

  # Store the production name (handled by Reference) and its definition.
  def initialize(name = nil, child = nil)
    super(name)
    @child = child
  end

  # Synonym of child=(element)
  alias :'<<' :'child='

  # Run the definition once at +index+ and report its result: the end index
  # on success, NO_MATCH otherwise. Raises a RuntimeError when either the
  # name or the child is missing.
  def match(parser, index)
    raise "production name not set" unless name
    raise "production child not set" unless child
    outcome = @child.match(parser, index)
    report(outcome)
  end
end
|
207
|
+
|
208
|
+
# Terminal element: matches a literal string or a regular expression.
class Literal < Element
  # Value to match.
  attr_reader :value

  # Store the value exactly as given (the constructor does not go through
  # value=).
  def initialize(value = nil)
    @value = value
  end

  # Replace the value to match. Regular expressions are first passed through
  # correct_regexp so that they check at the beginning of the string; any
  # other value is stored untouched.
  # NOTE(review): correct_regexp is not defined in this class or in Element
  # as far as this file shows -- confirm it is provided elsewhere.
  def value=(literal)
    @value = literal.is_a?(Regexp) ? correct_regexp(literal) : literal
  end

  # Ask the parser whether the value occurs at +index+ (parser.literal?) and
  # report the answer: the end index on success, NO_MATCH otherwise.
  def match(parser, index)
    end_index = parser.literal?(value, index)
    report(end_index)
  end

  # The inspected form of the value, e.g. a quoted string or /regexp/.
  def to_s
    value.inspect
  end
end
|
235
|
+
|
236
|
+
# Parser builder. The built in methods create syntax elements. Any other
# method called on this object creates a reference to a production, or an
# actual production if called at the top level with a block.
# Todo: Change to a class and separate from Parser.
class Builder < Parser
  # Productions to build, keyed by production name.
  attr_reader :productions
  # Current parent being built (nil when at the top level).
  attr_reader :parent

  def initialize
    reset!
  end

  # Return to building mode with an empty production table. Also clears the
  # current parent so that a previously interrupted build cannot leak into
  # the next one.
  def reset!
    @building = true
    @productions = {}
    @parent = nil
  end

  # Reference a production by its name index.
  def [](index)
    productions[index]
  end

  # While building: inside a production this adds a Reference to +name+ to the
  # current parent; at the top level with a block it defines the production
  # +name+ from the elements created in the block. After building has ended
  # (see parse?) it matches the production +name+ at the index given as the
  # first argument. Unknown names fall through to the default method_missing.
  def method_missing(name, *args)
    if @building
      if @parent
        @parent << Reference.new(name)
      elsif block_given?
        prod = Production.new name
        @parent = prod
        begin
          yield
        ensure
          # Always leave production scope, even when the block raises, so the
          # builder is not stuck treating later top-level calls as references.
          @parent = nil
        end
        @productions[name] = prod
      else
        super
      end
    else
      prod = @productions[name]
      super unless prod
      # puts "matching #{name} at #{args.first}"
      prod.match self, args.first
    end
  end

  # Add an Alternatives element to the parent.
  def one(&blk)
    build_piece Alternatives, blk
  end
  # Synonym for one().
  alias :alt :one

  # With exactly one argument this defers to the inherited eof; otherwise it
  # is treated like any other production name and routed through
  # method_missing (producing a reference while inside a production).
  def eof(*args)
    if args.length == 1
      super(args.first)
    else
      method_missing :eof, *args
    end
  end

  # Add a Sequence element to the parent.
  def each(&blk)
    build_piece Sequence, blk
  end
  # Synonym for each()
  alias :seq :each

  # Add a Literal element to the parent. When several values are given they
  # are wrapped in an Alternatives element, one Literal per value.
  def lit(*values)
    if values.size == 1
      build_piece Literal, nil, values.first
    else
      one { values.each { |v| build_piece Literal, nil, v } }
    end
  end

  # Add an AnyNumber element to the parent.
  def many(&blk)
    build_piece AnyNumber, blk
  end

  # Add an Optional element to the parent.
  def opt(&blk)
    build_piece Optional, blk
  end

  # Add an AtLeastOne element to the parent.
  def some(&blk)
    build_piece AtLeastOne, blk
  end

  # Add a Negative element to the parent.
  def neg(&blk)
    build_piece Negative, blk
  end

  # Add a Positive element to the parent.
  def pos(&blk)
    build_piece Positive, blk
  end

  # Leave building mode and hand over to the inherited parse?. From here on
  # unknown method names are resolved as productions to match.
  def parse?(goal, index = 0)
    @building = nil
    super
  end

  private

  # Add an object of klass to the parent and call its block with the new
  # object as the current parent. If value is specified it is passed to the
  # klass constructor. The previous parent is restored afterwards, even when
  # the block raises. Returns the new element.
  # Raises a RuntimeError when called outside of a production.
  def build_piece(klass, blk = nil, value = nil)
    # puts "building #{klass.name} with #{value.inspect}"
    raise "#{klass.name} element must be built inside a production" unless @parent
    elem = value ? klass.new(value) : klass.new
    @parent << elem
    if blk
      outer = @parent
      @parent = elem
      begin
        blk.call
      ensure
        @parent = outer
      end
    end
    elem
  end

end # Builder
|
362
|
+
|
363
|
+
end # Peggy
|
data/lib/Copy of peg.rb
ADDED
@@ -0,0 +1,135 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'builder'
|
3
|
+
# require File.join(File.dirname(__FILE__), 'builder')
|
4
|
+
|
5
|
+
module Peggy
|
6
|
+
|
7
|
+
# Implements the Parser Expression Grammar (PEG), one of several grammars supported.
# NOTE(review): compile? and the build_* helpers call Builder DSL methods
# (reset!, one, each, opt, many, some, pos, neg) although this class derives
# from Parser -- confirm Parser provides them or whether PEG should derive
# from Builder.
class PEG < Parser

  # Builder preloaded with the grammar of the PEG notation itself.
  class PEGCompiler < Builder

    def initialize
      super
      build
    end

    private

    # Define the productions of the PEG notation using the Builder DSL.
    def build
      # Hierarchical syntax
      grammar{each{spacing; some{definition}; eof}}
      definition{each{identifier; left_arrow; expression}}
      expression{each{sequence; many{each{slash; sequence}}}}
      sequence{many{prefix}}
      prefix{each{opt{one{peek; disallow}}; suffix}}
      suffix{each{primary; opt{one{question; star; plus}}}}
      primary{one{each{identifier; neg{left_arrow}}
        each{lparen; expression; rparen}
        literal; klass; dot}
      }
      # Lexical syntax
      identifier{each{ident_start; many{ident_cont}; spacing}}
      ident_start{lit /[a-zA-Z_]/}
      ident_cont{one{ident_start; lit /[0-9]/}}
      literal{one{
        each{quote; many{each{neg{quote}; char}}; quote; spacing}
        each{quotes; many{each{neg{quotes}; char}}; quotes; spacing}
      }}
      klass{each{lit '['; many{each{neg{lit ']'}; range}}; lit ']'; spacing}}
      range{one{each{char; lit '-'; char}; char}}
      char{lit /\\([nrt'"\[\]\\]|[0-2][0-7][0-7]|[0-7][0-7]?)|[^\\]/}
      #char{one{
      #  each{lit '\\'; one{
      #    quote
      #    quotes
      #    lit /[nrt\[\]\\]/
      #    lit /[0-2][0-7][0-7]/
      #    lit /[0-7][0-7]?/
      #  }}
      #  lit /[^\\]/
      #}}
      left_arrow{each{lit '<-'; spacing}}
      slash{each{lit '/'; spacing}}
      peek{each{lit '&'; spacing}}
      disallow{each{lit '!'; spacing}}
      question{each{lit '?'; spacing}}
      star{each{lit '*'; spacing}}
      plus{each{lit '+'; spacing}}
      lparen{each{lit '('; spacing}}
      rparen{each{lit ')'; spacing}}
      dot{each{lit '.'; spacing}}
      quote{lit "'"}
      quotes{lit '"'}
      spacing{many{one{space; comment}}}
      comment{each{lit '#'; many{each{neg{eol}; lit /./}}; eol}}
      space{one{lit ' '; lit "\t"; eol}}
      eol{one{lit "\r\n"; lit "\n"; lit "\r"}}
    end
  end # PEGCompiler

  # Compile a grammar written in PEG notation: parse +text+ with a
  # PEGCompiler, then walk the resulting AST and define one production per
  # 'definition' node. A definition with a single sequence is built directly;
  # several sequences are wrapped in an ordered choice.
  # Raises a RuntimeError ("Invalid grammar") when +text+ does not parse.
  # NOTE(review): parse!, ast, find_each and count come from code outside this
  # file -- their exact contracts are assumed from how they are used here.
  def compile? text
    reset!
    compiler = PEGCompiler.new
    raise "Invalid grammar" unless compiler.parse! :grammar, text
    ast = compiler.ast
    ast.find_each 'definition' do |definition|
      send definition.identifier.to_sym do
        if definition.count('sequence') == 1
          build_sequence definition.expression.sequence
        else
          one do
            definition.find_each 'sequence' do |sequence|
              build_sequence sequence
            end
          end
        end
      end
    end
  end

  private

  # Build the elements for one 'sequence' node: a lone prefix is built
  # directly, several prefixes are wrapped in a sequence element.
  def build_sequence sequence
    if sequence.count('prefix') == 1
      build_prefix sequence.prefix
    else
      each do
        sequence.find_each 'prefix' do |prefix|
          build_prefix prefix
        end
      end
    end
  end

  # Build the elements for one 'prefix' node. An '&' (peek) wraps the suffix
  # in a positive predicate, a '!' (disallow) in a negative predicate.
  # Presumably the peek/disallow accessors are nil when the marker is absent
  # -- TODO confirm against the AST implementation.
  def build_prefix prefix
    if prefix.peek
      pos do
        build_suffix prefix.suffix
      end
    elsif prefix.disallow
      neg do
        build_suffix prefix.suffix
      end
    else
      build_suffix prefix.suffix
    end
  end

  # Build the elements for one 'suffix' node. '?' makes the primary optional,
  # '*' repeats it zero or more times, '+' one or more times; without a
  # marker the primary is built as-is.
  # NOTE(review): build_primary is not defined in the visible source and
  # still has to be supplied.
  def build_suffix suffix
    if suffix.question
      opt do
        build_primary suffix.primary
      end
    elsif suffix.star
      many do
        build_primary suffix.primary
      end
    elsif suffix.plus
      some do
        build_primary suffix.primary
      end
    else
      build_primary suffix.primary
    end
  end

end #PEG
|
135
|
+
end # Peggy
|