peggy 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,363 @@
1
+ require 'rubygems'
2
+ require 'parser'
3
+ # require File.join(File.dirname(__FILE__), 'parser')
4
+
5
+ module Peggy
6
+
7
# Root of the syntax element hierarchy. Concrete elements override #match.
class Element
  # Factory shortcut: forwards every argument to the constructor and
  # returns the freshly created instance.
  def self.build(*args)
    new(*args)
  end

  # Test whether this element matches at +index+ using +parser+.
  # Subclasses return the index following the match, or NO_MATCH when
  # there is none. The base implementation always raises.
  def match(parser, index)
    raise "Must override match"
  end

  # Tracing hook called with a match result; hands +index+ back untouched.
  def report(index)
    # puts "#{to_s} #{index}"
    index
  end
end
25
+
26
# Element made of an ordered list of children. It matches only when every
# child matches, each one starting where the previous one stopped.
class Sequence < Element
  # Append +element+ to the children, creating the list on first use.
  def add(element)
    @list ||= []
    @list << element
  end

  # Operator form of add(element).
  alias_method :<<, :add

  # Fetch the child stored at position +index+.
  def [](index)
    @list[index]
  end

  # Yield each child in insertion order.
  def each(&blk)
    @list.each(&blk)
  end

  # Run the children one after another, feeding each the end index of the
  # previous one. Returns NO_MATCH as soon as a child fails; otherwise
  # returns the end index produced by the last child.
  # Raises if no child was ever added.
  def match(parser, index)
    raise "no children added to sequence" unless @list
    position = index
    each do |child|
      position = child.match(parser, position)
      return NO_MATCH unless position
    end
    report(position)
  end
end
58
+
59
# Ordered choice: the children are tried in the order they were added and
# the first one that matches decides the result.
class Alternatives < Sequence
  # Try every child at the same +index+. Returns the end index of the
  # first child that matches, or NO_MATCH when none does.
  # Raises if no child was ever added.
  def match(parser, index)
    raise "no children added to alternate" unless @list
    each do |candidate|
      stop = candidate.match(parser, index)
      next unless stop
      return report(stop)
    end
    report(NO_MATCH)
  end
end
73
+
74
# An element which tries its single child multiple times. It is greedy,
# meaning it keeps matching as long as possible, unless the range specifies
# a maximum number of matches.
class Multiple < Element
  # A big number, used by subclasses as the "unbounded" upper limit.
  MANY = 32767
  # The minimum and maximum number of tries
  attr_accessor :range
  # The single child
  attr_accessor :child

  # Init the range
  def initialize range
    @range = range
  end

  # Synonym for child=(element)
  alias :'<<' :'child='

  # Matches the child repeatedly. The range gives the least and most number
  # of matches. If the number of matches is less than the minimum of the
  # range then NO_MATCH is returned. If equal to or more than the minimum
  # then the end index of the last match is returned.
  # Raises if the child or the range has not been set.
  def match parser, index
    raise "multiple element child not set" unless child
    raise "multiple element range not set" unless range
    # For an exclusive range (a...b) the largest accepted count is b - 1.
    # Looping all the way to b would overshoot and then fail the
    # range === count test below even though b - 1 matches were fine.
    limit = range.last
    limit -= 1 if range.respond_to?(:exclude_end?) && range.exclude_end?
    count = 0
    while count < limit
      found = child.match parser, index
      break unless found
      index = found
      count += 1
    end
    report range === count ? index : NO_MATCH
  end
end
108
+
109
# Greedy repetition accepting zero or more matches of its child.
class AnyNumber < Multiple
  # Configure the repetition range as 0 up to MANY.
  def initialize
    super(0..MANY)
  end
end
115
+
116
# Greedy repetition requiring one or more matches of its child.
class AtLeastOne < Multiple
  # Configure the repetition range as 1 up to MANY.
  def initialize
    super(1..MANY)
  end
end
122
+
123
# Repetition accepting either no match or a single match of its child.
class Optional < Multiple
  # Configure the repetition range as 0 to 1.
  def initialize
    super(0..1)
  end
end
129
+
130
# Positive predicate: succeeds only when its single child matches, but
# never advances the index.
class Positive < Element
  # The single child
  attr_accessor :child

  # Operator form of child=(element).
  alias_method :<<, :child=

  # Try the child once at +index+. Returns the unchanged +index+ when the
  # child matches and NO_MATCH when it does not.
  # Raises if the child has not been set.
  def match(parser, index)
    raise "positive element child not set" unless child
    return NO_MATCH unless child.match(parser, index)
    index
  end
end
147
+
148
# Negative predicate: succeeds only when its single child does NOT match,
# and never advances the index.
class Negative < Positive
  # Try the child once at +index+. Returns NO_MATCH when the child matches
  # and the unchanged +index+ when it does not.
  # Raises if the child has not been set.
  def match(parser, index)
    raise "negative element child not set" unless child
    if child.match(parser, index)
      NO_MATCH
    else
      index
    end
  end
end
157
+
158
# Match another production in the grammar.
class Reference < Element
  # The name of the production to lookup and match (a Symbol, or nil when
  # not yet set).
  attr_reader :name

  # Init the name. The name may be omitted and assigned later via name=.
  def initialize name=nil
    self.name = name
  end

  # Set the name of production to match. The value is converted to a
  # Symbol; nil is stored as-is so that the constructor's default argument
  # does not blow up on nil.to_sym.
  def name= value
    @name = value.nil? ? nil : value.to_sym
  end

  # Match the entire production from the parser grammar by delegating to
  # parser.match? with the production name. If it matches the end index is
  # returned. If not, NO_MATCH is returned.
  # Raises if the name has not been set.
  def match parser, index
    raise "reference name not set" unless name
    parser.match? name, index
  end

  # The production name as a String. to_s must return a String, otherwise
  # string interpolation ignores it and prints the default object form.
  def to_s
    name.to_s
  end
end
184
+
185
# A named grammar production. Its one and only child is the definition
# that gets matched.
class Production < Reference
  # The production definition.
  attr_accessor :child

  # Store the production name (handled by Reference) and its definition.
  def initialize(name = nil, child = nil)
    super(name)
    @child = child
  end

  # Operator form of child=(element).
  alias_method :<<, :child=

  # Match the definition once at +index+. Returns whatever the child
  # returns: its end index on success, NO_MATCH otherwise.
  # Raises if the name or the child has not been set.
  def match(parser, index)
    raise "production name not set" unless name
    raise "production child not set" unless child
    outcome = @child.match(parser, index)
    report(outcome)
  end
end
207
+
208
# Element matching a literal string or a regular expression.
class Literal < Element
  # Value to match.
  attr_reader :value

  # Store the value exactly as given.
  # NOTE(review): this assigns @value directly, so a Regexp handed to the
  # constructor does not go through the normalisation done by value= --
  # confirm whether that is intended.
  def initialize(value = nil)
    @value = value
  end

  # Set the value to match. A Regexp is first passed through
  # correct_regexp so that it checks at the beginning of the string; any
  # other value is stored unchanged.
  # NOTE(review): correct_regexp is not defined in this part of the file --
  # presumably provided elsewhere; verify.
  def value=(literal)
    @value = literal.is_a?(Regexp) ? correct_regexp(literal) : literal
  end

  # Ask the parser whether the value occurs at +index+ (parser.literal?).
  # Returns the parser's answer: the end index on a match, NO_MATCH
  # otherwise.
  def match(parser, index)
    outcome = parser.literal?(value, index)
    report(outcome)
  end

  # Inspect form of the value.
  def to_s
    value.inspect
  end
end
235
+
236
# Parser builder. The built in methods create syntax elements. Any other
# method called on this object creates a reference to a production, or an
# actual production if called at the top level with a block.
# Todo: Change to a class and separate from Parser.
class Builder < Parser
  # Productions to build
  attr_reader :productions
  # Current parent being built
  attr_reader :parent

  def initialize
    reset!
  end

  # Put the builder back into building mode with an empty production table.
  def reset!
    @building = true
    @productions = {}
  end

  # Reference a production by its name index.
  def [] index
    productions[index]
  end

  # While building: inside a production the call adds a Reference named
  # after the method to the current parent; at the top level with a block
  # it creates a Production of that name, runs the block to fill it and
  # registers it. Anything else falls through to the default behaviour.
  #
  # Once parsing has started (see parse?) the call looks up the production
  # of that name and matches it at the index given as first argument.
  def method_missing name, *args
    if @building
      if @parent
        @parent << Reference.new(name)
      elsif block_given?
        prod = Production.new name
        @parent = prod
        begin
          yield
        ensure
          # Always leave the top level clean. Without this a block that
          # raises would leave @parent set, and every later top-level
          # production would be added as a reference to the broken one.
          @parent = nil
        end
        @productions[name] = prod
      else
        super
      end
    else
      prod = @productions[name]
      super unless prod
      # puts "matching #{name} at #{args.first}"
      prod.match self, args.first
    end
  end

  # Add an Alternatives element to the parent.
  def one &blk
    build_piece Alternatives, blk
  end
  # Synonym for one().
  alias :alt :one

  # With exactly one argument this defers to the inherited eof. With any
  # other argument count it is routed through method_missing, so that eof
  # can be used as a production name inside a grammar block.
  def eof *args
    if args.length == 1
      super args.first
    else
      method_missing :eof, *args
    end
  end

  # Add a Sequence element to the parent.
  def each &blk
    build_piece Sequence, blk
  end
  # Synonym for each()
  alias :seq :each

  # Add a Literal element to the parent. Several values are wrapped in an
  # Alternatives element holding one Literal per value.
  def lit *values
    if values.size == 1
      build_piece Literal, nil, values.first
    else
      one { values.each { |v| build_piece Literal, nil, v } }
    end
  end

  # Add an AnyNumber element to the parent.
  def many &blk
    build_piece AnyNumber, blk
  end

  # Add an Optional element to the parent.
  def opt &blk
    build_piece Optional, blk
  end

  # Add an AtLeastOne element to the parent.
  def some &blk
    build_piece AtLeastOne, blk
  end

  # Add a Negative element to the parent.
  def neg &blk
    build_piece Negative, blk
  end

  # Add a Positive element to the parent.
  def pos &blk
    build_piece Positive, blk
  end

  # Leave building mode, then defer to the inherited parse?.
  def parse? goal, index=0
    @building = nil
    super
  end

  private

  # Add an object of klass to the parent and, when a block is given, run
  # the block with that object as the current parent. If value is specified
  # it is passed to the klass constructor.
  def build_piece klass, blk=nil, value=nil
    # puts "building #{klass.name} with #{value.inspect}"
    elem = value ? klass.new(value) : klass.new
    @parent << elem
    if blk
      parent = @parent
      @parent = elem
      begin
        blk.call
      ensure
        # Restore the enclosing parent even when the block raises, so the
        # nesting state never gets stuck on a half-built element.
        @parent = parent
      end
      # Same return value as before the ensure was introduced.
      parent
    end
  end

end # Builder
362
+
363
+ end # Peggy
@@ -0,0 +1,135 @@
1
+ require 'rubygems'
2
+ require 'builder'
3
+ # require File.join(File.dirname(__FILE__), 'builder')
4
+
5
+ module Peggy
6
+
7
# Implements the Parsing Expression Grammar (PEG), one of several grammars
# supported.
#
# Inherits from Builder (itself a Parser) because compile? and its helpers
# drive the builder DSL: reset!, one, each, opt, many, some, pos, neg and
# the method_missing based production definitions.
class PEG < Builder

  # Grammar of the PEG notation itself, expressed with the builder DSL.
  class PEGCompiler < Builder

    def initialize
      super
      build
    end

    private

    # Define every production of the PEG notation.
    def build
      # Hierarchical syntax
      grammar{each{spacing; some{definition}; eof}}
      definition{each{identifier; left_arrow; expression}}
      expression{each{sequence; many{each{slash; sequence}}}}
      sequence{many{prefix}}
      prefix{each{opt{one{peek; disallow}}; suffix}}
      suffix{each{primary; opt{one{question; star; plus}}}}
      primary{one{each{identifier; neg{left_arrow}}
          each{lparen; expression; rparen}
          literal; klass; dot}
      }
      # Lexical syntax
      identifier{each{ident_start; many{ident_cont}; spacing}}
      ident_start{lit /[a-zA-Z_]/}
      ident_cont{one{ident_start; lit /[0-9]/}}
      literal{one{
          each{quote; many{each{neg{quote}; char}}; quote; spacing}
          each{quotes; many{each{neg{quotes}; char}}; quotes; spacing}
      }}
      klass{each{lit '['; many{each{neg{lit ']'}; range}}; lit ']'; spacing}}
      range{one{each{char; lit '-'; char}; char}}
      char{lit /\\([nrt'"\[\]\\]|[0-2][0-7][0-7]|[0-7][0-7]?)|[^\\]/}
      #char{one{
      #  each{lit '\\'; one{
      #    quote
      #    quotes
      #    lit /[nrt\[\]\\]/
      #    lit /[0-2][0-7][0-7]/
      #    lit /[0-7][0-7]?/
      #  }}
      #  lit /[^\\]/
      #}}
      left_arrow{each{lit '<-'; spacing}}
      slash{each{lit '/'; spacing}}
      peek{each{lit '&'; spacing}}
      disallow{each{lit '!'; spacing}}
      question{each{lit '?'; spacing}}
      star{each{lit '*'; spacing}}
      plus{each{lit '+'; spacing}}
      lparen{each{lit '('; spacing}}
      rparen{each{lit ')'; spacing}}
      dot{each{lit '.'; spacing}}
      quote{lit "'"}
      quotes{lit '"'}
      spacing{many{one{space; comment}}}
      comment{each{lit '#'; many{each{neg{eol}; lit /./}}; eol}}
      space{one{lit ' '; lit "\t"; eol}}
      eol{one{lit "\r\n"; lit "\n"; lit "\r"}}
    end
  end # PEGCompiler

  # Compile a grammar written in PEG notation into productions of this
  # builder. The text is parsed with PEGCompiler starting at :grammar;
  # "Invalid grammar" is raised when that parse fails. Each 'definition'
  # node of the resulting tree then becomes one production: a definition
  # with a single sequence is built directly, several sequences are
  # wrapped in an ordered choice.
  def compile? text
    reset!
    compiler = PEGCompiler.new
    raise "Invalid grammar" unless compiler.parse! :grammar, text
    ast = compiler.ast
    ast.find_each 'definition' do |definition|
      send definition.identifier.to_sym do
        if definition.count('sequence') == 1
          build_sequence definition.expression.sequence
        else
          one do
            definition.find_each 'sequence' do |sequence|
              build_sequence sequence
            end
          end
        end
      end
    end
  end

  private

  # Build one sequence node: a lone prefix is built directly, several
  # prefixes are wrapped in a Sequence element.
  def build_sequence sequence
    if sequence.count('prefix') == 1
      build_prefix sequence.prefix
    else
      each do
        sequence.find_each 'prefix' do |prefix|
          build_prefix prefix
        end
      end
    end
  end

  # Build one prefix node: '&' wraps the suffix in a positive predicate,
  # '!' in a negative predicate, otherwise the suffix is built as-is.
  def build_prefix prefix
    if prefix.peek
      pos do
        build_suffix prefix.suffix
      end
    elsif prefix.disallow
      neg do
        build_suffix prefix.suffix
      end
    else
      build_suffix prefix.suffix
    end
  end

  # Build one suffix node: '?' makes the primary optional, '*' repeats it
  # zero or more times, '+' one or more times; without an operator the
  # primary is built as-is.
  def build_suffix suffix
    if suffix.question
      opt do
        build_primary suffix.primary
      end
    elsif suffix.star
      many do
        build_primary suffix.primary
      end
    elsif suffix.plus
      some do
        build_primary suffix.primary
      end
    else
      build_primary suffix.primary
    end
  end

  # Translation of a primary node (identifier, parenthesised expression,
  # literal, character class or dot) is not implemented yet. Fail loudly:
  # without this definition the call would fall into
  # Builder#method_missing and silently add a reference named
  # :build_primary to the grammar.
  # NOTE(review): confirm no other file supplies PEG#build_primary.
  def build_primary primary
    raise NotImplementedError, "PEG#build_primary is not implemented"
  end

end #PEG
135
+ end # Peggy