peggy 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,363 @@
1
+ require 'rubygems'
2
+ require 'parser'
3
+ # require File.join(File.dirname(__FILE__), 'parser')
4
+
5
+ module Peggy
6
+
7
# Root of the syntax element hierarchy. Concrete elements subclass this and
# provide their own #match.
class Element
  class << self
    # Factory shortcut: forwards every argument to +new+ and returns the
    # newly created element.
    def build(*args)
      new(*args)
    end
  end

  # Test to see if there is a match of this element at the current index.
  # Subclasses return the index following the match, or NO_MATCH if there is
  # none. This base implementation has nothing to match and always raises.
  def match(parser, index)
    raise "Must override match"
  end

  # Hook that match results are passed through; hands +index+ back unchanged.
  # The commented-out line is a debugging trace.
  def report(index)
    # puts "#{to_s} #{index}"
    index
  end
end
25
+
26
# An element that matches a sequence of elements. All must match for the sequence to match.
class Sequence < Element
  # Add a child element. The child list is created lazily on the first add,
  # and the list is returned.
  def add(element)
    (@list ||= []) << element
  end

  # Synonym for add(element)
  alias :'<<' :add

  # Reference a child by index. Returns nil when no child has been added yet
  # (previously this raised NoMethodError on nil).
  def [](index)
    @list && @list[index]
  end

  # Child iterator. Iterates over nothing when no child has been added yet
  # (previously this raised NoMethodError on nil).
  def each(&blk)
    (@list || []).each(&blk)
  end

  # Match each child in sequence. If any fail this returns NO_MATCH. If all succeed this
  # returns the end index of the last. Raises if no child was ever added.
  def match(parser, index)
    raise "no children added to sequence" unless @list
    each do |element|
      # Each child starts where the previous one stopped.
      index = element.match parser, index
      return NO_MATCH unless index
    end
    report index
  end
end
58
+
59
# Ordered choice: the children are tried one after another at the same index
# and the first child that matches decides the result.
class Alternatives < Sequence
  # Try every child at +index+, in the order they were added. Returns the end
  # index reported by the first child that matches, or NO_MATCH when none
  # does. Raises if no child was ever added.
  def match(parser, index)
    raise "no children added to alternate" unless @list
    each do |candidate|
      ending = candidate.match(parser, index)
      next unless ending
      return report(ending)
    end
    report(NO_MATCH)
  end
end
73
+
74
# An element which tries its single child multiple times. It is greedy, meaning it will continue
# to match as long as possible, unless the range specifies a maximum number of matches.
class Multiple < Element
  # A big number, used as the upper bound by the "unbounded" subclasses.
  MANY = 32767
  # The minimum and maximum number of tries
  attr_accessor :range
  # The single child
  attr_accessor :child

  # Init the range
  def initialize(range)
    @range = range
  end

  # synonym for child=(element)
  alias :'<<' :'child='

  # Matches the child multiple times. The range specifies the least and most number of matches.
  # If the number of matches is less than the minimum of the range then NO_MATCH is returned.
  # If equal or more than the minimum then the end index of the last match is returned.
  # Raises if the child or the range has not been set.
  def match(parser, index)
    raise "multiple element child not set" unless child
    raise "multiple element range not set" unless range
    limit = max_matches
    count = 0
    while limit.nil? || count < limit
      found = child.match parser, index
      break unless found
      count += 1
      if found == index
        # Zero-width match: the child consumed nothing, so trying again at the
        # same index cannot make progress. Count it as if the loop had run to
        # its bound instead of spinning there.
        count = limit || [count, range.begin || 0].max
        break
      end
      index = found
    end
    report(range === count ? index : NO_MATCH)
  end

  private

  # Largest number of matches the range allows, or nil for an endless range.
  # An exclusive range (a...b) allows at most b - 1 matches.
  def max_matches
    top = range.end
    return nil if top.nil?
    range.exclude_end? ? top - 1 : top
  end
end
108
+
109
# Greedy repetition that accepts zero or more matches of its child.
class AnyNumber < Multiple
  # Fixes the range at 0..MANY.
  def initialize
    super(Range.new(0, MANY))
  end
end
115
+
116
# Greedy repetition that requires one or more matches of its child.
class AtLeastOne < Multiple
  # Fixes the range at 1..MANY.
  def initialize
    super(Range.new(1, MANY))
  end
end
122
+
123
# Repetition that accepts its child zero times or once.
class Optional < Multiple
  # Fixes the range at 0..1.
  def initialize
    super(Range.new(0, 1))
  end
end
129
+
130
# Positive predicate: the single child must match at the current index, but
# the index is not advanced. When the child does not match the result is
# NO_MATCH.
class Positive < Element
  # The single child
  attr_accessor :child

  # synonym for child=(element)
  alias :'<<' :'child='

  # Tries the child once at +index+. Returns the original +index+ when the
  # child matches and NO_MATCH otherwise. Raises if the child is not set.
  def match(parser, index)
    raise "positive element child not set" unless child
    return NO_MATCH unless child.match(parser, index)
    index
  end
end
147
+
148
# Negative predicate: succeeds, without advancing the index, only when the
# single child does NOT match at the current index. When the child matches the
# result is NO_MATCH.
class Negative < Positive
  # Tries the child once at +index+. Returns the original +index+ when the
  # child fails to match and NO_MATCH when it matches. Raises if the child is
  # not set.
  def match(parser, index)
    raise "negative element child not set" unless child
    return index unless child.match(parser, index)
    NO_MATCH
  end
end
157
+
158
# Match another production in the grammar.
class Reference < Element
  # The name of the production to lookup and match (a Symbol, or nil if unset).
  attr_reader :name

  # Init the name. The name may be omitted and assigned later through name=.
  def initialize(name = nil)
    self.name = name
  end

  # Set the name of production to match. The value is converted with to_sym;
  # nil is stored as nil so that an unnamed reference can be created (the
  # default constructor argument used to crash in nil.to_sym).
  def name=(value)
    @name = value.nil? ? nil : value.to_sym
  end

  # Match the entire production from the parser grammar. If it matches
  # the end index is returned. If not, NO_MATCH is returned.
  # Raises if the name has not been set.
  def match(parser, index)
    raise "reference name not set" unless name
    parser.match? name, index
  end

  # The production name as a String (empty when the name is unset). to_s must
  # return a String for interpolation to use it.
  def to_s
    name.to_s
  end
end
184
+
185
# A named grammar production. Its one and only child is the definition that
# gets matched.
class Production < Reference
  # The production definition.
  attr_accessor :child

  # Init the name (handled by the superclass) and the child.
  def initialize(name = nil, child = nil)
    super(name)
    @child = child
  end

  # Synonym of child=(element)
  alias :'<<' :'child='

  # Match the production one time by matching its child at +index+. Returns
  # whatever the child returns: the end index on success, NO_MATCH otherwise.
  # Raises if the name or the child has not been set.
  def match(parser, index)
    raise "production name not set" unless name
    raise "production child not set" unless child
    outcome = @child.match(parser, index)
    report(outcome)
  end
end
207
+
208
# Matcher of a literal string or regular expression.
class Literal < Element
  # Value to match.
  attr_reader :value

  # Init the value. The value is stored exactly as given; the regexp
  # correction performed by value= is not applied here.
  def initialize(value = nil)
    @value = value
  end

  # Set the value to match. A Regexp is first passed through correct_regexp,
  # intended to make sure regular expressions check at the beginning of the
  # string.
  # NOTE(review): correct_regexp is not defined in this class or in Element as
  # visible here - confirm where it is expected to come from.
  def value=(literal)
    @value = literal.is_a?(Regexp) ? correct_regexp(literal) : literal
  end

  # Match the literal value by asking the parser's literal? for it at +index+.
  # If it matches the end index is returned. If not, NO_MATCH is returned.
  def match(parser, index)
    outcome = parser.literal?(value, index)
    report(outcome)
  end

  # The inspected form of the value.
  def to_s
    value.inspect
  end
end
235
+
236
# Parser builder. The built in methods create syntax elements. Any other
# method called on this object creates a reference to a production, or an
# actual production if called at the top level with a block.
# Todo: Change to a class and separate from Parser.
class Builder < Parser
  # Productions to build
  attr_reader :productions
  # Current parent being built
  attr_reader :parent

  def initialize
    reset!
  end

  # Return to building mode with an empty set of productions. Any parent left
  # over from an interrupted build is dropped as well.
  def reset!
    @building = true
    @productions = {}
    @parent = nil
  end

  # Reference a production by its name index.
  def [](index)
    productions[index]
  end

  # While building: inside a production (a parent is set) any unknown method
  # becomes a Reference appended to the parent; at the top level with a block
  # it defines the Production named after the method; otherwise the call is
  # passed to super. Once parse? has switched building off, the production
  # named +name+ is looked up and matched at args.first.
  def method_missing(name, *args)
    if @building
      if @parent
        ref = Reference.new name
        @parent << ref
      elsif block_given?
        prod = Production.new name
        @parent = prod
        begin
          yield
        ensure
          # Always leave production scope, even if the grammar block raised,
          # so later calls are not mistaken for references.
          @parent = nil
        end
        @productions[name] = prod
      else
        super
      end
    else
      prod = @productions[name]
      super unless prod
      # puts "matching #{name} at #{args.first}"
      prod.match self, args.first
    end
  end

  # Add an Alternatives element to the parent.
  def one(&blk)
    build_piece Alternatives, blk
  end
  # Synonym for one().
  alias :alt :one

  # With exactly one argument this delegates to the superclass eof with that
  # argument. With any other argument count the call goes through
  # method_missing, so +eof+ can be used as a name inside a grammar.
  def eof(*args)
    if args.length == 1
      super args.first
    else
      method_missing :eof, *args
    end
  end

  # Add a Sequence element to the parent.
  def each(&blk)
    build_piece Sequence, blk
  end
  # Synonym for each()
  alias :seq :each

  # Add a Literal element to the parent. Several values are wrapped in an
  # Alternatives element holding one Literal per value.
  def lit(*values)
    if values.size == 1
      build_piece Literal, nil, values.first
    else
      one { values.each { |v| build_piece Literal, nil, v } }
    end
  end

  # Add an AnyNumber element to the parent.
  def many(&blk)
    build_piece AnyNumber, blk
  end

  # Add an Optional element to the parent.
  def opt(&blk)
    build_piece Optional, blk
  end

  # Add an AtLeastOne element to the parent.
  def some(&blk)
    build_piece AtLeastOne, blk
  end

  # Add a Negative element to the parent.
  def neg(&blk)
    build_piece Negative, blk
  end

  # Add a Positive element to the parent.
  def pos(&blk)
    build_piece Positive, blk
  end

  # Leave building mode, then hand over to the superclass parse? with the same
  # arguments.
  def parse?(goal, index = 0)
    @building = nil
    super
  end

  private

  # Add an object of klass to the parent and call its block with the new
  # object as the current parent. If value is specified it is passed to the
  # klass constructor. Raises when there is no parent, i.e. when an element
  # method is used outside of a production block.
  def build_piece(klass, blk = nil, value = nil)
    # puts "building #{klass.name} with #{value.inspect}"
    raise "no production is being built" unless @parent
    elem = value ? klass.new(value) : klass.new
    @parent << elem
    return unless blk

    outer = @parent
    @parent = elem
    begin
      blk.call
    ensure
      # Restore the enclosing parent even if the block raised.
      @parent = outer
    end
    outer
  end

end # Builder
362
+
363
+ end # Peggy
@@ -0,0 +1,135 @@
1
+ require 'rubygems'
2
+ require 'builder'
3
+ # require File.join(File.dirname(__FILE__), 'builder')
4
+
5
+ module Peggy
6
+
7
# Implements the Parsing Expression Grammar (PEG), one of several grammars supported.
# NOTE(review): compile? and its helpers call reset!, one, each, pos, neg,
# opt, many and some, which in the visible source are defined on Builder,
# while this class inherits from Parser (not shown here) - confirm the
# intended superclass.
class PEG < Parser

  # Builder preloaded with the grammar of the PEG notation itself.
  class PEGCompiler < Builder

    def initialize
      super
      build
    end

    private

    # Define the productions of the PEG notation using the Builder DSL.
    def build
      # Hierarchical syntax
      grammar{each{spacing; some{definition}; eof}}
      definition{each{identifier; left_arrow; expression}}
      expression{each{sequence; many{each{slash; sequence}}}}
      sequence{many{prefix}}
      prefix{each{opt{one{peek; disallow}}; suffix}}
      suffix{each{primary; opt{one{question; star; plus}}}}
      primary{one{each{identifier; neg{left_arrow}}
        each{lparen; expression; rparen}
        literal; klass; dot}
      }
      # Lexical syntax
      identifier{each{ident_start; many{ident_cont}; spacing}}
      ident_start{lit /[a-zA-Z_]/}
      ident_cont{one{ident_start; lit /[0-9]/}}
      literal{one{
        each{quote; many{each{neg{quote}; char}}; quote; spacing}
        each{quotes; many{each{neg{quotes}; char}}; quotes; spacing}
      }}
      klass{each{lit '['; many{each{neg{lit ']'}; range}}; lit ']'; spacing}}
      range{one{each{char; lit '-'; char}; char}}
      char{lit /\\([nrt'"\[\]\\]|[0-2][0-7][0-7]|[0-7][0-7]?)|[^\\]/}
      #char{one{
      #  each{lit '\\'; one{
      #    quote
      #    quotes
      #    lit /[nrt\[\]\\]/
      #    lit /[0-2][0-7][0-7]/
      #    lit /[0-7][0-7]?/
      #  }}
      #  lit /[^\\]/
      #}}
      left_arrow{each{lit '<-'; spacing}}
      slash{each{lit '/'; spacing}}
      peek{each{lit '&'; spacing}}
      disallow{each{lit '!'; spacing}}
      question{each{lit '?'; spacing}}
      star{each{lit '*'; spacing}}
      plus{each{lit '+'; spacing}}
      lparen{each{lit '('; spacing}}
      rparen{each{lit ')'; spacing}}
      dot{each{lit '.'; spacing}}
      quote{lit "'"}
      quotes{lit '"'}
      spacing{many{one{space; comment}}}
      comment{each{lit '#'; many{each{neg{eol}; lit /./}}; eol}}
      space{one{lit ' '; lit "\t"; eol}}
      eol{one{lit "\r\n"; lit "\n"; lit "\r"}}
    end
  end # PEGCompiler

  # Parse +text+ as a PEG grammar with PEGCompiler and, for every definition
  # found in the resulting tree, define a production named after the
  # definition's identifier. Raises "Invalid grammar" when the text does not
  # parse. A definition with a single sequence is built directly; several
  # sequences are wrapped in an alternatives element.
  def compile? text
    reset!
    compiler = PEGCompiler.new
    raise "Invalid grammar" unless compiler.parse! :grammar, text
    ast = compiler.ast
    ast.find_each 'definition' do |definition|
      send definition.identifier.to_sym do
        if definition.count('sequence') == 1
          build_sequence definition.expression.sequence
        else
          one do
            definition.find_each 'sequence' do |sequence|
              build_sequence sequence
            end
          end
        end
      end
    end
  end

  private

  # Build one sequence node: a single prefix is built directly, several
  # prefixes are wrapped in a sequence element.
  def build_sequence sequence
    if sequence.count('prefix') == 1
      build_prefix sequence.prefix
    else
      each do
        sequence.find_each 'prefix' do |prefix|
          build_prefix prefix
        end
      end
    end
  end

  # Build one prefix node. A peek ('&') wraps the suffix in a positive
  # predicate and a disallow ('!') in a negative predicate. The checks are
  # made on the prefix node itself (the original tested an undefined
  # +sequence+).
  def build_prefix prefix
    if prefix.peek
      pos do
        build_suffix prefix.suffix
      end
    elsif prefix.disallow
      neg do
        build_suffix prefix.suffix
      end
    else
      build_suffix prefix.suffix
    end
  end

  # Build one suffix node: '?' wraps the primary in opt, '*' in many and '+'
  # in some; without an operator the primary is built directly.
  # NOTE(review): suffix.star and suffix.plus are assumed to behave like
  # suffix.question (child lookup by production name) - confirm against the
  # AST API. build_primary is not defined in the visible source and still has
  # to be supplied.
  def build_suffix suffix
    if suffix.question
      opt do
        build_primary suffix.primary
      end
    elsif suffix.star
      many do
        build_primary suffix.primary
      end
    elsif suffix.plus
      some do
        build_primary suffix.primary
      end
    else
      build_primary suffix.primary
    end
  end

end #PEG
135
+ end # Peggy