tdp4r 1.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/doc/faq.txt ADDED
@@ -0,0 +1,37 @@
+ * How do I write a rule that represents left/right-associative
+   infix operators?
+
+   A good example is an arithmetic expression over "*", "/",
+   "+" and "-". If you use racc (a yacc-style parser generator for Ruby),
+   you would write the following rule:
+
+     prechigh
+       left '*','/'
+       left '+','-'
+     preclow
+     ...
+     expr : expr '*' expr { result = val[0] * val[2] }
+          | expr '/' expr { result = val[0] / val[2] }
+          | expr '+' expr { result = val[0] + val[2] }
+          | expr '-' expr { result = val[0] - val[2] }
+          | NUMBER        { result = val[0].to_i() }
+
+   In TDP4R, you can write the above rule as follows:
+
+     TDParser.define{|g|
+       g.expr = chainl(NUMBER >> Proc.new{|x| x[0].to_i},
+                       token("*")|token("/"),
+                       token("+")|token("-")){|x|
+         case x[1]
+         when "*"
+           x[0] * x[2]
+         when "/"
+           x[0] / x[2]
+         when "+"
+           x[0] + x[2]
+         when "-"
+           x[0] - x[2]
+         end
+       }
+       ...
+     }
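+
+   For reference, here is a complete, runnable version of that grammar as a
+   sketch. The grammar name 'calc', the digit token used in place of NUMBER,
+   and the sample input are illustrative assumptions, not part of the library:
+
+     require 'tdp'
+
+     calc = TDParser.define{|g|
+       # a plain digit token stands in for NUMBER here
+       g.expr = chainl(token(/\d+/) >> Proc.new{|x| x[0].to_i},
+                       token("*")|token("/"),
+                       token("+")|token("-")){|x|
+         case x[1]
+         when "*" then x[0] * x[2]
+         when "/" then x[0] / x[2]
+         when "+" then x[0] + x[2]
+         when "-" then x[0] - x[2]
+         end
+       }
+     }
+
+     p calc.expr.parse(["1", "+", "2", "*", "3"])   #=> 7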
data/doc/guide.txt ADDED
@@ -0,0 +1,150 @@
+ TDP4R Programmers Guide
+
+ Introduction
+ ------------
+ TDP4R is a Ruby component that helps us construct top-down parsers using
+ method calls. This document describes how to use TDP4R in two styles, both
+ of which look similar to JavaCC on the surface. In the first style, the
+ rules of a grammar are defined as methods (as shown in sample4.rb). In the
+ second, each rule is defined as if it were a property of a grammar object
+ (see also sample5.rb).
+
+ Defining Rules in a Module
+ --------------------------
+ The following parser class accepts expressions that consist of digits
+ and "+".
+
+   class MyParser
+     include TDParser
+
+     def expr
+       token(/\d+/) - token("+") - rule(:expr) >> proc{|x| x[0].to_i + x[2] } |
+       token(/\d+/) >> proc{|x| x[0].to_i }
+     end
+   end
+
+ In this class, the method expr represents the following production rule.
+
+   expr := int '+' expr
+         | int
+
+ In the first alternative of expr, the values accepted by token(/\d+/),
+ token("+") and rule(:expr) are assigned to x[0], x[1] and x[2],
+ respectively.
+ In order to parse "1 + 2", we first split it into an array of tokens,
+ ["1", "+", "2"], and then call the parse method of a parser object
+ created by MyParser.new(), as follows.
+
+   parser = MyParser.new()
+   parser.expr.parse(["1", "+", "2"])
+
+ Note that we can pass any one of the following objects to the parse method.
+
+   - an Enumerable object
+     E.g.: expr.parse(["1", "+", "2"])
+
+   - an object that has the methods 'shift' and 'unshift'
+     E.g.: expr.parse(TDParser::TokenGenerator.new{|x|
+             x.yield("1"); x.yield("+"); x.yield("2")
+           })
+
+   - a block
+     E.g.: expr.parse{|x| x.yield("1"); x.yield("+"); x.yield("2") }
+
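+ If the input is a plain string, any simple splitting strategy can produce
+ such a token array. For example (the regular expression used here is just
+ an illustration and is not part of TDP4R):
+
+   tokens = "1 + 2".scan(/\d+|[+]/)     #=> ["1", "+", "2"]
+   p MyParser.new().expr.parse(tokens)  #=> 3
+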
+ In the grammar above, '+' is right-associative. However, we *can't* make it
+ left-associative simply by writing the following.
+
+   def expr
+     rule(:expr) - token("+") - token(/\d+/) >> proc{|x| x[0].to_i + x[2].to_i } |
+     token(/\d+/) >> proc{|x| x[0].to_i }
+   end
+
+ This is the left-recursion problem, so we have to use one of the following
+ rules instead.
+
+   def expr
+     token(/\d+/) - (token("+") - token(/\d+/))*0 >> proc{|x|
+       x[1].inject(x[0].to_i){|acc,y|
+         case y[0]
+         when "+"
+           acc + y[1].to_i
+         end
+       }
+     }
+   end
+
+   def expr  # JavaCC style
+     n = nil
+     (token(/\d+/) >> proc{|x| n = x[0].to_i }) -
+     (token("+") - token(/\d+/) >> proc{|y|
+       case y[0]
+       when "+"
+         n += y[1].to_i
+       end
+     })*0 >> proc{|x| n }
+   end
+
+ In these rules, '(...)*N' accepts N or more repetitions of '(...)', and x[1]
+ holds the sequences of tokens accepted by '(...)*0'. For example, if
+ ["1", "+", "1", "+", "2"] is parsed by the rule
+
+   token(/\d+/) - (token("+") - token(/\d+/))*0,
+
+ then x[1] is [["+", "1"], ["+", "2"]].
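+
+ To see exactly what x[1] holds, you can return it unchanged from an action.
+ The following throwaway rule is only an illustration (the method name
+ 'repetitions' is made up and is not part of the samples):
+
+   class MyParser
+     def repetitions
+       token(/\d+/) - (token("+") - token(/\d+/))*0 >> proc{|x| x[1] }
+     end
+   end
+
+   p MyParser.new().repetitions.parse(["1", "+", "1", "+", "2"])
+   #=> [["+", "1"], ["+", "2"]]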
+
+
+ Defining Rules using TDParser.define()
+ ---------------------------------------
+ The rule defined in the first sample script (shown in the previous section)
+ can also be defined as follows.
+
+   parser = TDParser.define{|g|
+     g.expr =
+       g.token(/\d+/) - g.token("+") - g.expr >> proc{|x| x[0].to_i + x[2] } |
+       g.token(/\d+/) >> proc{|x| x[0].to_i }
+   }
+
+ (See also sample5.rb and sample6.rb)
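+
+ As before, the generated parser object responds to the rule name, so we can
+ write (the expected value follows from the rule above):
+
+   p parser.expr.parse(["1", "+", "2"])   #=> 3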
+
+ Parser Combinators
+ -------------------
+
+ * Constructors
+     token(obj)         matches a single token t such that obj === t (or t === obj)
+     rule(method)       refers to the rule returned by the named method
+     any()              any token
+     none()             no more tokens
+     empty()            the empty rule
+     fail()             failure
+     backref(label)     back reference to a labeled result
+     stackref(stack)    back reference to a result pushed onto a stack
+
+ * Operators
+     rule - rule        sequence
+     rule | rule        choice
+     rule * n           iteration (n or more times)
+     rule * (n..m)      iteration
+     rule / label       label the result
+     rule % stack       push the result onto a stack
+     ~ rule             negative lookahead
+
+ * Utility Functions
+     leftrec(base, rule1, ..., ruleN, &action)
+       This constructs the following rule:
+         base - ruleN* >> action' |
+         ... |
+         base - rule1* >> action' |
+         fail()
+     rightrec(rule1, ..., ruleN, base, &action)
+       This constructs the following rule:
+         ruleN* - base >> action' |
+         ... |
+         rule1* - base >> action' |
+         fail()
+     chainl(base, infix1, ..., infixN, &action)
+       left-associative chain of infix operators (infix1 binds tightest)
+     chainr(base, infix1, ..., infixN, &action)
+       right-associative chain of infix operators (infix1 binds tightest)
+
+
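+ As a small illustration of labels ('/') and back references, the following
+ rule accepts a word, a '-', and then the same word again. The grammar and
+ rule names here are made up for this example:
+
+   grammar = TDParser.define{|g|
+     g.pair = (g.token(/\w+/) / :w) - g.token("-") - g.backref(:w) >> proc{|x| x[:w][0] }
+   }
+   p grammar.pair.parse(["foo", "-", "foo"])   #=> "foo"
+   p grammar.pair.parse(["foo", "-", "bar"])   #=> nil
+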
+
+ StringTokenizer
+ -----------------
+ There is a simple tokenizer called TDPUtils::StringTokenizer in the library
+ "tdputils".
+ (See MyParser#parse in sample2.rb)
data/lib/tdp.rb ADDED
@@ -0,0 +1,463 @@
+ # -*- ruby -*-
+ #
+ # Top-down parser for embedding in a Ruby script.
+ #
+
+ require 'generator'
+
+ module TDParser
+   class ParserException < RuntimeError
+   end
+
+   class TokenGenerator < Generator
+     def initialize(*args)
+       super(*args)
+       @buffer = []
+     end
+
+     def shift()
+       if( @buffer.empty? )
+         if( self.next? )
+           token = self.next()
+         else
+           token = nil
+         end
+       else
+         token = @buffer.shift()
+       end
+       token
+     end
+
+     def unshift(*token)
+       @buffer.unshift(*token)
+     end
+   end
+
+   class TokenBuffer < Array
+     attr_accessor :map
+
+     def initialize(*args)
+       super(*args)
+       @map = {}
+     end
+
+     def [](idx)
+       case idx
+       when Symbol, String
+         @map[idx]
+       else
+         super(idx)
+       end
+     end
+
+     def []=(idx, val)
+       case idx
+       when Symbol, String
+         @map[idx] = val
+       else
+         super(idx, val)
+       end
+     end
+
+     def state()
+       @map[:__state__]
+     end
+
+     def state=(s)
+       @map[:__state__] = s
+     end
+
+     def clear()
+       super()
+       @map.clear()
+     end
+   end
+
+   class Sequence < Array
+     def +(seq)
+       self.dup.concat(seq)
+     end
+   end
+
+   module BufferUtils
+     def prepare(buff)
+       b = TokenBuffer.new()
+       b.map = buff.map
+       b
+     end
+
+     def recover(buff, ts)
+       buff.each{|b| ts.unshift(b)}
+       buff.clear()
+     end
+   end
+   include BufferUtils
+
+   class Rule < Proc
+     include BufferUtils
+
+     def -(r)
+       Rule.new{|ts, buff|
+         if( (x = self[ts, buff]).nil? )
+           nil
+         else
+           if( (y = r[ts, buff]).nil? )
+             nil
+           else
+             x + y
+           end
+         end
+       }
+     end
+
+     def |(r)
+       Rule.new{|ts, buff|
+         b = prepare(buff)
+         if( (x = self[ts, b]).nil? )
+           recover(b, ts)
+           r[ts, buff]
+         else
+           buff.insert(0, *b)
+           x
+         end
+       }
+     end
+
+     def *(n)
+       if( n.is_a?(Range) )
+         range = n
+         n = range.min
+       else
+         range = nil
+       end
+       Rule.new{|ts, buff|
+         x = true
+         xs = []
+         while( n > 0 )
+           n -= 1
+           b = prepare(buff)
+           if( (x = self[ts, b]).nil? )
+             recover(b, ts)
+             break
+           else
+             buff.insert(0, *b)
+             xs.push(x)
+           end
+         end
+         if( x.nil? )
+           nil
+         else
+           if( range )
+             range.each{
+               while( true )
+                 y = x
+                 b = prepare(buff)
+                 if( (x = self[ts, b]).nil? )
+                   recover(b, ts)
+                   x = y
+                   break
+                 else
+                   buff.insert(0, *b)
+                   xs.push(x)
+                 end
+               end
+             }
+           else
+             while( true )
+               y = x
+               b = prepare(buff)
+               if( (x = self[ts, b]).nil? )
+                 recover(b, ts)
+                 x = y
+                 break
+               else
+                 buff.insert(0, *b)
+                 xs.push(x)
+               end
+             end
+           end
+           Sequence[xs]
+         end
+       }
+     end
+
+     def >>(act)
+       Rule.new{|tokens, buff|
+         if( (x = self[tokens, buff]).nil? )
+           nil
+         else
+           x = TokenBuffer[*x]
+           x.map = buff.map
+           Sequence[act[x]]
+         end
+       }
+     end
+
+     def /(symbol)
+       Rule.new{|tokens, buff|
+         x = self[tokens, buff]
+         buff.map[symbol] = x
+         x
+       }
+     end
+
+     def %(stack)
+       Rule.new{|tokens, buff|
+         x = self[tokens, buff]
+         stack.push(x)
+         x
+       }
+     end
+
+     def >(symbol)
+       Rule.new{|tokens, buff|
+         buff[symbol] = buff.dup()
+         self[tokens, buff]
+       }
+     end
+
+     def ~@()
+       Rule.new{|tokens, buff|
+         b = prepare(buff)
+         r = self[tokens,b]
+         rev = b.reverse
+         recover(b, tokens)
+         if( r.nil? )
+           Sequence[Sequence[*rev]]
+         else
+           nil
+         end
+       }
+     end
+
+     def parse(tokens=nil, &blk)
+       if( blk.nil? )
+         if( tokens.respond_to?(:shift) && tokens.respond_to?(:unshift) )
+           @tokens = tokens
+         elsif( tokens.respond_to?(:each) )
+           @tokens = TokenGenerator.new(tokens)
+         else
+           @tokens = tokens
+         end
+       else
+         @tokens = TokenGenerator.new(&blk)
+       end
+       r = self[@tokens, TokenBuffer.new()]
+       if( r.nil? )
+         nil
+       else
+         r[0]
+       end
+     end
+
+     def peek()
+       t = @tokens.shift()
+       if( ! t.nil? )
+         @tokens.unshift(t)
+       end
+       t
+     end
+
+     def do(&block)
+       self >> block
+     end
+   end
+   # end of Rule
+
+   def rule(sym, *opts)
+     Rule.new{|tokens, buff|
+       res = nil
+       case sym
+       when Symbol, String
+         res = __send__(sym,*opts)[tokens, buff]
+       when Rule
+         res = sym[tokens, buff]
+       end
+       if( block_given? && !res.nil? )
+         res = yield(res)
+       end
+       res
+     }
+   end
+
+   def token(x, eqsym=:===)
+     Rule.new{|tokens, buff|
+       t = tokens.shift
+       buff.unshift(t)
+       if( x.__send__(eqsym,t) || t.__send__(eqsym,x) )
+         t = yield(t) if( block_given? )
+         Sequence[t]
+       else
+         nil
+       end
+     }
+   end
+
+   def __backref__(xs, eqsym)
+     x = xs.shift()
+     xs.inject(token(x, eqsym)){|acc,x|
+       case x
+       when Sequence
+         acc - __backref__(x, eqsym)
+       else
+         acc - token(x, eqsym)
+       end
+     }
+   end
+
+   def backref(x, eqsym=:===)
+     Rule.new{|tokens, buff|
+       ys = buff.map[x]
+       if( ys.nil? || ys.empty? )
+         nil
+       else
+         __backref__(ys.dup(), eqsym)[tokens,buff]
+       end
+     }
+   end
+
+   def stackref(stack, eqsym=:===)
+     Rule.new{|tokens, buff|
+       ys = stack.pop()
+       if( ys.nil? || ys.empty? )
+         nil
+       else
+         __backref__(ys.dup(), eqsym)[tokens,buff]
+       end
+     }
+   end
+
+   def state(s)
+     Rule.new{|tokens, buff|
+       if( buff.map[:state] == s )
+         Sequence[s]
+       else
+         nil
+       end
+     }
+   end
+
+   def empty_rule()
+     Rule.new{|tokens, buff| Sequence[nil] }
+   end
+   alias empty empty_rule
+
+   def any_rule()
+     Rule.new{|tokens, buff|
+       t = tokens.shift
+       if( t.nil? )
+         nil
+       else
+         Sequence[t]
+       end
+     }
+   end
+   alias any any_rule
+
+   def none_rule()
+     Rule.new{|tokens, buff|
+       t = tokens.shift
+       if( t.nil? )
+         Sequence[nil]
+       else
+         nil
+       end
+     }
+   end
+   alias none none_rule
+
+   def fail_rule()
+     Rule.new{|tokens, buff| nil }
+   end
+   alias fail fail_rule
+
+   def leftrec(*rules, &act)
+     f = Proc.new{|x|
+       x[1].inject(x[0]){|acc,y|
+         act.call(Sequence[acc,*y])
+       }
+     }
+     base = rules.shift()
+     rules.collect{|r| base - r*0 >> f}.inject(fail()){|acc,r| r | acc}
+   end
+
+   def rightrec(*rules, &act)
+     f = Proc.new{|x|
+       x[0].reverse.inject(x[1]){|acc,y|
+         ys = y.dup()
+         ys.push(acc)
+         act.call(Sequence[*ys])
+       }
+     }
+     base = rules.pop()
+     rules.collect{|r| r*0 - base >> f}.inject(fail()){|acc,r| r | acc}
+   end
+
+   def chainl(base, *infixes, &act)
+     infixes.inject(base){|acc,r|
+       leftrec(acc, r - acc, &act)
+     }
+   end
+
+   def chainr(base, *infixes, &act)
+     infixes.inject(base){|acc,r|
+       rightrec(acc - r, acc, &act)
+     }
+   end
+
+   class Grammar
+     include TDParser
+
+     def define(&block)
+       instance_eval{
+         begin
+           alias method_missing g_method_missing
+           block.call(self)
+         ensure
+           undef method_missing
+         end
+       }
+     end
+
+     def g_method_missing(sym, *args)
+       arg0 = args[0]
+       sym = sym.to_s()
+       if( sym[-1,1] == "=" )
+         case arg0
+         when Rule
+           self.class.instance_eval{
+             define_method(sym[0..-2]){ arg0 }
+           }
+         else
+           t = token(arg0)
+           self.class.instance_eval{
+             define_method(sym[0..-2]){ t }
+           }
+         end
+       elsif( args.size == 0 )
+         rule(sym)
+       else
+         raise(NoMethodError, "undefined method `#{sym}' for #{self.inspect}")
+       end
+     end
+
+     alias method_missing g_method_missing
+   end
+
+   def TDParser.define(*args, &block)
+     klass = Class.new(Grammar)
+     g = klass.new()
+     begin
+       if defined?(g.instance_exec)
+         g.instance_exec(g, &block)
+       else
+         g.instance_eval(&block)
+       end
+     ensure
+       g.instance_eval{
+         undef method_missing
+       }
+     end
+     g
+   end
+ end