tdp4r 1.3.3 → 1.4.0
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/tdp.rb +600 -162
- data/samples/sample_optimize.rb +45 -0
- data/samples/sample_xml.rb +123 -0
- data/test/test_tdp.rb +24 -3
- metadata +5 -4
- data/samples/sample7.rb +0 -79
- /data/samples/{sample6.rb → sample_expr.rb} +0 -0
data/lib/tdp.rb
CHANGED
@@ -10,8 +10,8 @@ module TDParser
|
|
10
10
|
end
|
11
11
|
|
12
12
|
class TokenGenerator < Generator
|
13
|
-
def initialize(*args)
|
14
|
-
super(*args)
|
13
|
+
def initialize(*args, &block)
|
14
|
+
super(*args, &block)
|
15
15
|
@buffer = []
|
16
16
|
end
|
17
17
|
|
@@ -93,141 +93,80 @@ module TDParser
|
|
93
93
|
end
|
94
94
|
include BufferUtils
|
95
95
|
|
96
|
-
class
|
96
|
+
class Parser
|
97
97
|
include BufferUtils
|
98
|
+
include TDParser
|
99
|
+
|
100
|
+
def to_proc()
|
101
|
+
Proc.new{|*x| self.call(*x) }
|
102
|
+
end
|
103
|
+
|
104
|
+
def to_s()
|
105
|
+
"??"
|
106
|
+
end
|
107
|
+
|
108
|
+
def call(*args)
|
109
|
+
end
|
110
|
+
|
111
|
+
#def [](*args)
|
112
|
+
# call(*args)
|
113
|
+
#end
|
114
|
+
|
115
|
+
def optimize(default=false)
|
116
|
+
self.dup()
|
117
|
+
end
|
118
|
+
|
119
|
+
def ==(r)
|
120
|
+
false
|
121
|
+
end
|
122
|
+
|
123
|
+
def same?(r)
|
124
|
+
self == r
|
125
|
+
end
|
98
126
|
|
99
127
|
def -(r)
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
nil
|
106
|
-
else
|
107
|
-
x + y
|
108
|
-
end
|
109
|
-
end
|
110
|
-
}
|
128
|
+
ConcatParser.new(self,r)
|
129
|
+
end
|
130
|
+
|
131
|
+
def +(r)
|
132
|
+
ParallelParser.new(self,r)
|
111
133
|
end
|
112
134
|
|
113
135
|
def |(r)
|
114
|
-
|
115
|
-
b = prepare(buff)
|
116
|
-
if( (x = self[ts, b]).nil? )
|
117
|
-
recover(b, ts)
|
118
|
-
r[ts, buff]
|
119
|
-
else
|
120
|
-
buff.insert(0, *b)
|
121
|
-
x
|
122
|
-
end
|
123
|
-
}
|
136
|
+
ChoiceParser.new(self,r).optimize(true)
|
124
137
|
end
|
125
138
|
|
126
|
-
def *(
|
127
|
-
if(
|
128
|
-
range = n
|
139
|
+
def *(range)
|
140
|
+
if( range.is_a?(Range) )
|
129
141
|
n = range.min
|
130
142
|
else
|
143
|
+
n = range
|
131
144
|
range = nil
|
132
145
|
end
|
133
|
-
|
134
|
-
x = true
|
135
|
-
xs = []
|
136
|
-
while( n > 0 )
|
137
|
-
n -= 1
|
138
|
-
b = prepare(buff)
|
139
|
-
if( (x = self[ts, b]).nil? )
|
140
|
-
recover(b, ts)
|
141
|
-
break
|
142
|
-
else
|
143
|
-
buff.insert(0, *b)
|
144
|
-
xs.push(x)
|
145
|
-
end
|
146
|
-
end
|
147
|
-
if ( x.nil? )
|
148
|
-
nil
|
149
|
-
else
|
150
|
-
if( range )
|
151
|
-
range.each{
|
152
|
-
while( true )
|
153
|
-
y = x
|
154
|
-
b = prepare(buff)
|
155
|
-
if( (x = self[ts, b]).nil? )
|
156
|
-
recover(b, ts)
|
157
|
-
x = y
|
158
|
-
break
|
159
|
-
else
|
160
|
-
buff.insert(0, *b)
|
161
|
-
xs.push(x)
|
162
|
-
end
|
163
|
-
end
|
164
|
-
}
|
165
|
-
else
|
166
|
-
while( true )
|
167
|
-
y = x
|
168
|
-
b = prepare(buff)
|
169
|
-
if( (x = self[ts, b]).nil? )
|
170
|
-
recover(b, ts)
|
171
|
-
x = y
|
172
|
-
break
|
173
|
-
else
|
174
|
-
buff.insert(0, *b)
|
175
|
-
xs.push(x)
|
176
|
-
end
|
177
|
-
end
|
178
|
-
end
|
179
|
-
Sequence[xs]
|
180
|
-
end
|
181
|
-
}
|
146
|
+
IterationParser.new(self,n,range)
|
182
147
|
end
|
183
148
|
|
184
149
|
def >>(act)
|
185
|
-
|
186
|
-
if( (x = self[tokens, buff]).nil? )
|
187
|
-
nil
|
188
|
-
else
|
189
|
-
x = TokenBuffer[*x]
|
190
|
-
x.map = buff.map
|
191
|
-
Sequence[act[x]]
|
192
|
-
end
|
193
|
-
}
|
150
|
+
ActionParser.new(self,act)
|
194
151
|
end
|
195
152
|
|
196
|
-
def /(
|
197
|
-
|
198
|
-
x = self[tokens, buff]
|
199
|
-
buff.map[symbol] = x
|
200
|
-
x
|
201
|
-
}
|
153
|
+
def /(label)
|
154
|
+
LabelParser.new(self,label)
|
202
155
|
end
|
203
156
|
|
204
157
|
def %(stack)
|
205
|
-
|
206
|
-
x = self[tokens, buff]
|
207
|
-
stack.push(x)
|
208
|
-
x
|
209
|
-
}
|
158
|
+
StackParser.new(self,stack)
|
210
159
|
end
|
211
160
|
|
212
161
|
def >(symbol)
|
213
|
-
|
162
|
+
Parser.new{|tokens, buff|
|
214
163
|
buff[symbol] = buff.dup()
|
215
164
|
self[tokens, buff]
|
216
165
|
}
|
217
166
|
end
|
218
167
|
|
219
168
|
def ~@()
|
220
|
-
|
221
|
-
b = prepare(buff)
|
222
|
-
r = self[tokens,b]
|
223
|
-
rev = b.reverse
|
224
|
-
recover(b, tokens)
|
225
|
-
if( r.nil? )
|
226
|
-
Sequence[Sequence[*rev]]
|
227
|
-
else
|
228
|
-
nil
|
229
|
-
end
|
230
|
-
}
|
169
|
+
NegativeParser.new(self)
|
231
170
|
end
|
232
171
|
|
233
172
|
def parse(tokens=nil, &blk)
|
@@ -242,7 +181,7 @@ module TDParser
|
|
242
181
|
else
|
243
182
|
@tokens = TokenGenerator.new(&blk)
|
244
183
|
end
|
245
|
-
r =
|
184
|
+
r = call(@tokens, TokenBuffer.new())
|
246
185
|
if( r.nil? )
|
247
186
|
nil
|
248
187
|
else
|
@@ -262,112 +201,611 @@ module TDParser
|
|
262
201
|
self >> block
|
263
202
|
end
|
264
203
|
end
|
265
|
-
# end of Rule
|
204
|
+
# end of Parser
|
205
|
+
|
206
|
+
class NonTerminalParser < Parser
|
207
|
+
attr_reader :context, :symbol, :options
|
208
|
+
def initialize(context, sym, *options)
|
209
|
+
@context = context
|
210
|
+
@symbol = sym
|
211
|
+
@options = options
|
212
|
+
end
|
266
213
|
|
267
|
-
|
268
|
-
Rule.new{|tokens, buff|
|
214
|
+
def call(tokens, buff)
|
269
215
|
res = nil
|
270
|
-
case
|
216
|
+
case @symbol
|
271
217
|
when Symbol, String
|
272
|
-
res = __send__(
|
273
|
-
when
|
274
|
-
res =
|
275
|
-
end
|
276
|
-
if( block_given? && !res.nil? )
|
277
|
-
res = yield(res)
|
218
|
+
res = @context.__send__(@symbol,*@options).call(tokens, buff)
|
219
|
+
when Parser
|
220
|
+
res = @symbol.call(tokens, buff)
|
278
221
|
end
|
279
222
|
res
|
280
|
-
|
223
|
+
end
|
224
|
+
|
225
|
+
def ==(r)
|
226
|
+
(self.class == r.class) &&
|
227
|
+
(@context == r.context) &&
|
228
|
+
(@symbol == r.symbol) &&
|
229
|
+
(@options == r.options)
|
230
|
+
end
|
231
|
+
|
232
|
+
def to_s()
|
233
|
+
"#{@symbol.to_s}"
|
234
|
+
end
|
281
235
|
end
|
282
236
|
|
283
|
-
|
284
|
-
|
237
|
+
class TerminalParser < Parser
|
238
|
+
attr_reader :symbol, :equality
|
239
|
+
|
240
|
+
def initialize(obj, eqsym)
|
241
|
+
@symbol = obj
|
242
|
+
@equality = eqsym
|
243
|
+
end
|
244
|
+
|
245
|
+
def call(tokens, buff)
|
285
246
|
t = tokens.shift
|
286
247
|
buff.unshift(t)
|
287
|
-
if(
|
288
|
-
t = yield(t) if( block_given? )
|
248
|
+
if( @symbol.__send__(@equality,t) || t.__send__(@equality,@symbol) )
|
289
249
|
Sequence[t]
|
290
250
|
else
|
291
251
|
nil
|
292
252
|
end
|
293
|
-
|
253
|
+
end
|
254
|
+
|
255
|
+
def ==(r)
|
256
|
+
(self.class == r.class) &&
|
257
|
+
(@symbol == r.symbol) &&
|
258
|
+
(@equality == r.equality)
|
259
|
+
end
|
260
|
+
|
261
|
+
def to_s()
|
262
|
+
"#{@symbol}"
|
263
|
+
end
|
294
264
|
end
|
295
265
|
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
266
|
+
class CompositeParser < Parser
|
267
|
+
attr_accessor :parsers
|
268
|
+
|
269
|
+
def initialize(*parsers, &b)
|
270
|
+
@parsers = parsers
|
271
|
+
end
|
272
|
+
|
273
|
+
def optimize(default=false)
|
274
|
+
parser = dup()
|
275
|
+
parser.parsers = @parsers.collect{|x| x.optimize(default)}
|
276
|
+
parser
|
277
|
+
end
|
278
|
+
|
279
|
+
def ==(r)
|
280
|
+
(self.class == r.class) &&
|
281
|
+
(@parsers == r.parsers)
|
282
|
+
end
|
283
|
+
|
284
|
+
def same?(r)
|
285
|
+
super(r) &&
|
286
|
+
@parsers.zip(r.parsers).all?{|x,y| x.same?(y)}
|
287
|
+
end
|
288
|
+
|
289
|
+
def to_s()
|
290
|
+
"<composite: #{@parsers.collect{|x| x.to_s()}}>"
|
291
|
+
end
|
292
|
+
end
|
293
|
+
|
294
|
+
class ActionParser < CompositeParser
|
295
|
+
attr_reader :action
|
296
|
+
|
297
|
+
def initialize(parser, act)
|
298
|
+
@action = act
|
299
|
+
super(parser)
|
300
|
+
end
|
301
|
+
|
302
|
+
def call(tokens, buff)
|
303
|
+
if( (x = @parsers[0].call(tokens, buff)).nil? )
|
304
|
+
nil
|
302
305
|
else
|
303
|
-
|
306
|
+
x = TokenBuffer[*x]
|
307
|
+
x.map = buff.map
|
308
|
+
Sequence[@action[x]]
|
304
309
|
end
|
305
|
-
|
310
|
+
end
|
311
|
+
|
312
|
+
def ==(r)
|
313
|
+
super(r) &&
|
314
|
+
(@action == r.action)
|
315
|
+
end
|
316
|
+
|
317
|
+
def to_s()
|
318
|
+
"(#{@parsers[0].to_s()} <action>)"
|
319
|
+
end
|
306
320
|
end
|
307
321
|
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
322
|
+
class LabelParser < CompositeParser
|
323
|
+
attr_reader :label
|
324
|
+
|
325
|
+
def initialize(parser, label)
|
326
|
+
@label = label
|
327
|
+
super(parser)
|
328
|
+
end
|
329
|
+
|
330
|
+
def call(tokens, buff)
|
331
|
+
x = @parsers[0].call(tokens, buff)
|
332
|
+
buff.map[@label] = x
|
333
|
+
x
|
334
|
+
end
|
335
|
+
|
336
|
+
def ==(r)
|
337
|
+
super(r) &&
|
338
|
+
(@label == r.label)
|
339
|
+
end
|
340
|
+
|
341
|
+
def to_s()
|
342
|
+
"(#{@parsers[0].to_s()}/#{@label})"
|
343
|
+
end
|
344
|
+
end
|
345
|
+
|
346
|
+
class StackParser < CompositeParser
|
347
|
+
attr_reader :stack
|
348
|
+
def initialize(parser, stack)
|
349
|
+
@stack = stack
|
350
|
+
super(parser)
|
351
|
+
end
|
352
|
+
|
353
|
+
def call(tokens, buff)
|
354
|
+
x = @parsers[0].call(tokens, buff)
|
355
|
+
@stack.push(x)
|
356
|
+
x
|
357
|
+
end
|
358
|
+
|
359
|
+
def ==(r)
|
360
|
+
super(r) &&
|
361
|
+
(@stack == r.stack)
|
362
|
+
end
|
363
|
+
|
364
|
+
def same?(r)
|
365
|
+
false
|
366
|
+
end
|
367
|
+
|
368
|
+
def to_s()
|
369
|
+
"<stack:#{@stack.object_id}>"
|
370
|
+
end
|
371
|
+
end
|
372
|
+
|
373
|
+
class ConcatParser < CompositeParser
|
374
|
+
def initialize(r1, r2)
|
375
|
+
super(r1, r2)
|
376
|
+
end
|
377
|
+
|
378
|
+
def call(tokens, buff)
|
379
|
+
if( (x = @parsers[0].call(tokens, buff)).nil? )
|
312
380
|
nil
|
313
381
|
else
|
314
|
-
|
382
|
+
if( (y = @parsers[1].call(tokens, buff)).nil? )
|
383
|
+
nil
|
384
|
+
else
|
385
|
+
x + y
|
386
|
+
end
|
315
387
|
end
|
316
|
-
|
388
|
+
end
|
389
|
+
|
390
|
+
def -(r)
|
391
|
+
@parsers[0] - (@parsers[1] - r)
|
392
|
+
end
|
393
|
+
|
394
|
+
def to_s()
|
395
|
+
"(#{@parsers[0].to_s()} #{@parsers[1].to_s()})"
|
396
|
+
end
|
317
397
|
end
|
318
398
|
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
399
|
+
class ChoiceParser < CompositeParser
|
400
|
+
def initialize(r1, r2)
|
401
|
+
super(r1, r2)
|
402
|
+
end
|
403
|
+
|
404
|
+
def call(tokens, buff)
|
405
|
+
b = prepare(buff)
|
406
|
+
if( (x = @parsers[0].call(tokens, b)).nil? )
|
407
|
+
recover(b, tokens)
|
408
|
+
@parsers[1].call(tokens, buff)
|
409
|
+
else
|
410
|
+
buff.insert(0, *b)
|
411
|
+
x
|
412
|
+
end
|
413
|
+
end
|
414
|
+
|
415
|
+
def to_s()
|
416
|
+
"(#{@parsers[0].to_s()} | #{@parsers[1].to_s()})"
|
417
|
+
end
|
418
|
+
|
419
|
+
def shared_sequence(r1, r2)
|
420
|
+
if (r1.is_a?(ConcatParser) && r2.is_a?(ConcatParser))
|
421
|
+
r11 = r1.parsers[0]
|
422
|
+
r12 = r1.parsers[1]
|
423
|
+
r21 = r2.parsers[0]
|
424
|
+
r22 = r2.parsers[1]
|
425
|
+
if (r11.same?(r21))
|
426
|
+
share,r12,r22, = shared_sequence(r12, r22)
|
427
|
+
if (share)
|
428
|
+
return [r11 - share, r12, r22]
|
429
|
+
else
|
430
|
+
return [r11, r12, r22]
|
431
|
+
end
|
432
|
+
end
|
433
|
+
end
|
434
|
+
[nil, r1, r2]
|
435
|
+
end
|
436
|
+
|
437
|
+
def optimize(default=false)
|
438
|
+
r1 = @parsers[0]
|
439
|
+
r2 = @parsers[1]
|
440
|
+
if (r1.is_a?(ActionParser))
|
441
|
+
act1 = r1.action
|
442
|
+
r1 = r1.parsers[0]
|
443
|
+
end
|
444
|
+
if (r2.is_a?(ActionParser))
|
445
|
+
act2 = r2.action
|
446
|
+
r2 = r2.parsers[0]
|
447
|
+
end
|
448
|
+
share,r12,r22, = shared_sequence(r1, r2)
|
449
|
+
if (share)
|
450
|
+
r = share - (r12 + r22)
|
451
|
+
if (act1)
|
452
|
+
if (act2)
|
453
|
+
r = r >> Proc.new{|x|
|
454
|
+
y0,y1,*ys = x.pop()
|
455
|
+
if (y0)
|
456
|
+
act1.call(x.push(*y0))
|
457
|
+
else
|
458
|
+
act2.call(x.push(*y1))
|
459
|
+
end
|
460
|
+
}
|
461
|
+
else
|
462
|
+
r = r >> Proc.new{|x|
|
463
|
+
y0,y1,*ys = x.pop()
|
464
|
+
if (y0)
|
465
|
+
act1.call(x.push(*y0))
|
466
|
+
end
|
467
|
+
}
|
468
|
+
end
|
469
|
+
else
|
470
|
+
if (act2)
|
471
|
+
r = r >> Proc.new{|x|
|
472
|
+
y0,y1,*ys = x.pop()
|
473
|
+
if (y1)
|
474
|
+
act2.call(x.push(*y1))
|
475
|
+
end
|
476
|
+
}
|
477
|
+
end
|
478
|
+
end
|
479
|
+
return r
|
480
|
+
end
|
481
|
+
if (default)
|
482
|
+
self.dup()
|
483
|
+
else
|
484
|
+
super(default)
|
485
|
+
end
|
486
|
+
end
|
487
|
+
end
|
488
|
+
|
489
|
+
class ParallelParser < CompositeParser
|
490
|
+
def initialize(r1, r2)
|
491
|
+
super(r1, r2)
|
492
|
+
end
|
493
|
+
|
494
|
+
def call(tokens, buff)
|
495
|
+
b = prepare(buff)
|
496
|
+
if( (x = @parsers[0].call(tokens, b)).nil? )
|
497
|
+
recover(b, tokens)
|
498
|
+
Sequence[Sequence[nil, @parsers[1].call(tokens, buff)]]
|
499
|
+
else
|
500
|
+
buff.insert(0, *b)
|
501
|
+
Sequence[Sequence[x, nil]]
|
502
|
+
end
|
503
|
+
end
|
504
|
+
|
505
|
+
def to_s()
|
506
|
+
"(#{@parsers[0].to_s()} + #{@parsers[1].to_s()})"
|
507
|
+
end
|
508
|
+
end
|
509
|
+
|
510
|
+
class IterationParser < CompositeParser
|
511
|
+
attr_reader :min, :range
|
512
|
+
def initialize(parser, n, range)
|
513
|
+
@min = n
|
514
|
+
@range = range
|
515
|
+
super(parser)
|
516
|
+
end
|
517
|
+
|
518
|
+
def call(ts, buff)
|
519
|
+
r = @parsers[0]
|
520
|
+
n = @min
|
521
|
+
x = true
|
522
|
+
xs = []
|
523
|
+
while( n > 0 )
|
524
|
+
n -= 1
|
525
|
+
b = prepare(buff)
|
526
|
+
if( (x = r.call(ts, b)).nil? )
|
527
|
+
recover(b, ts)
|
528
|
+
break
|
529
|
+
else
|
530
|
+
buff.insert(0, *b)
|
531
|
+
xs.push(x)
|
532
|
+
end
|
533
|
+
end
|
534
|
+
if ( x.nil? )
|
323
535
|
nil
|
324
536
|
else
|
325
|
-
|
537
|
+
if( range )
|
538
|
+
range.each{
|
539
|
+
while( true )
|
540
|
+
y = x
|
541
|
+
b = prepare(buff)
|
542
|
+
if( (x = r.call(ts, b)).nil? )
|
543
|
+
recover(b, ts)
|
544
|
+
x = y
|
545
|
+
break
|
546
|
+
else
|
547
|
+
buff.insert(0, *b)
|
548
|
+
xs.push(x)
|
549
|
+
end
|
550
|
+
end
|
551
|
+
}
|
552
|
+
else
|
553
|
+
while( true )
|
554
|
+
y = x
|
555
|
+
b = prepare(buff)
|
556
|
+
if( (x = r.call(ts, b)).nil? )
|
557
|
+
recover(b, ts)
|
558
|
+
x = y
|
559
|
+
break
|
560
|
+
else
|
561
|
+
buff.insert(0, *b)
|
562
|
+
xs.push(x)
|
563
|
+
end
|
564
|
+
end
|
565
|
+
end
|
566
|
+
Sequence[xs]
|
326
567
|
end
|
327
|
-
|
568
|
+
end
|
569
|
+
|
570
|
+
def to_s()
|
571
|
+
"(#{@parsers[0].to_s()})*#{@range ? @range.to_s : @min.to_s}"
|
572
|
+
end
|
573
|
+
|
574
|
+
def ==(r)
|
575
|
+
super(r) &&
|
576
|
+
(@min == r.min) &&
|
577
|
+
(@range == r.range)
|
578
|
+
end
|
328
579
|
end
|
329
580
|
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
|
581
|
+
class NegativeParser < CompositeParser
|
582
|
+
def initialize(parser)
|
583
|
+
super(parser)
|
584
|
+
end
|
585
|
+
|
586
|
+
def call(tokens, buff)
|
587
|
+
b = prepare(buff)
|
588
|
+
r = @parsers[0].call(tokens,b)
|
589
|
+
rev = b.reverse
|
590
|
+
recover(b, tokens)
|
591
|
+
if( r.nil? )
|
592
|
+
Sequence[Sequence[*rev]]
|
334
593
|
else
|
335
594
|
nil
|
336
595
|
end
|
337
|
-
|
596
|
+
end
|
597
|
+
|
598
|
+
def to_s()
|
599
|
+
"~#{@parsers[0]}"
|
600
|
+
end
|
338
601
|
end
|
339
602
|
|
340
|
-
|
341
|
-
|
603
|
+
class FailParser < Parser
|
604
|
+
def call(tokens, buff)
|
605
|
+
nil
|
606
|
+
end
|
607
|
+
|
608
|
+
def to_s()
|
609
|
+
"<fail>"
|
610
|
+
end
|
611
|
+
|
612
|
+
def ==()
|
613
|
+
(self.class == r.class)
|
614
|
+
end
|
342
615
|
end
|
343
|
-
alias empty empty_rule
|
344
616
|
|
345
|
-
|
346
|
-
|
617
|
+
class EmptyParser < Parser
|
618
|
+
def call(tokens, buff)
|
619
|
+
Sequence[nil]
|
620
|
+
end
|
621
|
+
|
622
|
+
def to_s()
|
623
|
+
"<empty>"
|
624
|
+
end
|
625
|
+
|
626
|
+
def ==(r)
|
627
|
+
true
|
628
|
+
end
|
629
|
+
end
|
630
|
+
|
631
|
+
class AnyParser < Parser
|
632
|
+
def call(tokens, buff)
|
347
633
|
t = tokens.shift
|
348
634
|
if (t.nil?)
|
349
635
|
nil
|
350
636
|
else
|
351
637
|
Sequence[t]
|
352
638
|
end
|
353
|
-
|
639
|
+
end
|
640
|
+
|
641
|
+
def to_s()
|
642
|
+
"<any>"
|
643
|
+
end
|
644
|
+
|
645
|
+
def ==(r)
|
646
|
+
true
|
647
|
+
end
|
354
648
|
end
|
355
|
-
alias any any_rule
|
356
649
|
|
357
|
-
|
358
|
-
|
650
|
+
class NoneParser < Parser
|
651
|
+
def call(tokens, buff)
|
359
652
|
t = tokens.shift
|
360
653
|
if (t.nil?)
|
361
654
|
Sequence[nil]
|
362
655
|
else
|
363
656
|
nil
|
364
657
|
end
|
365
|
-
|
658
|
+
end
|
659
|
+
|
660
|
+
def to_s()
|
661
|
+
"<none>"
|
662
|
+
end
|
663
|
+
|
664
|
+
def ==(r)
|
665
|
+
true
|
666
|
+
end
|
667
|
+
end
|
668
|
+
|
669
|
+
class ReferenceParser < Parser
|
670
|
+
def __backref__(xs, eqsym)
|
671
|
+
x = xs.shift()
|
672
|
+
xs.inject(token(x, eqsym)){|acc,x|
|
673
|
+
case x
|
674
|
+
when Sequence
|
675
|
+
acc - __backref__(x, eqsym)
|
676
|
+
else
|
677
|
+
acc - token(x, eqsym)
|
678
|
+
end
|
679
|
+
}
|
680
|
+
end
|
681
|
+
|
682
|
+
def same?(r)
|
683
|
+
false
|
684
|
+
end
|
685
|
+
end
|
686
|
+
|
687
|
+
class BackrefParser < ReferenceParser
|
688
|
+
attr_reader :reference, :equality
|
689
|
+
|
690
|
+
def initialize(ref, eqsym)
|
691
|
+
@reference = ref
|
692
|
+
@equality = eqsym
|
693
|
+
end
|
694
|
+
|
695
|
+
def call(tokens, buff)
|
696
|
+
ys = buff.map[@reference]
|
697
|
+
if (ys.nil? || ys.empty?)
|
698
|
+
nil
|
699
|
+
else
|
700
|
+
__backref__(ys.dup(), @equality).call(tokens,buff)
|
701
|
+
end
|
702
|
+
end
|
703
|
+
|
704
|
+
def to_s()
|
705
|
+
"<backref:#{@reference}>"
|
706
|
+
end
|
707
|
+
|
708
|
+
def ==(r)
|
709
|
+
super(r) &&
|
710
|
+
(@reference == r.reference) &&
|
711
|
+
(@equality == r.equality)
|
712
|
+
end
|
713
|
+
end
|
714
|
+
|
715
|
+
class StackrefParser < ReferenceParser
|
716
|
+
attr_reader :stack, :equality
|
717
|
+
|
718
|
+
def initialize(stack, eqsym)
|
719
|
+
@stack = stack
|
720
|
+
@equality = eqsym
|
721
|
+
end
|
722
|
+
|
723
|
+
def call(tokens, buff)
|
724
|
+
ys = @stack.pop()
|
725
|
+
if (ys.nil? || ys.empty?)
|
726
|
+
nil
|
727
|
+
else
|
728
|
+
__backref__(ys.dup(), @equality).call(tokens,buff)
|
729
|
+
end
|
730
|
+
end
|
731
|
+
|
732
|
+
def to_s()
|
733
|
+
"<stackref:#{@stack.object_id}>"
|
734
|
+
end
|
735
|
+
|
736
|
+
def ==(r)
|
737
|
+
super(r) &&
|
738
|
+
(@stack == r.stack) &&
|
739
|
+
(@equality == r.equality)
|
740
|
+
end
|
741
|
+
end
|
742
|
+
|
743
|
+
class StateParser < Parser
|
744
|
+
attr_reader :state
|
745
|
+
|
746
|
+
def initialize(s)
|
747
|
+
@state = s
|
748
|
+
end
|
749
|
+
|
750
|
+
def call(tokens, buff)
|
751
|
+
if (buff.map[:state] == @state)
|
752
|
+
Sequence[@state]
|
753
|
+
else
|
754
|
+
nil
|
755
|
+
end
|
756
|
+
end
|
757
|
+
|
758
|
+
def to_s()
|
759
|
+
"<state:#{@state}>"
|
760
|
+
end
|
761
|
+
|
762
|
+
def ==(r)
|
763
|
+
super(r) &&
|
764
|
+
(@state == r.state)
|
765
|
+
end
|
766
|
+
|
767
|
+
def same?(r)
|
768
|
+
false
|
769
|
+
end
|
770
|
+
end
|
771
|
+
|
772
|
+
def rule(sym, *opts)
|
773
|
+
NonTerminalParser.new(self, sym, *opts)
|
774
|
+
end
|
775
|
+
|
776
|
+
def token(x, eqsym=:===)
|
777
|
+
TerminalParser.new(x, eqsym)
|
778
|
+
end
|
779
|
+
|
780
|
+
def backref(x, eqsym=:===)
|
781
|
+
BackrefParser.new(x, eqsym)
|
782
|
+
end
|
783
|
+
|
784
|
+
def stackref(stack, eqsym=:===)
|
785
|
+
StackrefParser.new(stack, eqsym)
|
786
|
+
end
|
787
|
+
|
788
|
+
def state(s)
|
789
|
+
StateParser.new(s)
|
790
|
+
end
|
791
|
+
|
792
|
+
def empty_rule(&b)
|
793
|
+
EmptyParser.new(&b)
|
794
|
+
end
|
795
|
+
alias empty empty_rule
|
796
|
+
|
797
|
+
def any_rule()
|
798
|
+
AnyParser.new()
|
799
|
+
end
|
800
|
+
alias any any_rule
|
801
|
+
|
802
|
+
def none_rule()
|
803
|
+
NoneParser.new()
|
366
804
|
end
|
367
805
|
alias none none_rule
|
368
806
|
|
369
807
|
def fail_rule()
|
370
|
-
|
808
|
+
FailParser.new()
|
371
809
|
end
|
372
810
|
alias fail fail_rule
|
373
811
|
|
@@ -424,7 +862,7 @@ module TDParser
|
|
424
862
|
sym = sym.to_s()
|
425
863
|
if (sym[-1,1] == "=")
|
426
864
|
case arg0
|
427
|
-
when
|
865
|
+
when Parser
|
428
866
|
self.class.instance_eval{
|
429
867
|
define_method(sym[0..-2]){ arg0 }
|
430
868
|
}
|
@@ -0,0 +1,45 @@
|
|
1
|
+
require 'tdp'
|
2
|
+
require 'benchmark'
|
3
|
+
require 'pp'
|
4
|
+
|
5
|
+
# disable auto optimization
|
6
|
+
module TDParser
|
7
|
+
class Parser
|
8
|
+
def |(r)
|
9
|
+
ChoiceParser.new(self, r)
|
10
|
+
end
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
14
|
+
parser = TDParser.define{|g|
|
15
|
+
f = Proc.new{|x| x.flatten}
|
16
|
+
g.rule1 =
|
17
|
+
token("1") - token("2") - rule1 - token("a") >> f |
|
18
|
+
token("1") - token("2") - rule1 - token("b") >> f |
|
19
|
+
empty()
|
20
|
+
|
21
|
+
|
22
|
+
g.rule2 =
|
23
|
+
(token("1") - token("2") - rule2 - token("a") >> f |
|
24
|
+
token("1") - token("2") - rule2 - token("b") >> f |
|
25
|
+
empty()).optimize()
|
26
|
+
|
27
|
+
g.rule3 =
|
28
|
+
(token("1") - token("2") - rule3 - (token("a")|token("b")) >> f |
|
29
|
+
empty())
|
30
|
+
}
|
31
|
+
|
32
|
+
puts(parser.rule1.to_s)
|
33
|
+
puts(parser.rule2.to_s)
|
34
|
+
puts(parser.rule3.to_s)
|
35
|
+
|
36
|
+
Benchmark.bm{|x|
|
37
|
+
buff = ["1","2"]
|
38
|
+
b = ["b"]
|
39
|
+
for i in [5,10,15]
|
40
|
+
puts("--")
|
41
|
+
x.report{ $r1 = parser.rule1.parse(buff*i + b*i) }
|
42
|
+
x.report{ $r2 = parser.rule2.parse(buff*i + b*i) }
|
43
|
+
x.report{ $r3 = parser.rule3.parse(buff*i + b*i) }
|
44
|
+
end
|
45
|
+
}
|
@@ -0,0 +1,123 @@
|
|
1
|
+
require 'tdp'
|
2
|
+
require 'tdputils'
|
3
|
+
require 'rexml/parsers/pullparser'
|
4
|
+
require 'rexml/document'
|
5
|
+
|
6
|
+
class Array
|
7
|
+
def ===(ary)
|
8
|
+
if super(ary)
|
9
|
+
return true
|
10
|
+
end
|
11
|
+
if !ary.is_a?(Array)
|
12
|
+
return false
|
13
|
+
end
|
14
|
+
each_with_index{|v,idx|
|
15
|
+
case ary[idx]
|
16
|
+
when v
|
17
|
+
else
|
18
|
+
return false
|
19
|
+
end
|
20
|
+
}
|
21
|
+
true
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
class Hash
|
26
|
+
def ===(h)
|
27
|
+
if super(h)
|
28
|
+
return true
|
29
|
+
end
|
30
|
+
if !h.is_a?(Hash)
|
31
|
+
return false
|
32
|
+
end
|
33
|
+
each{|k,v|
|
34
|
+
case h[k]
|
35
|
+
when v
|
36
|
+
else
|
37
|
+
return false
|
38
|
+
end
|
39
|
+
}
|
40
|
+
true
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
module XMLParser
|
45
|
+
def xml_stag(name)
|
46
|
+
token([:start_element, name, Hash])
|
47
|
+
end
|
48
|
+
alias stag xml_stag
|
49
|
+
|
50
|
+
def xml_etag(name)
|
51
|
+
token([:end_element, name])
|
52
|
+
end
|
53
|
+
alias etag xml_etag
|
54
|
+
|
55
|
+
def dom_element(elem, &inner)
|
56
|
+
stag(elem) - (inner.call()|empty()) - etag(elem)
|
57
|
+
end
|
58
|
+
alias element dom_element
|
59
|
+
|
60
|
+
def dom_filter(&act)
|
61
|
+
Proc.new{|x|
|
62
|
+
name = x[0][1]
|
63
|
+
attrs = x[0][2]
|
64
|
+
node = REXML::Element.new()
|
65
|
+
node.name = name
|
66
|
+
node.attributes.merge!(attrs)
|
67
|
+
act[node,x[1]]
|
68
|
+
}
|
69
|
+
end
|
70
|
+
alias filter dom_filter
|
71
|
+
|
72
|
+
def dom_construct(&act)
|
73
|
+
dom_filter{|node,child|
|
74
|
+
if (child.is_a?(Array))
|
75
|
+
child.each{|c| node.add(c) }
|
76
|
+
else
|
77
|
+
node.add(child)
|
78
|
+
end
|
79
|
+
if (act)
|
80
|
+
act[node]
|
81
|
+
else
|
82
|
+
node
|
83
|
+
end
|
84
|
+
}
|
85
|
+
end
|
86
|
+
alias construct dom_construct
|
87
|
+
end
|
88
|
+
|
89
|
+
translator = TDParser.define{|g|
|
90
|
+
extend XMLParser
|
91
|
+
|
92
|
+
g.xml =
|
93
|
+
element("a"){
|
94
|
+
element("b"){
|
95
|
+
g.xml*0 >> Proc.new{|x| x[0].collect{|y| y[0]} }
|
96
|
+
} >> construct{|node| node.name = "bar"; node }
|
97
|
+
} >> construct{|node| node.name = "foo"; node } |
|
98
|
+
element(String){
|
99
|
+
g.xml*0 >> Proc.new{|x| x[0].collect{|y| y[0]} }
|
100
|
+
} >> construct{|node|
|
101
|
+
node.name = node.name.upcase()
|
102
|
+
node
|
103
|
+
} |
|
104
|
+
~etag(String) - any() - g.xml >> Proc.new{|x| x[2]}
|
105
|
+
|
106
|
+
def translate(src)
|
107
|
+
xparser = REXML::Parsers::BaseParser.new(src)
|
108
|
+
xml.parse{|g|
|
109
|
+
while(xparser.has_next?)
|
110
|
+
g.yield(xparser.pull())
|
111
|
+
end
|
112
|
+
}
|
113
|
+
end
|
114
|
+
}
|
115
|
+
|
116
|
+
puts(translator.translate(<<EOS))
|
117
|
+
<?xml version="1.0" ?>
|
118
|
+
<list>
|
119
|
+
<a><b><c>hoge</c></b></a>
|
120
|
+
<b>b?</b>
|
121
|
+
</list>
|
122
|
+
EOS
|
123
|
+
# => "<LIST><foo><bar><C></C></bar></foo><B></B></LIST>"
|
data/test/test_tdp.rb
CHANGED
@@ -325,7 +325,7 @@ class TestTDParser < Test::Unit::TestCase
|
|
325
325
|
end
|
326
326
|
|
327
327
|
def test_regex_match()
|
328
|
-
rule = token(/\d+/, :=~)
|
328
|
+
rule = token(/\d+/, :=~) >> proc{|x| x[0].to_i }
|
329
329
|
assert_equal(10, rule.parse(["10"]))
|
330
330
|
end
|
331
331
|
|
@@ -374,14 +374,14 @@ class TestTDParser < Test::Unit::TestCase
|
|
374
374
|
assert_equal(["a","b","a"], rule.parse(buff))
|
375
375
|
end
|
376
376
|
|
377
|
-
def
|
377
|
+
def test_stackref2()
|
378
378
|
buff = ["a","b","c"]
|
379
379
|
stack = []
|
380
380
|
rule = token(/\w/)%stack - token("b") - stackref(stack) >> proc{|x| x}
|
381
381
|
assert_equal(nil, rule.parse(buff))
|
382
382
|
end
|
383
383
|
|
384
|
-
def
|
384
|
+
def test_stackref3()
|
385
385
|
buff = ["a","b","a","b","a","b"]
|
386
386
|
stack = []
|
387
387
|
rule = (token(/\w/) - token(/\w/))%stack - (stackref(stack)%stack)*0 >> proc{|x| x}
|
@@ -393,6 +393,27 @@ class TestTDParser < Test::Unit::TestCase
|
|
393
393
|
assert_equal(["a","b",[["a","b"]]], rule.parse(buff))
|
394
394
|
end
|
395
395
|
|
396
|
+
def test_parallel1()
|
397
|
+
rule = token("a") - (token("b") + token("c")) >> Proc.new{|x| x}
|
398
|
+
assert_equal(["a",[["b"],nil]], rule.parse(["a","b"]))
|
399
|
+
assert_equal(["a",[nil,["c"]]], rule.parse(["a","c"]))
|
400
|
+
end
|
401
|
+
|
402
|
+
def test_parallel2()
|
403
|
+
rule = token("a") - token("b") - (token("c") + token("d")) >> Proc.new{|x| x}
|
404
|
+
assert_equal(["a","b",[["c"],nil]], rule.parse(["a","b","c"]))
|
405
|
+
assert_equal(["a","b",[nil,["d"]]], rule.parse(["a","b","d"]))
|
406
|
+
end
|
407
|
+
|
408
|
+
def test_optimize1()
|
409
|
+
rule =
|
410
|
+
token("a") - token("b") - token("c") >> Proc.new{|x| x} |
|
411
|
+
token("a") - token("b") - token("d") >> Proc.new{|x| x}
|
412
|
+
rule = rule.optimize(false)
|
413
|
+
assert_equal(["a","b","c"], rule.parse(["a","b","c"]))
|
414
|
+
assert_equal(["a","b","d"], rule.parse(["a","b","d"]))
|
415
|
+
end
|
416
|
+
|
396
417
|
def test_chainl1()
|
397
418
|
buff = ["3", "-", "2", "-", "1"]
|
398
419
|
rule = chainl(token(/\d+/) >> Proc.new{|x|x[0].to_i}, token("-")){|x|
|
metadata
CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.0
|
|
3
3
|
specification_version: 1
|
4
4
|
name: tdp4r
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 1.3.3
|
7
|
-
date: 2006-07-
|
6
|
+
version: 1.4.0
|
7
|
+
date: 2006-07-22 00:00:00 +09:00
|
8
8
|
summary: TDP4R is a top-down parser library that consists of parser combinators and utility functions.
|
9
9
|
require_paths:
|
10
10
|
- lib
|
@@ -36,9 +36,10 @@ files:
|
|
36
36
|
- samples/sample3.rb
|
37
37
|
- samples/sample4.rb
|
38
38
|
- samples/sample5.rb
|
39
|
-
- samples/sample6.rb
|
40
|
-
- samples/sample7.rb
|
39
|
+
- samples/sample_expr.rb
|
41
40
|
- samples/sample_list.rb
|
41
|
+
- samples/sample_optimize.rb
|
42
|
+
- samples/sample_xml.rb
|
42
43
|
- test/test_tdp.rb
|
43
44
|
- doc/faq.txt
|
44
45
|
- doc/guide.txt
|
data/samples/sample7.rb
DELETED
@@ -1,79 +0,0 @@
|
|
1
|
-
require 'tdp'
|
2
|
-
require 'tdputils'
|
3
|
-
require 'rexml/parsers/pullparser'
|
4
|
-
|
5
|
-
class Array
|
6
|
-
def ===(ary)
|
7
|
-
if super(ary)
|
8
|
-
return true
|
9
|
-
end
|
10
|
-
if !ary.is_a?(Array)
|
11
|
-
return false
|
12
|
-
end
|
13
|
-
each_with_index{|v,idx|
|
14
|
-
case ary[idx]
|
15
|
-
when v
|
16
|
-
else
|
17
|
-
return false
|
18
|
-
end
|
19
|
-
}
|
20
|
-
true
|
21
|
-
end
|
22
|
-
end
|
23
|
-
|
24
|
-
class Hash
|
25
|
-
def ===(h)
|
26
|
-
if super(h)
|
27
|
-
return true
|
28
|
-
end
|
29
|
-
if !h.is_a?(Hash)
|
30
|
-
return false
|
31
|
-
end
|
32
|
-
each{|k,v|
|
33
|
-
case h[k]
|
34
|
-
when v
|
35
|
-
else
|
36
|
-
return false
|
37
|
-
end
|
38
|
-
}
|
39
|
-
true
|
40
|
-
end
|
41
|
-
end
|
42
|
-
|
43
|
-
translator = TDParser.define{|g|
|
44
|
-
g.xml =
|
45
|
-
token([:start_element, "a", Hash]) -
|
46
|
-
token([:start_element, "b", Hash]) -
|
47
|
-
g.xml*0 -
|
48
|
-
token([:end_element, "b"]) -
|
49
|
-
token([:end_element, "a"]) >> Proc.new{|x|
|
50
|
-
"<foo><bar>#{x[2]}</bar></foo>"
|
51
|
-
} |
|
52
|
-
token([:start_element, String, Hash]) -
|
53
|
-
g.xml*0 -
|
54
|
-
token([:end_element, String]) >> Proc.new{|x|
|
55
|
-
stag = x[0][1].upcase()
|
56
|
-
etag = x[2][1].upcase()
|
57
|
-
"<#{stag}>#{x[1]}</#{etag}>"
|
58
|
-
} |
|
59
|
-
~token([:end_element, String]) -
|
60
|
-
any() - g.xml >> Proc.new{|x| x[2]}
|
61
|
-
|
62
|
-
def translate(src)
|
63
|
-
xparser = REXML::Parsers::BaseParser.new(src)
|
64
|
-
xml.parse{|g|
|
65
|
-
while(xparser.has_next?)
|
66
|
-
g.yield(xparser.pull())
|
67
|
-
end
|
68
|
-
}
|
69
|
-
end
|
70
|
-
}
|
71
|
-
|
72
|
-
puts(translator.translate(<<EOS))
|
73
|
-
<?xml version="1.0" ?>
|
74
|
-
<list>
|
75
|
-
<a><b><c>hoge</c></b></a>
|
76
|
-
<b>b?</b>
|
77
|
-
</list>
|
78
|
-
EOS
|
79
|
-
# => "<LIST><foo><bar><C></C></bar></foo><B></B></LIST>"
|
File without changes
|