tdp4r 1.3.3 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/tdp.rb +600 -162
- data/samples/sample_optimize.rb +45 -0
- data/samples/sample_xml.rb +123 -0
- data/test/test_tdp.rb +24 -3
- metadata +5 -4
- data/samples/sample7.rb +0 -79
- /data/samples/{sample6.rb → sample_expr.rb} +0 -0
data/lib/tdp.rb
CHANGED
@@ -10,8 +10,8 @@ module TDParser
|
|
10
10
|
end
|
11
11
|
|
12
12
|
class TokenGenerator < Generator
|
13
|
-
def initialize(*args)
|
14
|
-
super(*args)
|
13
|
+
# Passes every positional argument and the optional block up to the
# superclass constructor, then sets @buffer to a fresh empty array.
def initialize(*args, &block)
  super # bare super re-sends the same arguments and block
  @buffer = []
end
|
17
17
|
|
@@ -93,141 +93,80 @@ module TDParser
|
|
93
93
|
end
|
94
94
|
include BufferUtils
|
95
95
|
|
96
|
-
class
|
96
|
+
class Parser
|
97
97
|
include BufferUtils
|
98
|
+
include TDParser
|
99
|
+
|
100
|
+
# Returns a Proc that forwards all of its arguments to #call on this parser.
def to_proc()
  Proc.new { |*forwarded| call(*forwarded) }
end
|
103
|
+
|
104
|
+
def to_s()
|
105
|
+
"??"
|
106
|
+
end
|
107
|
+
|
108
|
+
# Base implementation: accepts any arguments and returns nil.
# Concrete parser classes supply their own #call.
def call(*args)
  nil
end
|
110
|
+
|
111
|
+
#def [](*args)
|
112
|
+
# call(*args)
|
113
|
+
#end
|
114
|
+
|
115
|
+
# Base implementation: nothing to rewrite, so a shallow copy of the receiver
# is returned. The +default+ flag is accepted but not consulted here.
def optimize(default=false)
  dup
end
|
118
|
+
|
119
|
+
# Base parsers never compare equal; subclasses override this with a
# structural comparison.
def ==(r)
  return false
end
|
122
|
+
|
123
|
+
# Default notion of "same parser": whatever #== reports for +r+.
def same?(r)
  equal_by_value = (self == r)
  equal_by_value
end
|
98
126
|
|
99
127
|
# Sequencing operator: builds a ConcatParser from the receiver and +r+.
def -(r)
  ConcatParser.new(self, r)
end
|
130
|
+
|
131
|
+
# Builds a ParallelParser from the receiver and +r+.
def +(r)
  ParallelParser.new(self, r)
end
|
112
134
|
|
113
135
|
# Alternation operator: builds a ChoiceParser from the receiver and +r+ and
# immediately returns the result of its optimize(true) pass.
def |(r)
  choice = ChoiceParser.new(self, r)
  choice.optimize(true)
end
|
125
138
|
|
126
|
-
def *(
|
127
|
-
if(
|
128
|
-
range = n
|
139
|
+
# Repetition operator.
#
# range:: either a Range (its +min+ becomes the required count and the Range
#         itself is handed on) or a plain count (handed on with a nil range).
# Returns an IterationParser wrapping the receiver.
def *(range)
  minimum, bounds = range.is_a?(Range) ? [range.min, range] : [range, nil]
  IterationParser.new(self, minimum, bounds)
end
|
183
148
|
|
184
149
|
# Attaches the callable +act+ to the receiver by wrapping both in an
# ActionParser.
def >>(act)
  ActionParser.new(self, act)
end
|
195
152
|
|
196
|
-
def /(
|
197
|
-
|
198
|
-
x = self[tokens, buff]
|
199
|
-
buff.map[symbol] = x
|
200
|
-
x
|
201
|
-
}
|
153
|
+
# Wraps the receiver in a LabelParser keyed by +label+.
def /(label)
  LabelParser.new(self, label)
end
|
203
156
|
|
204
157
|
# Wraps the receiver in a StackParser that uses +stack+.
def %(stack)
  StackParser.new(self, stack)
end
|
211
160
|
|
212
161
|
# Builds a Parser from a block that stores a copy of the buffer under
# +symbol+ and then invokes the receiver through #[].
#
# NOTE(review): in the code visible here Parser#[] is commented out and no
# Parser#initialize that keeps a block is shown -- confirm this operator
# still produces a working parser.
def >(symbol)
  Parser.new { |tokens, buff|
    snapshot = buff.dup
    buff[symbol] = snapshot
    self[tokens, buff]
  }
end
|
218
167
|
|
219
168
|
# Unary negation: wraps the receiver in a NegativeParser.
def ~@()
  NegativeParser.new(self)
end
|
232
171
|
|
233
172
|
def parse(tokens=nil, &blk)
|
@@ -242,7 +181,7 @@ module TDParser
|
|
242
181
|
else
|
243
182
|
@tokens = TokenGenerator.new(&blk)
|
244
183
|
end
|
245
|
-
r =
|
184
|
+
r = call(@tokens, TokenBuffer.new())
|
246
185
|
if( r.nil? )
|
247
186
|
nil
|
248
187
|
else
|
@@ -262,112 +201,611 @@ module TDParser
|
|
262
201
|
self >> block
|
263
202
|
end
|
264
203
|
end
|
265
|
-
# end of
|
204
|
+
# end of Parser
|
205
|
+
|
206
|
+
# Parser that resolves its target at parse time: either a method name sent
# to a context object, or a Parser used directly.
class NonTerminalParser < Parser
  attr_reader :context, :symbol, :options

  # context:: object that receives the message named by +sym+
  # sym::     Symbol/String method name, or a Parser
  # options:: extra arguments sent along with the message
  def initialize(context, sym, *options)
    @context = context
    @symbol  = sym
    @options = options
  end

  # Runs the referenced parser on (tokens, buff). Yields nil when @symbol is
  # neither a Symbol/String nor a Parser.
  def call(tokens, buff)
    case @symbol
    when Symbol, String
      @context.__send__(@symbol, *@options).call(tokens, buff)
    when Parser
      @symbol.call(tokens, buff)
    end
  end

  # Equal when class, context, symbol and options all match.
  def ==(r)
    (self.class == r.class) &&
      (@context == r.context) &&
      (@symbol == r.symbol) &&
      (@options == r.options)
  end

  def to_s()
    "#{@symbol}"
  end
end
|
282
236
|
|
283
|
-
|
284
|
-
|
237
|
+
# Parser that consumes one token and compares it with a stored object.
class TerminalParser < Parser
  attr_reader :symbol, :equality

  # obj::   object to compare tokens against
  # eqsym:: name of the comparison method (sent in both directions)
  def initialize(obj, eqsym)
    @symbol = obj
    @equality = eqsym
  end

  # Shifts one token, records it at the front of +buff+, and returns
  # Sequence[token] when the comparison holds in either direction, else nil.
  def call(tokens, buff)
    t = tokens.shift
    buff.unshift(t)
    matched = @symbol.__send__(@equality, t) || t.__send__(@equality, @symbol)
    matched ? Sequence[t] : nil
  end

  # Equal when class, compared object and comparison method all match.
  def ==(r)
    (self.class == r.class) &&
      (@symbol == r.symbol) &&
      (@equality == r.equality)
  end

  def to_s()
    "#{@symbol}"
  end
end
|
295
265
|
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
266
|
+
# Base class for parsers built from other parsers, kept in +parsers+.
class CompositeParser < Parser
  attr_accessor :parsers

  # Stores the given child parsers; the block argument is accepted but unused.
  def initialize(*parsers, &b)
    @parsers = parsers
  end

  # Returns a copy of the receiver whose children are each optimized with
  # the same +default+ flag.
  def optimize(default=false)
    dup.tap { |copy|
      copy.parsers = @parsers.map { |child| child.optimize(default) }
    }
  end

  # Equal when the classes match and the child lists compare equal.
  def ==(r)
    (self.class == r.class) && (@parsers == r.parsers)
  end

  # Same when the inherited check passes and every child pair is same? too.
  def same?(r)
    super(r) &&
      @parsers.zip(r.parsers).all? { |mine, theirs| mine.same?(theirs) }
  end

  def to_s()
    "<composite: #{@parsers.map { |child| child.to_s }}>"
  end
end
|
293
|
+
|
294
|
+
# Runs a child parser and feeds its result to a callable action.
class ActionParser < CompositeParser
  attr_reader :action

  # parser:: child parser
  # act::    callable invoked via act[...]
  def initialize(parser, act)
    @action = act
    super(parser)
  end

  # Returns nil when the child fails. Otherwise copies the child's result
  # into a TokenBuffer sharing buff's map, applies the action to it and
  # wraps the action's value in a Sequence.
  def call(tokens, buff)
    result = @parsers[0].call(tokens, buff)
    return nil if result.nil?

    captured = TokenBuffer[*result]
    captured.map = buff.map
    Sequence[@action[captured]]
  end

  # Equal when the composite comparison holds and the actions are equal.
  def ==(r)
    super(r) && (@action == r.action)
  end

  def to_s()
    "(#{@parsers[0]} <action>)"
  end
end
|
307
321
|
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
322
|
+
# Runs a child parser and records its result in the buffer map under a label.
class LabelParser < CompositeParser
  attr_reader :label

  def initialize(parser, label)
    @label = label
    super(parser)
  end

  # Stores the child's result (nil included) in buff.map[@label] and
  # returns that result.
  def call(tokens, buff)
    result = @parsers[0].call(tokens, buff)
    buff.map[@label] = result
    result
  end

  # Equal when the composite comparison holds and the labels are equal.
  def ==(r)
    super(r) && (@label == r.label)
  end

  def to_s()
    "(#{@parsers[0]}/#{@label})"
  end
end
|
345
|
+
|
346
|
+
# Runs a child parser and pushes its result onto a caller-supplied stack.
class StackParser < CompositeParser
  attr_reader :stack

  def initialize(parser, stack)
    @stack = stack
    super(parser)
  end

  # Pushes the child's result (nil included) onto @stack and returns it.
  def call(tokens, buff)
    result = @parsers[0].call(tokens, buff)
    @stack.push(result)
    result
  end

  # Equal when the composite comparison holds and the stacks are equal.
  def ==(r)
    super(r) && (@stack == r.stack)
  end

  # Never treated as "same" as another parser.
  def same?(r)
    false
  end

  def to_s()
    "<stack:#{@stack.object_id}>"
  end
end
|
372
|
+
|
373
|
+
# Sequence of two parsers: both must succeed, results are joined with +.
class ConcatParser < CompositeParser
  def initialize(r1, r2)
    super(r1, r2)
  end

  # Runs the first child, then the second; returns nil as soon as either
  # yields nil, otherwise the first result + the second result.
  def call(tokens, buff)
    head = @parsers[0].call(tokens, buff)
    return nil if head.nil?

    tail = @parsers[1].call(tokens, buff)
    return nil if tail.nil?

    head + tail
  end

  # Appending to a concatenation re-nests to the right:
  # (a - b) - r becomes a - (b - r).
  def -(r)
    first, second = @parsers[0], @parsers[1]
    first - (second - r)
  end

  def to_s()
    "(#{@parsers[0]} #{@parsers[1]})"
  end
end
|
318
398
|
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
399
|
+
# Ordered choice between two parsers, with an optimizer that factors out a
# shared leading sequence of the two alternatives.
class ChoiceParser < CompositeParser
  def initialize(r1, r2)
    super(r1, r2)
  end

  # Tries the first alternative on a scratch buffer obtained from prepare.
  # On success the scratch contents are inserted at the front of +buff+;
  # on failure recover(scratch, tokens) is called and the second
  # alternative runs against the original buffer.
  # (prepare/recover are defined outside this class.)
  def call(tokens, buff)
    scratch = prepare(buff)
    first = @parsers[0].call(tokens, scratch)
    unless first.nil?
      buff.insert(0, *scratch)
      return first
    end
    recover(scratch, tokens)
    @parsers[1].call(tokens, buff)
  end

  def to_s()
    "(#{@parsers[0]} | #{@parsers[1]})"
  end

  # Finds the common leading part of two ConcatParsers.
  # Returns [shared_prefix_or_nil, rest_of_r1, rest_of_r2].
  def shared_sequence(r1, r2)
    return [nil, r1, r2] unless r1.is_a?(ConcatParser) && r2.is_a?(ConcatParser)

    head1, rest1 = r1.parsers[0], r1.parsers[1]
    head2, rest2 = r2.parsers[0], r2.parsers[1]
    return [nil, r1, r2] unless head1.same?(head2)

    # Keep factoring while the following elements also coincide.
    deeper, rest1, rest2 = shared_sequence(rest1, rest2)
    prefix = deeper ? head1 - deeper : head1
    [prefix, rest1, rest2]
  end

  # Rewrites "prefix - a | prefix - b" into "prefix - (a + b)".
  # Actions attached to the original alternatives are re-attached through a
  # dispatching Proc that looks at which slot of the (a + b) result is set.
  # When nothing is shared: a plain copy if +default+ is true, otherwise the
  # inherited child-wise optimization.
  def optimize(default=false)
    left, right = @parsers[0], @parsers[1]
    left_action = right_action = nil
    if left.is_a?(ActionParser)
      left_action = left.action
      left = left.parsers[0]
    end
    if right.is_a?(ActionParser)
      right_action = right.action
      right = right.parsers[0]
    end

    prefix, left_rest, right_rest = shared_sequence(left, right)
    unless prefix
      return default ? dup : super(default)
    end

    merged = prefix - (left_rest + right_rest)
    if left_action && right_action
      merged = merged >> Proc.new { |x|
        y0, y1 = x.pop
        y0 ? left_action.call(x.push(*y0)) : right_action.call(x.push(*y1))
      }
    elsif left_action
      merged = merged >> Proc.new { |x|
        y0, _y1 = x.pop
        y0 ? left_action.call(x.push(*y0)) : nil
      }
    elsif right_action
      merged = merged >> Proc.new { |x|
        _y0, y1 = x.pop
        y1 ? right_action.call(x.push(*y1)) : nil
      }
    end
    merged
  end
end
|
488
|
+
|
489
|
+
# Choice between two parsers that reports which side produced the result:
# the value is a nested Sequence with one slot per alternative.
class ParallelParser < CompositeParser
  def initialize(r1, r2)
    super(r1, r2)
  end

  # First alternative succeeds -> Sequence[Sequence[result, nil]].
  # Otherwise recover is called and the answer is
  # Sequence[Sequence[nil, <second alternative's result>]].
  # (prepare/recover are defined outside this class.)
  def call(tokens, buff)
    scratch = prepare(buff)
    first = @parsers[0].call(tokens, scratch)
    unless first.nil?
      buff.insert(0, *scratch)
      return Sequence[Sequence[first, nil]]
    end
    recover(scratch, tokens)
    Sequence[Sequence[nil, @parsers[1].call(tokens, buff)]]
  end

  def to_s()
    "(#{@parsers[0]} + #{@parsers[1]})"
  end
end
|
509
|
+
|
510
|
+
# Repeats a child parser: a required number of matches first, then as many
# further matches as succeed.
class IterationParser < CompositeParser
  attr_reader :min, :range

  # parser:: child parser to repeat
  # n::      number of matches that must succeed
  # range::  Range or nil, as supplied by the caller
  def initialize(parser, n, range)
    @min = n
    @range = range
    super(parser)
  end

  # Returns nil if one of the required matches fails; otherwise a Sequence
  # holding the array of all collected results.
  # (prepare/recover are defined outside this class.)
  def call(ts, buff)
    inner = @parsers[0]
    matches = []

    # One attempt on a scratch buffer; true when the child matched.
    attempt = lambda {
      scratch = prepare(buff)
      outcome = inner.call(ts, scratch)
      if outcome.nil?
        recover(scratch, ts)
        false
      else
        buff.insert(0, *scratch)
        matches.push(outcome)
        true
      end
    }

    remaining = @min
    while remaining > 0
      remaining -= 1
      return nil unless attempt.call
    end

    # Keep matching until the child fails once.
    drain = lambda { nil while attempt.call }
    if range
      # The drain loop is started once per element of the range.
      range.each { drain.call }
    else
      drain.call
    end
    Sequence[matches]
  end

  def to_s()
    "(#{@parsers[0]})*#{@range || @min}"
  end

  # Equal when the composite comparison holds and min and range match.
  def ==(r)
    super(r) &&
      (@min == r.min) &&
      (@range == r.range)
  end
end
|
329
580
|
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
|
581
|
+
# Succeeds exactly when the wrapped parser fails.
class NegativeParser < CompositeParser
  def initialize(parser)
    super(parser)
  end

  # Runs the child on a scratch buffer, remembers the scratch contents in
  # reverse order, and always calls recover afterwards. Returns a nested
  # Sequence of those contents when the child failed, nil when it matched.
  # (prepare/recover are defined outside this class.)
  def call(tokens, buff)
    scratch = prepare(buff)
    outcome = @parsers[0].call(tokens, scratch)
    consumed = scratch.reverse
    recover(scratch, tokens)
    outcome.nil? ? Sequence[Sequence[*consumed]] : nil
  end

  def to_s()
    "~#{@parsers[0]}"
  end
end
|
339
602
|
|
340
|
-
|
341
|
-
|
603
|
+
# Parser that never matches.
class FailParser < Parser
  # Always returns nil (failure); tokens and buffer are left untouched.
  def call(tokens, buff)
    nil
  end

  def to_s()
    "<fail>"
  end

  # Two FailParsers are interchangeable, so equality is decided by class.
  # The parameter is required: == is always invoked with one argument, and
  # the body reads it.
  def ==(r)
    self.class == r.class
  end
end
|
343
|
-
alias empty empty_rule
|
344
616
|
|
345
|
-
|
346
|
-
|
617
|
+
# Parser that matches without consuming anything.
class EmptyParser < Parser
  # Always succeeds with Sequence[nil]; tokens and buffer are left untouched.
  def call(tokens, buff)
    Sequence[nil]
  end

  def to_s()
    "<empty>"
  end

  # Equal only to another EmptyParser. Answering true for every argument
  # would make same? (which relies on ==) treat any parser as identical to
  # this one when ChoiceParser looks for shared prefixes.
  def ==(r)
    self.class == r.class
  end
end
|
630
|
+
|
631
|
+
# Parser that accepts any single token.
class AnyParser < Parser
  # Shifts one token; returns nil when the shifted value is nil, otherwise
  # Sequence[token].
  def call(tokens, buff)
    t = tokens.shift
    t.nil? ? nil : Sequence[t]
  end

  def to_s()
    "<any>"
  end

  # Equal only to another AnyParser. Answering true for every argument
  # would make same? (which relies on ==) treat any parser as identical to
  # this one when ChoiceParser looks for shared prefixes.
  def ==(r)
    self.class == r.class
  end
end
|
355
|
-
alias any any_rule
|
356
649
|
|
357
|
-
|
358
|
-
|
650
|
+
# Parser that succeeds only when shifting the token source yields nil.
class NoneParser < Parser
  # Shifts one token; returns Sequence[nil] when the shifted value is nil,
  # otherwise nil (failure).
  def call(tokens, buff)
    t = tokens.shift
    t.nil? ? Sequence[nil] : nil
  end

  def to_s()
    "<none>"
  end

  # Equal only to another NoneParser. Answering true for every argument
  # would make same? (which relies on ==) treat any parser as identical to
  # this one when ChoiceParser looks for shared prefixes.
  def ==(r)
    self.class == r.class
  end
end
|
668
|
+
|
669
|
+
# Shared behaviour for parsers that re-match previously captured input.
class ReferenceParser < Parser
  # Turns a list of captured items into a parser chain: the first item
  # becomes a token parser, and each following item is appended with "-".
  # Following items that are Sequences are expanded recursively.
  # Note: +xs+ is modified (its first element is removed).
  def __backref__(xs, eqsym)
    first = xs.shift
    xs.inject(token(first, eqsym)) { |chain, item|
      piece =
        case item
        when Sequence then __backref__(item, eqsym)
        else token(item, eqsym)
        end
      chain - piece
    }
  end

  # Never treated as "same" as another parser.
  def same?(r)
    false
  end
end
|
686
|
+
|
687
|
+
# Re-matches what was stored in the buffer map under a reference key.
class BackrefParser < ReferenceParser
  attr_reader :reference, :equality

  # ref::   key looked up in buff.map
  # eqsym:: comparison method name used for the generated token parsers
  def initialize(ref, eqsym)
    @reference = ref
    @equality = eqsym
  end

  # Fails (nil) when nothing, or an empty value, is stored under the
  # reference. Otherwise builds a parser from a copy of the stored items
  # and runs it.
  def call(tokens, buff)
    stored = buff.map[@reference]
    return nil if stored.nil? || stored.empty?

    __backref__(stored.dup, @equality).call(tokens, buff)
  end

  def to_s()
    "<backref:#{@reference}>"
  end

  # Equal when class, reference and comparison method all match.
  # The class is checked here directly: delegating to the inherited ==
  # (which is constantly false) would make this comparison always fail.
  def ==(r)
    (self.class == r.class) &&
      (@reference == r.reference) &&
      (@equality == r.equality)
  end
end
|
714
|
+
|
715
|
+
# Re-matches the value popped from a caller-supplied stack.
class StackrefParser < ReferenceParser
  attr_reader :stack, :equality

  # stack:: object popped on every call
  # eqsym:: comparison method name used for the generated token parsers
  def initialize(stack, eqsym)
    @stack = stack
    @equality = eqsym
  end

  # Pops the stack (a side effect on every call). Fails (nil) when the
  # popped value is nil or empty; otherwise builds a parser from a copy of
  # it and runs that parser.
  def call(tokens, buff)
    popped = @stack.pop
    return nil if popped.nil? || popped.empty?

    __backref__(popped.dup, @equality).call(tokens, buff)
  end

  def to_s()
    "<stackref:#{@stack.object_id}>"
  end

  # Equal when class, stack and comparison method all match.
  # The class is checked here directly: delegating to the inherited ==
  # (which is constantly false) would make this comparison always fail.
  def ==(r)
    (self.class == r.class) &&
      (@stack == r.stack) &&
      (@equality == r.equality)
  end
end
|
742
|
+
|
743
|
+
# Succeeds when the buffer map's :state entry equals a fixed value.
class StateParser < Parser
  attr_reader :state

  def initialize(s)
    @state = s
  end

  # Returns Sequence[@state] when buff.map[:state] == @state, else nil.
  # Tokens are not consumed.
  def call(tokens, buff)
    buff.map[:state] == @state ? Sequence[@state] : nil
  end

  def to_s()
    "<state:#{@state}>"
  end

  # Equal when class and state match.
  # The class is checked here directly: delegating to the inherited ==
  # (which is constantly false) would make this comparison always fail.
  def ==(r)
    (self.class == r.class) && (@state == r.state)
  end

  # Never treated as "same" as another parser.
  def same?(r)
    false
  end
end
|
771
|
+
|
772
|
+
# Creates a NonTerminalParser that uses the receiver as context for +sym+,
# passing along any extra +opts+.
def rule(sym, *opts)
  context = self
  NonTerminalParser.new(context, sym, *opts)
end
|
775
|
+
|
776
|
+
# Creates a TerminalParser for +x+; tokens are compared with the method
# named by +eqsym+ (:=== unless given).
def token(x, eqsym=:===)
  return TerminalParser.new(x, eqsym)
end
|
779
|
+
|
780
|
+
# Creates a BackrefParser for the reference key +x+, comparing with the
# method named by +eqsym+ (:=== unless given).
def backref(x, eqsym=:===)
  return BackrefParser.new(x, eqsym)
end
|
783
|
+
|
784
|
+
# Creates a StackrefParser over +stack+, comparing with the method named by
# +eqsym+ (:=== unless given).
def stackref(stack, eqsym=:===)
  return StackrefParser.new(stack, eqsym)
end
|
787
|
+
|
788
|
+
# Creates a StateParser for the state value +s+.
def state(s)
  return StateParser.new(s)
end
|
791
|
+
|
792
|
+
# Creates an EmptyParser, handing an optional block to its constructor.
def empty_rule(&b)
  return EmptyParser.new(&b)
end
|
795
|
+
alias empty empty_rule
|
796
|
+
|
797
|
+
# Creates an AnyParser.
def any_rule()
  return AnyParser.new
end
|
800
|
+
alias any any_rule
|
801
|
+
|
802
|
+
# Creates a NoneParser.
def none_rule()
  return NoneParser.new
end
|
367
805
|
alias none none_rule
|
368
806
|
|
369
807
|
# Creates a FailParser.
def fail_rule()
  return FailParser.new
end
|
372
810
|
alias fail fail_rule
|
373
811
|
|
@@ -424,7 +862,7 @@ module TDParser
|
|
424
862
|
sym = sym.to_s()
|
425
863
|
if (sym[-1,1] == "=")
|
426
864
|
case arg0
|
427
|
-
when
|
865
|
+
when Parser
|
428
866
|
self.class.instance_eval{
|
429
867
|
define_method(sym[0..-2]){ arg0 }
|
430
868
|
}
|
@@ -0,0 +1,45 @@
|
|
1
|
+
require 'tdp'
|
2
|
+
require 'benchmark'
|
3
|
+
require 'pp'
|
4
|
+
|
5
|
+
# disable auto optimization
|
6
|
+
module TDParser
  class Parser
    # Sample-local override: build the ChoiceParser as-is, without running
    # an optimize pass on it.
    def |(r)
      return ChoiceParser.new(self, r)
    end
  end
end
|
13
|
+
|
14
|
+
# Three ways of writing the same grammar, used to compare parse times.
parser = TDParser.define{|g|
  flatten = Proc.new{|x| x.flatten}

  # rule1: both alternatives repeat the same prefix; no optimize call.
  g.rule1 =
    token("1") - token("2") - rule1 - token("a") >> flatten |
    token("1") - token("2") - rule1 - token("b") >> flatten |
    empty()

  # rule2: same alternatives as rule1, followed by an explicit optimize().
  g.rule2 =
    (token("1") - token("2") - rule2 - token("a") >> flatten |
     token("1") - token("2") - rule2 - token("b") >> flatten |
     empty()).optimize()

  # rule3: the common prefix is factored out by hand.
  g.rule3 =
    (token("1") - token("2") - rule3 - (token("a")|token("b")) >> flatten |
     empty())
}

# Print each rule's textual form.
puts(parser.rule1.to_s)
puts(parser.rule2.to_s)
puts(parser.rule3.to_s)

# Time the three rules on inputs of growing size.
Benchmark.bm{|x|
  buff = ["1","2"]
  b = ["b"]
  [5,10,15].each{|i|
    puts("--")
    x.report{ $r1 = parser.rule1.parse(buff*i + b*i) }
    x.report{ $r2 = parser.rule2.parse(buff*i + b*i) }
    x.report{ $r3 = parser.rule3.parse(buff*i + b*i) }
  }
}
|
@@ -0,0 +1,123 @@
|
|
1
|
+
require 'tdp'
|
2
|
+
require 'tdputils'
|
3
|
+
require 'rexml/parsers/pullparser'
|
4
|
+
require 'rexml/document'
|
5
|
+
|
6
|
+
# Sample-local extension of Array#=== so that an array can act as a pattern.
class Array
  # Pattern match against +ary+.
  #
  # Returns true when the inherited === already holds, false when +ary+ is
  # not an Array, and otherwise true only if every element of the receiver
  # matches (via ===) the element of +ary+ at the same index. Extra trailing
  # elements in +ary+ are ignored; a missing element is compared as nil.
  def ===(ary)
    return true if super
    return false unless ary.is_a?(Array)

    each_with_index.all? { |pattern, idx| pattern === ary[idx] }
  end
end
|
24
|
+
|
25
|
+
# Sample-local extension of Hash#=== so that a hash can act as a pattern.
class Hash
  # Pattern match against +h+.
  #
  # Returns true when the inherited === already holds, false when +h+ is not
  # a Hash, and otherwise true only if every value of the receiver matches
  # (via ===) the value stored in +h+ under the same key. Keys that exist
  # only in +h+ are ignored; a missing key is compared as h[key].
  def ===(h)
    return true if super
    return false unless h.is_a?(Hash)

    all? { |key, pattern| pattern === h[key] }
  end
end
|
43
|
+
|
44
|
+
# Helpers for writing grammars over REXML pull-parser events.
module XMLParser
  # Token parser for a start-element event named +name+ with a Hash in the
  # third position.
  def xml_stag(name)
    token([:start_element, name, Hash])
  end
  alias stag xml_stag

  # Token parser for an end-element event named +name+.
  def xml_etag(name)
    token([:end_element, name])
  end
  alias etag xml_etag

  # Parser for a whole element: start tag, then either the parser returned
  # by the block or empty(), then the end tag.
  def dom_element(elem, &inner)
    opening = stag(elem)
    content = inner.call() | empty()
    opening - content - etag(elem)
  end
  alias element dom_element

  # Builds an action Proc. It creates a REXML::Element whose name and
  # attributes come from positions 1 and 2 of the first matched item, then
  # calls the block with that node and the second matched item.
  def dom_filter(&act)
    Proc.new{|x|
      start_tag = x[0]
      node = REXML::Element.new()
      node.name = start_tag[1]
      node.attributes.merge!(start_tag[2])
      act[node, x[1]]
    }
  end
  alias filter dom_filter

  # Like dom_filter, but first adds the child (or each element of a child
  # Array) to the node. The result is the block's value when a block was
  # given, otherwise the node itself.
  def dom_construct(&act)
    dom_filter{|node, child|
      children = child.is_a?(Array) ? child : [child]
      children.each{|c| node.add(c) }
      act ? act[node] : node
    }
  end
  alias construct dom_construct
end
|
88
|
+
|
89
|
+
# Grammar that rewrites an XML event stream:
#   <a><b>..</b></a>  -> elements renamed to "foo" / "bar"
#   any other element -> element name upper-cased
#   anything that is not an end tag is skipped and parsing continues
translator = TDParser.define{|g|
  extend XMLParser

  g.xml =
    element("a"){
      element("b"){
        g.xml*0 >> Proc.new{|x| x[0].collect{|y| y[0]} }
      } >> construct{|node| node.tap{|n| n.name = "bar" } }
    } >> construct{|node| node.tap{|n| n.name = "foo" } } |
    element(String){
      g.xml*0 >> Proc.new{|x| x[0].collect{|y| y[0]} }
    } >> construct{|node|
      node.tap{|n| n.name = n.name.upcase() }
    } |
    ~etag(String) - any() - g.xml >> Proc.new{|x| x[2]}

  # Feeds every event pulled from REXML's BaseParser into the xml rule.
  def translate(src)
    xparser = REXML::Parsers::BaseParser.new(src)
    xml.parse{|gen|
      gen.yield(xparser.pull()) while xparser.has_next?
    }
  end
}

puts(translator.translate(<<EOS))
<?xml version="1.0" ?>
<list>
<a><b><c>hoge</c></b></a>
<b>b?</b>
</list>
EOS
|
123
|
+
# => "<LIST><foo><bar><C></C></bar></foo><B></B></LIST>"
|
data/test/test_tdp.rb
CHANGED
@@ -325,7 +325,7 @@ class TestTDParser < Test::Unit::TestCase
|
|
325
325
|
end
|
326
326
|
|
327
327
|
# A regexp terminal compared with =~; the action converts the match to an
# Integer.
def test_regex_match()
  digits = token(/\d+/, :=~)
  rule = digits >> proc{|x| x[0].to_i }
  assert_equal(10, rule.parse(["10"]))
end
|
331
331
|
|
@@ -374,14 +374,14 @@ class TestTDParser < Test::Unit::TestCase
|
|
374
374
|
assert_equal(["a","b","a"], rule.parse(buff))
|
375
375
|
end
|
376
376
|
|
377
|
-
def
|
377
|
+
# The stack reference must re-match the pushed "a"; the input ends in "c",
# so the parse is expected to yield nil.
def test_stackref2()
  stack = []
  buff = ["a","b","c"]
  grammar = token(/\w/)%stack - token("b") - stackref(stack)
  rule = grammar >> proc{|x| x}
  assert_equal(nil, rule.parse(buff))
end
|
383
383
|
|
384
|
-
def
|
384
|
+
def test_stackref3()
|
385
385
|
buff = ["a","b","a","b","a","b"]
|
386
386
|
stack = []
|
387
387
|
rule = (token(/\w/) - token(/\w/))%stack - (stackref(stack)%stack)*0 >> proc{|x| x}
|
@@ -393,6 +393,27 @@ class TestTDParser < Test::Unit::TestCase
|
|
393
393
|
assert_equal(["a","b",[["a","b"]]], rule.parse(buff))
|
394
394
|
end
|
395
395
|
|
396
|
+
# "+" reports which alternative matched: the result pair has the matched
# side filled in and nil on the other side.
def test_parallel1()
  grammar = token("a") - (token("b") + token("c"))
  rule = grammar >> Proc.new{|x| x}
  assert_equal(["a",[["b"],nil]], rule.parse(["a","b"]))
  assert_equal(["a",[nil,["c"]]], rule.parse(["a","c"]))
end
|
401
|
+
|
402
|
+
# Same check as test_parallel1 with a two-token prefix before the "+" group.
def test_parallel2()
  grammar = token("a") - token("b") - (token("c") + token("d"))
  rule = grammar >> Proc.new{|x| x}
  assert_equal(["a","b",[["c"],nil]], rule.parse(["a","b","c"]))
  assert_equal(["a","b",[nil,["d"]]], rule.parse(["a","b","d"]))
end
|
407
|
+
|
408
|
+
# Two alternatives sharing the prefix "a" "b" must still produce the
# original results after optimize(false).
def test_optimize1()
  first = token("a") - token("b") - token("c") >> Proc.new{|x| x}
  second = token("a") - token("b") - token("d") >> Proc.new{|x| x}
  rule = (first | second).optimize(false)
  assert_equal(["a","b","c"], rule.parse(["a","b","c"]))
  assert_equal(["a","b","d"], rule.parse(["a","b","d"]))
end
|
416
|
+
|
396
417
|
def test_chainl1()
|
397
418
|
buff = ["3", "-", "2", "-", "1"]
|
398
419
|
rule = chainl(token(/\d+/) >> Proc.new{|x|x[0].to_i}, token("-")){|x|
|
metadata
CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.0
|
|
3
3
|
specification_version: 1
|
4
4
|
name: tdp4r
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 1.
|
7
|
-
date: 2006-07-
|
6
|
+
version: 1.4.0
|
7
|
+
date: 2006-07-22 00:00:00 +09:00
|
8
8
|
summary: TDP4R is a top-down parser library that consists of parser combinators and utility functions.
|
9
9
|
require_paths:
|
10
10
|
- lib
|
@@ -36,9 +36,10 @@ files:
|
|
36
36
|
- samples/sample3.rb
|
37
37
|
- samples/sample4.rb
|
38
38
|
- samples/sample5.rb
|
39
|
-
- samples/
|
40
|
-
- samples/sample7.rb
|
39
|
+
- samples/sample_expr.rb
|
41
40
|
- samples/sample_list.rb
|
41
|
+
- samples/sample_optimize.rb
|
42
|
+
- samples/sample_xml.rb
|
42
43
|
- test/test_tdp.rb
|
43
44
|
- doc/faq.txt
|
44
45
|
- doc/guide.txt
|
data/samples/sample7.rb
DELETED
@@ -1,79 +0,0 @@
|
|
1
|
-
require 'tdp'
|
2
|
-
require 'tdputils'
|
3
|
-
require 'rexml/parsers/pullparser'
|
4
|
-
|
5
|
-
class Array
|
6
|
-
def ===(ary)
|
7
|
-
if super(ary)
|
8
|
-
return true
|
9
|
-
end
|
10
|
-
if !ary.is_a?(Array)
|
11
|
-
return false
|
12
|
-
end
|
13
|
-
each_with_index{|v,idx|
|
14
|
-
case ary[idx]
|
15
|
-
when v
|
16
|
-
else
|
17
|
-
return false
|
18
|
-
end
|
19
|
-
}
|
20
|
-
true
|
21
|
-
end
|
22
|
-
end
|
23
|
-
|
24
|
-
class Hash
|
25
|
-
def ===(h)
|
26
|
-
if super(h)
|
27
|
-
return true
|
28
|
-
end
|
29
|
-
if !h.is_a?(Hash)
|
30
|
-
return false
|
31
|
-
end
|
32
|
-
each{|k,v|
|
33
|
-
case h[k]
|
34
|
-
when v
|
35
|
-
else
|
36
|
-
return false
|
37
|
-
end
|
38
|
-
}
|
39
|
-
true
|
40
|
-
end
|
41
|
-
end
|
42
|
-
|
43
|
-
translator = TDParser.define{|g|
|
44
|
-
g.xml =
|
45
|
-
token([:start_element, "a", Hash]) -
|
46
|
-
token([:start_element, "b", Hash]) -
|
47
|
-
g.xml*0 -
|
48
|
-
token([:end_element, "b"]) -
|
49
|
-
token([:end_element, "a"]) >> Proc.new{|x|
|
50
|
-
"<foo><bar>#{x[2]}</bar></foo>"
|
51
|
-
} |
|
52
|
-
token([:start_element, String, Hash]) -
|
53
|
-
g.xml*0 -
|
54
|
-
token([:end_element, String]) >> Proc.new{|x|
|
55
|
-
stag = x[0][1].upcase()
|
56
|
-
etag = x[2][1].upcase()
|
57
|
-
"<#{stag}>#{x[1]}</#{etag}>"
|
58
|
-
} |
|
59
|
-
~token([:end_element, String]) -
|
60
|
-
any() - g.xml >> Proc.new{|x| x[2]}
|
61
|
-
|
62
|
-
def translate(src)
|
63
|
-
xparser = REXML::Parsers::BaseParser.new(src)
|
64
|
-
xml.parse{|g|
|
65
|
-
while(xparser.has_next?)
|
66
|
-
g.yield(xparser.pull())
|
67
|
-
end
|
68
|
-
}
|
69
|
-
end
|
70
|
-
}
|
71
|
-
|
72
|
-
puts(translator.translate(<<EOS))
|
73
|
-
<?xml version="1.0" ?>
|
74
|
-
<list>
|
75
|
-
<a><b><c>hoge</c></b></a>
|
76
|
-
<b>b?</b>
|
77
|
-
</list>
|
78
|
-
EOS
|
79
|
-
# => "<LIST><foo><bar><C></C></bar></foo><B></B></LIST>"
|
File without changes
|