reg 0.4.8 → 0.5.0a0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (64) hide show
  1. checksums.yaml +4 -0
  2. data/COPYING +0 -0
  3. data/History.txt +14 -0
  4. data/Makefile +59 -0
  5. data/README +87 -40
  6. data/article.txt +838 -0
  7. data/{assert.rb → lib/assert.rb} +3 -3
  8. data/{reg.rb → lib/reg.rb} +11 -4
  9. data/lib/reg/version.rb +21 -0
  10. data/lib/regarray.rb +455 -0
  11. data/{regarrayold.rb → lib/regarrayold.rb} +33 -7
  12. data/lib/regbackref.rb +73 -0
  13. data/lib/regbind.rb +230 -0
  14. data/{regcase.rb → lib/regcase.rb} +15 -5
  15. data/lib/regcompiler.rb +2341 -0
  16. data/{regcore.rb → lib/regcore.rb} +196 -85
  17. data/{regdeferred.rb → lib/regdeferred.rb} +35 -4
  18. data/{regposition.rb → lib/regevent.rb} +36 -38
  19. data/lib/reggraphpoint.rb +28 -0
  20. data/lib/reghash.rb +631 -0
  21. data/lib/reginstrumentation.rb +36 -0
  22. data/{regitem_that.rb → lib/regitem_that.rb} +32 -11
  23. data/{regknows.rb → lib/regknows.rb} +4 -2
  24. data/{reglogic.rb → lib/reglogic.rb} +76 -59
  25. data/{reglookab.rb → lib/reglookab.rb} +31 -21
  26. data/lib/regmatchset.rb +323 -0
  27. data/{regold.rb → lib/regold.rb} +27 -27
  28. data/{regpath.rb → lib/regpath.rb} +91 -1
  29. data/lib/regposition.rb +79 -0
  30. data/lib/regprogress.rb +1522 -0
  31. data/lib/regrepeat.rb +307 -0
  32. data/lib/regreplace.rb +254 -0
  33. data/lib/regslicing.rb +581 -0
  34. data/lib/regsubseq.rb +72 -0
  35. data/lib/regsugar.rb +361 -0
  36. data/lib/regvar.rb +180 -0
  37. data/lib/regxform.rb +212 -0
  38. data/{trace.rb → lib/trace_during.rb} +6 -4
  39. data/lib/warning.rb +37 -0
  40. data/parser.txt +26 -8
  41. data/philosophy.txt +18 -0
  42. data/reg.gemspec +58 -25
  43. data/regguide.txt +18 -0
  44. data/test/andtest.rb +46 -0
  45. data/test/regcompiler_test.rb +346 -0
  46. data/test/regdemo.rb +20 -0
  47. data/{item_thattest.rb → test/regitem_thattest.rb} +2 -2
  48. data/test/regtest.rb +2125 -0
  49. data/test/test_all.rb +32 -0
  50. data/test/test_reg.rb +19 -0
  51. metadata +108 -73
  52. data/calc.reg +0 -73
  53. data/forward_to.rb +0 -49
  54. data/numberset.rb +0 -200
  55. data/regarray.rb +0 -675
  56. data/regbackref.rb +0 -126
  57. data/regbind.rb +0 -74
  58. data/reggrid.csv +1 -2
  59. data/reghash.rb +0 -318
  60. data/regprogress.rb +0 -1054
  61. data/regreplace.rb +0 -114
  62. data/regsugar.rb +0 -230
  63. data/regtest.rb +0 -1078
  64. data/regvar.rb +0 -76
@@ -1,675 +0,0 @@
1
- =begin copyright
2
- reg - the ruby extended grammar
3
- Copyright (C) 2005,2009 Caleb Clausen
4
-
5
- This library is free software; you can redistribute it and/or
6
- modify it under the terms of the GNU Lesser General Public
7
- License as published by the Free Software Foundation; either
8
- version 2.1 of the License, or (at your option) any later version.
9
-
10
- This library is distributed in the hope that it will be useful,
11
- but WITHOUT ANY WARRANTY; without even the implied warranty of
12
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13
- Lesser General Public License for more details.
14
-
15
- You should have received a copy of the GNU Lesser General Public
16
- License along with this library; if not, write to the Free Software
17
- Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
- =end
19
-
20
- require "assert"
21
- require "pp"
22
-
23
- module Reg
24
- module Reg
25
- def itemrange; 1..1 end #default match 1 item
26
-
27
-
28
- #create a (vector) Reg that will match this pattern repeatedly.
29
- #(creates a Reg::Repeat.)
30
- #the argument determines the number of times to match.
31
- #times may be a positive integer, zero, INFINITY, or a
32
- #range over any of the above. if a range, the lower
33
- #end may not be INFINITY! Reg#- and Reg#+ are shortcuts
34
- #for the most common cases of multiplting by a range.
35
- #(at least 0 and at most INFINITY.) watch out when
36
- #multiplying with zero and INFINITY (including in a
37
- #range), as you can easily create a situation where
38
- #the number of matches to enumerate explodes exponentionaly,
39
- #or even is infinite. i won't say too much here except
40
- #that these are generally the same sorts of problems you
41
- #can run into with Regexps as well.
42
- def *(times=0..INFINITY)
43
- Repeat.new(self,times)
44
- end
45
-
46
- #repeat this pattern up to atmost times. could match
47
- #0 times as the minimum number of matches here is zero.
48
- def -(atmost=1)
49
- self*(0..atmost)
50
- end
51
-
52
- #repeat this pattern atleast times or more
53
- def +(atleast=1)
54
- self*(atleast..INFINITY)
55
- end
56
-
57
- end
58
-
59
- #--------------------------
60
- module Multiple
61
- def ===(other)
62
- method_missing(:===, other)
63
- end
64
-
65
- def maybe_multiple(needsmult) #better name needed
66
- assert( needsmult.respond_to?( :mmatch))
67
- class <<needsmult
68
- undef_method :mmatch
69
- include Multiple
70
- #alias mmatch mmatch_multiple #this doesn't work... why?
71
- def mmatch(a,s) mmatch_multiple(a,s); end #have to do this instead
72
- end
73
- assert( needsmult.respond_to?( :mmatch))
74
- end
75
-
76
- def maybe_multiples(*args) end
77
- #we're already multiple; no need to try to become multiple again
78
-
79
- def mmatch(arr,idx) #multiple match
80
- abstract
81
- end
82
-
83
- #negated Reg::Multiple's are automatically lookaheads (not implemented yet)
84
- def ~
85
- ~(Lookahead.new self)
86
- end
87
-
88
-
89
- def starts_with
90
- abstract
91
- end
92
-
93
- def ends_with
94
- abstract
95
- end
96
-
97
- def matches_class
98
- raise 'multiple regs match no single class'
99
- end
100
- end
101
-
102
- #--------------------------
103
- module Backtrace
104
- # protected
105
-
106
- def regs(ri) @regs[ri] end
107
-
108
- def update_di(di,len); di+len; end
109
- #--------------------------
110
- $RegTraceEnable=$RegTraceDisable=nil
111
- def trace_enabled?
112
- @trace||=nil
113
- $RegTraceEnable or (!$RegTraceDisable && @trace)
114
- end
115
-
116
- #--------------------------
117
- def trace!
118
- @trace=true
119
- self
120
- end
121
-
122
- #--------------------------
123
- def notrace!
124
- @trace=false
125
- self
126
- end
127
- end
128
-
129
- #--------------------------
130
- if false
131
- class RR < ::Array
132
- def inspect
133
- [self,super].to_s
134
- end
135
-
136
- def rrflatten
137
- result=[]
138
- each{|i|
139
- case i
140
- when RR then result +=i.rrflatten
141
- when Literal then result << i.unlit
142
- else result << i
143
- end
144
- }
145
- end
146
-
147
- def +(other)
148
- RR[*super]
149
- end
150
- end
151
- Result=RR
152
- else
153
- RR=::Array
154
- end
155
-
156
- #--------------------------
157
- class MatchSet
158
-
159
- def next_match(ary,start)
160
- abstract
161
- end
162
-
163
- def deep_copy
164
- abstract
165
- end
166
-
167
- def ob_state
168
- instance_variables.sort.map{|i| instance_variable_get i }
169
- end
170
-
171
- def ==(other)
172
- self.class==other.class and ob_state==other.ob_state
173
- end
174
-
175
- end
176
-
177
- #--------------------------
178
- class SingleRepeatMatchSet < MatchSet
179
- def initialize(startcnt,stepper,endcnt)
180
- endcnt==startcnt and raise 'why even make it a set, then?'
181
- (endcnt-startcnt)*stepper>0 or raise "tried to make null match set"
182
- assert startcnt>=0
183
- assert endcnt>=0
184
- @matchtimes,@stepper,@endcnt=startcnt,stepper,endcnt
185
- end
186
-
187
- def next_match(arr,idx)
188
- assert @stepper == -1 #'only greedy matching implemnted for now'
189
- @endcnt<=@matchtimes or return nil
190
- assert @matchtimes >=0
191
- result=[RR[arr[idx...idx+@matchtimes]], @matchtimes]
192
- assert ::Array===result.first.first
193
- @matchtimes+=@stepper
194
-
195
- assert @matchtimes >=-1
196
-
197
- assert ::Array===result.first.first
198
- return result
199
- end
200
-
201
- def deep_copy
202
- dup
203
- end
204
- end
205
-
206
-
207
- #--------------------------
208
- class Repeat
209
- include Reg,Backtrace,Multiple
210
-
211
- attr :times
212
-
213
- def max_matches; @times.end end
214
-
215
- def regs(ri) @reg end
216
-
217
- def initialize(reg,times)
218
- Integer===times and times=times..times
219
- times.exclude_end? and times=times.begin..times.end-1
220
- assert times.begin <= times.end
221
- assert times.begin < INFINITY
222
- assert times.begin >= 0
223
- assert times.end >= 0
224
- if Multiple===reg
225
- class<<self
226
- #alias mmatch mmatch_multiple #this doesn't work... why?
227
- def mmatch(a,s) mmatch_multiple(a,s); end #have to do this instead
228
- end
229
- else
230
- assert reg.itemrange==(1..1)
231
- @itemrange=times
232
- end
233
- @reg,@times=reg,times
234
- end
235
-
236
- def itemrange
237
- defined? @itemrange and return @itemrange
238
-
239
- i=@reg.itemrange
240
- rf,rl=i.first,i.last
241
- tf,tl=times.first,times.last
242
- @itemrange = rf*tf ..
243
- if tl==0 or rl==0
244
- 0
245
- elsif tl==INFINITY
246
- #ought to emit warnings if trouble here...
247
- #rl==INFINITY and maybe trouble
248
- #rf==0 and trouble
249
- INFINITY
250
- elsif rl==INFINITY
251
- #...and here
252
- #maybe trouble #... combinatorial explosion
253
- INFINITY
254
- else
255
- rl*tl
256
- end
257
- end
258
-
259
-
260
- def enough_matches? matchcnt
261
- @times===matchcnt
262
- end
263
-
264
- def inspect
265
- if @times.end==INFINITY
266
- "(#{@reg.inspect})+#{@times.begin}"
267
- elsif @times.begin==0
268
- "(#{@reg.inspect})-#{@times.end}"
269
- elsif @times.begin==@times.end
270
- "(#{@reg.inspect})*#{@times.begin}"
271
- else
272
- "(#{@reg.inspect})*(#{@times.begin}..#{@times.end})"
273
- end
274
- end
275
-
276
- def subregs; @reg end
277
-
278
- private
279
-
280
- end
281
-
282
-
283
-
284
-
285
- #--------------------------
286
- class OrMatchSet < MatchSet
287
- def initialize(orreg,idx,set,firstmatch)
288
- @orreg,@idx,@set,@firstmatch=orreg,idx,set,firstmatch
289
- assert @firstmatch.nil? || ::Array===@firstmatch.first.first
290
- end
291
-
292
- def ob_state
293
- instance_variables.map{|i| instance_variable_get i }
294
- end
295
-
296
- def ==(other)
297
- OrMatchSet===other and ob_state==other.ob_state
298
- end
299
-
300
- def next_match(ary,idx)
301
- if @firstmatch
302
- result,@firstmatch=@firstmatch,nil
303
- assert ::Array===result
304
- assert ::Array===result.first.first
305
- assert 2==result.size
306
- assert Integer===result.last
307
- return result
308
- end
309
- @set and result= @set.next_match(ary,idx)
310
- while result.nil?
311
- @idx+=1
312
- @idx >= @orreg.regs.size and return nil
313
- x=@orreg.regs[@idx].mmatch(ary,idx)
314
- @set,result=*if MatchSet===x then [x,x.next_match] else [nil,x] end
315
- end
316
- a=RR[nil]*@orreg.regs.size
317
- a[idx]=result[0]
318
- result[0]=a
319
- assert ::Array===result.first.first
320
- return result
321
- end
322
-
323
- def deep_copy
324
- result=OrMatchSet.new(@orreg,@idx,@set && @set.deep_copy,@firstmatch)
325
- assert self==result
326
- return result
327
- end
328
- end
329
-
330
- #--------------------------
331
- class Or
332
- def mmatch(arr,start)
333
- assert start <= arr.size
334
- @regs.each_with_index {|reg,i|
335
- reg===arr[start] and
336
- return OrMatchSet.new(self,i,nil,[arr[start]])
337
- } unless start == arr.size
338
- return nil
339
- end
340
-
341
- def itemrange
342
- if true
343
- min,max=INFINITY,0
344
- @regs.each {|r|
345
- min=r.itemrange.first if min>r.itemrange.first
346
- max=r.itemrange.last if max<r.itemrange.last
347
- }
348
- return min..max
349
- else
350
- limits=@regs.map{|r|
351
-
352
- i=(r.respond_to? :itemrange)? r.itemrange : 1..1
353
- [i.first,i.last]
354
- }.transpose
355
- limits.first.sort.first .. limits.last.sort.last
356
- end
357
- end
358
-
359
- private
360
- def mmatch_multiple(arr,start)
361
- mat=nil
362
- @regs.each_with_index{|r,i|
363
- if r.respond_to? :mmatch
364
- mat=r.mmatch(arr,start) or next
365
- if mat.respond_to? :next_match
366
- return OrMatchSet.new(self,i,mat,mat.next_match(arr,start))
367
- else
368
- return OrMatchSet.new(self,i,nil,mat)
369
- end
370
- else
371
- r===arr[start] and
372
- return OrMatchSet.new(self,i,nil,[[[arr[start]]],1])
373
- end
374
- }
375
-
376
- assert mat.nil?
377
- return nil
378
- end
379
- end
380
-
381
- #--------------------------
382
- class Xor
383
- def clean_result
384
- huh
385
- end
386
-
387
- def itemrange
388
- #min,max=INFINITY,0
389
- #@regs.each {|r|
390
- # min=[min,r.itemrange.first].sort.first
391
- # max=[r.itemrange.last,max].sort.last
392
- #}
393
- #return min..max
394
- limits=@regs.map{|r| i=r.itemrange; [i.first,i.last]}.transpose
395
- limits.first.sort.first .. limits.last.sort.last
396
- end
397
-
398
- private
399
- =begin
400
- def mmatch_multiple(arr,start)
401
- mat=i=nil
402
- count=0
403
- @regs.each_with_index{|reg,idx|
404
- if reg.respond_to? :mmatch
405
- mat=reg.mmatch(arr,start) or next
406
- else
407
- reg===arr[start] or next
408
- mat=[[arr[start]],1]
409
- end
410
- count==0 or return nil
411
- count=1
412
- assert mat
413
- }
414
-
415
- return nil unless mat
416
- assert count==1
417
- mat.respond_to? :next_match and return XorMatchSet.new(reg,idx,mat,huh)
418
-
419
- a=RR[nil]*regs.size
420
- a[idx]=mat[0]
421
- mat[0]=a
422
- assert huh
423
- assert ::Array===mat.first.first
424
- return mat
425
- end
426
- =end
427
-
428
- def mmatch_multiple arr, start
429
- found=nil
430
- @regs.each{|reg|
431
- if m=reg.mmatch(arr, start)
432
- return if found
433
- found=m
434
- end
435
- }
436
- return found
437
- end
438
-
439
- end
440
-
441
- #--------------------------
442
- class And
443
- include Backtrace #shouldn't this be included only when needed?
444
-
445
- def update_di(di,len) di; end
446
-
447
-
448
- def clean_result
449
- huh
450
- end
451
-
452
-
453
- def enough_matches? matchcnt
454
- matchcnt==@regs.size
455
- end
456
-
457
- def itemrange
458
- limits=@regs.map{|r| i=r.itemrange; [i.first,i.last]}.transpose
459
- limits.first.sort.last .. limits.last.sort.last
460
- end
461
-
462
- private
463
- def mmatch_multiple(arr,start)
464
- #in this version, at least one of @regs is a multiple reg
465
- assert( (0..arr.size).include?( start))
466
- result,*bogus=huh.bt_match(arr,start,0,0,[RR[]])
467
- result and AndMatchSet.new(self,result)
468
- end
469
- end
470
-
471
- #--------------------------
472
- class Array
473
- include Reg,Backtrace
474
-
475
- def max_matches; @regs.size end
476
-
477
- def initialize(*regs)
478
- @regs=regs
479
- end
480
-
481
- class <<self
482
- alias new__nobooleans new
483
- def new(*args)
484
- # args.detect{|o| /^(AND|X?OR)$/.sym===o } or return new__nobooleans(*args)
485
- # +[/^(AND|X?OR)$/.sym.splitter].match(args)
486
- Pair===args.first and return OrderedHash.new(*args)
487
- new__nobooleans(*args)
488
- end
489
- alias [] new
490
- end
491
-
492
- def matches_class; ::Array end
493
-
494
- def -@ #subsequence inclusion
495
- Subseq.new(*@regs)
496
- end
497
-
498
- def +@ #cvt to Reg::Array; that what we are already....
499
- self
500
- end
501
-
502
- def maybe_multiples(*args) end #never do anything for Reg::Array
503
-
504
- def enough_matches? matchcnt
505
- matchcnt==@regs.size
506
- end
507
-
508
- def +(reg)
509
- #not right... + should not modify self
510
- if self.class==reg.class
511
- @regs.concat reg.regs
512
- else
513
- super
514
- end
515
- end
516
-
517
- def inspect
518
- "+["+ @regs.collect{|r| r.inspect}.join(', ') +"]"
519
- end
520
-
521
- def subregs; @regs end
522
- end
523
-
524
-
525
-
526
-
527
-
528
-
529
-
530
- #--------------------------
531
- class Subseq < ::Reg::Array
532
- include Multiple
533
-
534
- def max_matches; @regs.size end
535
-
536
- def initialize(*regs)
537
- regs.each{|reg| Multiple===reg and class<<self
538
- undef mmatch
539
- def mmatch(a,s) mmatch_multiple(a,s) end
540
- end}
541
-
542
- @regs=regs
543
- end
544
-
545
-
546
- def inspect
547
- super.sub( /^\+/,'-')
548
- end
549
-
550
- def itemrange
551
- #add the ranges of the individual items
552
- @itemrange ||= #some caching...
553
- @regs.inject(0){|sum,ob| sum+ob.begin } ..
554
- @regs.inject(0){|sum,ob| sum+ob.end }
555
- end
556
-
557
- def -@ #subsequence inclusion... that's what we are, do nothing
558
- self
559
- end
560
-
561
- def +@ #cvt to Reg::Array
562
- Array.new(*@regs)
563
- end
564
-
565
- private
566
-
567
- #tla of +[], regproc{}
568
- assign_TLA true, :Reg=>:Array
569
- assign_TLA :Res=>:Subseq
570
- #no need to alias the constant name 'Reg', too.
571
- #ruby does it for us.
572
- end
573
-
574
-
575
-
576
- #--------------------------
577
- class None; end
578
- class <<None
579
- include Reg
580
- def new; self end
581
-
582
- def *(times)
583
- times===0 ? Many[0] : self
584
- end
585
-
586
- def ~; Any; end
587
-
588
- def &(other); self; end
589
-
590
- def |(other) other end
591
- def ^(other) other end
592
-
593
- def ===(other); false; end
594
- def matches_class; self; end
595
- end
596
-
597
- if defined? $RegAnyEnable #disabled for now -- these optimizations are broken
598
-
599
- #--------------------------
600
- class Any; end
601
- class <<Any #maybe all this can be in Object's meta-class....
602
- include Reg
603
-
604
- #any is a singleton
605
- def new; self end
606
-
607
- def *(times)
608
- Many.new(times)
609
- end
610
-
611
- def ~; None; end
612
-
613
- def &(other); other; end
614
-
615
- def |(other); self; end
616
- def ^(other); ~other end
617
-
618
- def ===(other); true;end
619
- def matches_class; ::Object end
620
- end
621
-
622
- #--------------------------
623
- class Many
624
- include Reg
625
- include Multiple
626
-
627
- class <<self
628
- @@RAMs={}
629
- alias uncached__new new
630
- def new times=0..INFINITY
631
- @@RAMs[times] ||= uncached__new times
632
- end
633
- alias [] new
634
- end
635
-
636
- def initialize(times=0..INFINITY)
637
- Integer===times and times=times..times
638
- @times=times
639
- end
640
-
641
- def mmatch(arr,start)
642
- left=arr.size-start
643
- beg=@times.begin
644
- beg<=left and
645
- SingleRepeatMatchSet.new([left,@times.end].max, -1, beg)
646
- end
647
-
648
- def subregs; Any end
649
-
650
- def inspect; "Any*(#{@times})"; end
651
- end
652
-
653
- #--------------------------
654
- class ::Object
655
- def reg
656
- Any
657
- end
658
- end
659
- OB=Any
660
- OBS=Many[]
661
-
662
- else #traditional and uncomplicated version of OB and OBS
663
- OB=::Object.reg
664
- OBS=OB+0 #std abbreviation for 0 or more of anything
665
- def OBS.inspect
666
- "OBS"
667
- end
668
- def OB.inspect
669
- "OB"
670
- end
671
- end
672
-
673
-
674
-
675
- end