rubylexer 0.7.0 → 0.7.1
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +90 -0
- data/Manifest.txt +54 -3
- data/README.txt +4 -7
- data/Rakefile +3 -2
- data/lib/rubylexer.rb +856 -323
- data/lib/rubylexer/0.7.0.rb +11 -2
- data/lib/rubylexer/0.7.1.rb +2 -0
- data/lib/rubylexer/charhandler.rb +4 -4
- data/lib/rubylexer/context.rb +86 -9
- data/lib/rubylexer/rulexer.rb +455 -101
- data/lib/rubylexer/token.rb +166 -43
- data/lib/rubylexer/tokenprinter.rb +16 -8
- data/lib/rubylexer/version.rb +1 -1
- data/rubylexer.vpj +98 -0
- data/test/code/all_the_gems.rb +33 -0
- data/test/code/all_the_raas.rb +226 -0
- data/test/code/all_the_rubies.rb +2 -0
- data/test/code/deletewarns.rb +19 -1
- data/test/code/dumptokens.rb +39 -8
- data/test/code/errscan +2 -0
- data/test/code/isolate_error.rb +72 -0
- data/test/code/lexloop +14 -0
- data/test/code/locatetest.rb +150 -8
- data/test/code/regression.rb +109 -0
- data/test/code/rubylexervsruby.rb +53 -15
- data/test/code/strgen.rb +138 -0
- data/test/code/tarball.rb +144 -0
- data/test/code/testcases.rb +11 -0
- data/test/code/tokentest.rb +115 -24
- data/test/data/__eof2.rb +1 -0
- data/test/data/__eof5.rb +2 -0
- data/test/data/__eof6.rb +2 -0
- data/test/data/cvtesc.rb +17 -0
- data/test/data/g.rb +6 -0
- data/test/data/hd0.rb +3 -0
- data/test/data/hdateof.rb +2 -0
- data/test/data/hdempty.rb +3 -0
- data/test/data/hdr.rb +9 -0
- data/test/data/hdr_dos.rb +13 -0
- data/test/data/hdr_dos2.rb +18 -0
- data/test/data/heart.rb +2 -0
- data/test/data/here_escnl.rb +25 -0
- data/test/data/here_escnl_dos.rb +20 -0
- data/test/data/here_squote.rb +3 -0
- data/test/data/heremonsters.rb +140 -0
- data/test/data/heremonsters.rb.broken +68 -0
- data/test/data/heremonsters.rb.broken.save +68 -0
- data/test/data/heremonsters_dos.rb +140 -0
- data/test/data/heremonsters_dos.rb.broken +68 -0
- data/test/data/illegal_oneliners.rb +1 -0
- data/test/data/illegal_stanzas.rb +0 -0
- data/test/data/make_ws_strdelim.rb +22 -0
- data/test/data/maven2_builer_test.rb +82 -0
- data/test/data/migration.rb +8944 -0
- data/test/data/modl.rb +6 -0
- data/test/data/modl_dos.rb +7 -0
- data/test/data/modl_fails.rb +10 -0
- data/test/data/multilinestring.rb +6 -0
- data/test/data/oneliners.rb +555 -0
- data/test/data/p-op.rb +2 -0
- data/test/data/p.rb +3 -1710
- data/test/data/s.rb +90 -21
- data/test/data/simple.rb +1 -0
- data/test/data/simple_dos.rb +1 -0
- data/test/data/stanzas.rb +1194 -0
- data/test/data/strdelim_crlf.rb +6 -0
- data/test/data/stuff.rb +6 -0
- data/test/data/stuff2.rb +5 -0
- data/test/data/stuff3.rb +6 -0
- data/test/data/stuff4.rb +6 -0
- data/test/data/tkweird.rb +20 -0
- data/test/data/unending_stuff.rb +5 -0
- data/test/data/whatnot.rb +8 -0
- data/test/data/ws_strdelim.rb +0 -0
- data/test/test.sh +239 -0
- data/testing.txt +39 -50
- metadata +110 -12
- data/test/code/dl_all_gems.rb +0 -43
- data/test/code/unpack_all_gems.rb +0 -15
- data/test/data/gemlist.txt +0 -280
data/lib/rubylexer/token.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
|
-
=begin
|
1
|
+
=begin legal crap
|
2
2
|
rubylexer - a ruby lexer written in ruby
|
3
|
-
Copyright (C) 2004,2005 Caleb Clausen
|
3
|
+
Copyright (C) 2004,2005,2008 Caleb Clausen
|
4
4
|
|
5
5
|
This library is free software; you can redistribute it and/or
|
6
6
|
modify it under the terms of the GNU Lesser General Public
|
@@ -25,6 +25,8 @@ class Token
|
|
25
25
|
attr_accessor :ident
|
26
26
|
alias to_s ident
|
27
27
|
attr_accessor :offset #file offset of start of this token
|
28
|
+
attr_accessor :as #if defined, a KeywordToken which this token stands in for.
|
29
|
+
attr_accessor :allow_ooo_offset #hack
|
28
30
|
|
29
31
|
def initialize(ident,offset=nil)
|
30
32
|
@ident=ident
|
@@ -48,8 +50,8 @@ end
|
|
48
50
|
class KeywordToken < WToken #also some operators
|
49
51
|
|
50
52
|
#-----------------------------------
|
51
|
-
def set_callsite! #not needed
|
52
|
-
@callsite=
|
53
|
+
def set_callsite!(x=true) #not needed
|
54
|
+
@callsite=x
|
53
55
|
end
|
54
56
|
|
55
57
|
#-----------------------------------
|
@@ -79,6 +81,17 @@ class KeywordToken < WToken #also some operators
|
|
79
81
|
def has_end?
|
80
82
|
self===RubyLexer::BEGINWORDS and @has_end||=nil
|
81
83
|
end
|
84
|
+
|
85
|
+
attr_accessor :comma_type
|
86
|
+
|
87
|
+
def has_no_block!
|
88
|
+
@has_no_block=true
|
89
|
+
end
|
90
|
+
|
91
|
+
def has_no_block?
|
92
|
+
@has_no_block
|
93
|
+
end
|
94
|
+
|
82
95
|
end
|
83
96
|
|
84
97
|
#-------------------------
|
@@ -109,6 +122,8 @@ class ::Regexp; include TokenPat; end
|
|
109
122
|
|
110
123
|
#-------------------------
|
111
124
|
class VarNameToken < WToken
|
125
|
+
attr_accessor :lvar_type
|
126
|
+
attr_accessor :in_def
|
112
127
|
end
|
113
128
|
|
114
129
|
#-------------------------
|
@@ -118,15 +133,30 @@ end
|
|
118
133
|
|
119
134
|
#-------------------------
|
120
135
|
class SymbolToken < Token
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
136
|
+
attr_accessor :open,:close
|
137
|
+
attr :raw
|
138
|
+
def initialize(ident,offset=nil,starter=':')
|
139
|
+
@raw=ident
|
140
|
+
str=ident.to_s
|
141
|
+
str[0,2]='' if /\A%s/===str
|
142
|
+
super starter+str, offset
|
143
|
+
@open=":"
|
144
|
+
@close=""
|
145
|
+
# @char=':'
|
146
|
+
|
147
|
+
end
|
148
|
+
|
149
|
+
def to_s
|
150
|
+
return @ident
|
151
|
+
raw=@raw.to_s
|
152
|
+
raw=raw[1...-1] if StringToken===@raw
|
153
|
+
@open+raw+@close
|
154
|
+
end
|
125
155
|
end
|
126
156
|
|
127
157
|
#-------------------------
|
128
158
|
class MethNameToken < Token # < SymbolToken
|
129
|
-
def initialize(ident,offset=nil)
|
159
|
+
def initialize(ident,offset=nil,bogus=nil)
|
130
160
|
@ident= (VarNameToken===ident)? ident.ident : ident
|
131
161
|
@offset=offset
|
132
162
|
@has_no_block=false
|
@@ -155,6 +185,7 @@ class NewlineToken < Token
|
|
155
185
|
super(nlstr,offset)
|
156
186
|
#@char=''
|
157
187
|
end
|
188
|
+
def as; ';' end
|
158
189
|
end
|
159
190
|
|
160
191
|
#-------------------------
|
@@ -164,6 +195,10 @@ class StringToken < Token
|
|
164
195
|
attr_accessor :modifiers #for regex only
|
165
196
|
attr_accessor :elems
|
166
197
|
attr_accessor :line #line on which the string ENDS
|
198
|
+
attr_accessor :bs_handler
|
199
|
+
|
200
|
+
attr_accessor :open #exact sequence of chars used to start the str
|
201
|
+
attr_accessor :close #exact seq of (1) char to stop the str
|
167
202
|
|
168
203
|
def with_line(line)
|
169
204
|
@line=line
|
@@ -193,26 +228,39 @@ class StringToken < Token
|
|
193
228
|
PREFIXERS={ '['=>"%w[", '{'=>'%W{' }
|
194
229
|
SUFFIXERS={ '['=>"]", '{'=>'}' }
|
195
230
|
|
196
|
-
def
|
231
|
+
def has_str_inc?
|
232
|
+
elems.size>1 or RubyCode===elems.first
|
233
|
+
end
|
234
|
+
|
235
|
+
def to_s transname=:transform
|
197
236
|
assert @char[/[\[{"`\/]/] #"
|
198
237
|
#on output, all single-quoted strings become double-quoted
|
199
238
|
assert(@elems.length==1) if @char=='['
|
200
239
|
|
201
|
-
result=
|
240
|
+
result=open.dup
|
202
241
|
starter=result[-1,1]
|
203
|
-
ender=
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
242
|
+
ender=close
|
243
|
+
elems.each{|e|
|
244
|
+
case e
|
245
|
+
when String: result<<e
|
246
|
+
# strfrag=translate_escapes strfrag if RubyLexer::FASTER_STRING_ESCAPES
|
247
|
+
# result << send(transname,strfrag,starter,ender)
|
248
|
+
when VarNameToken:
|
249
|
+
if /^[$@]/===e.to_s
|
210
250
|
result << '#' + e.to_s
|
211
|
-
|
251
|
+
else
|
252
|
+
result << "\#{#{e}}"
|
253
|
+
end
|
254
|
+
when RubyCode: result << '#' + e.to_s
|
255
|
+
else fail
|
256
|
+
end
|
212
257
|
}
|
213
258
|
result << ender
|
214
259
|
|
215
|
-
|
260
|
+
if @char=='/'
|
261
|
+
result << modifiers if modifiers #regex only
|
262
|
+
result="%r"+result if RubyLexer::WHSPLF[result[1,1]]
|
263
|
+
end
|
216
264
|
|
217
265
|
return result
|
218
266
|
end
|
@@ -231,18 +279,18 @@ class StringToken < Token
|
|
231
279
|
end
|
232
280
|
|
233
281
|
def append(glob)
|
234
|
-
assert @elems.last.kind_of?(String)
|
282
|
+
#assert @elems.last.kind_of?(String)
|
235
283
|
case glob
|
236
284
|
when String,Integer then append_str! glob
|
237
285
|
when RubyCode then append_code! glob
|
238
286
|
else raise "bad string contents: #{glob}, a #{glob.class}"
|
239
287
|
end
|
240
|
-
assert @elems.last.kind_of?(String)
|
288
|
+
#assert @elems.last.kind_of?(String)
|
241
289
|
end
|
242
290
|
|
243
291
|
def append_token(strtok)
|
244
292
|
assert @elems.last.kind_of?(String)
|
245
|
-
assert strtok.elems.last.kind_of?(String)
|
293
|
+
#assert strtok.elems.last.kind_of?(String)
|
246
294
|
assert strtok.elems.first.kind_of?(String)
|
247
295
|
|
248
296
|
@elems.last << strtok.elems.shift
|
@@ -256,17 +304,60 @@ class StringToken < Token
|
|
256
304
|
assert((!@modifiers or !strtok.modifiers))
|
257
305
|
@modifiers||=strtok.modifiers
|
258
306
|
|
259
|
-
assert @elems.last.kind_of?(String)
|
307
|
+
#assert @elems.last.kind_of?(String)
|
308
|
+
|
309
|
+
@bs_handler ||=strtok.bs_handler
|
260
310
|
|
261
311
|
return self
|
262
312
|
end
|
263
313
|
|
314
|
+
def translate_escapes(str)
|
315
|
+
rl=RubyLexer.new("(string escape translation hack...)",'')
|
316
|
+
result=str.dup
|
317
|
+
seq=result.to_sequence
|
318
|
+
rl.instance_eval{@file=seq}
|
319
|
+
repls=[]
|
320
|
+
i=0
|
321
|
+
#ugly ugly ugly
|
322
|
+
while i<result.size and bs_at=result.index(/\\./m,i)
|
323
|
+
seq.pos=$~.end(0)-1
|
324
|
+
ch=rl.send(bs_handler,"\\",@open[-1,1],@close)
|
325
|
+
result[bs_at...seq.pos]=ch
|
326
|
+
i=bs_at+ch.size
|
327
|
+
end
|
328
|
+
|
329
|
+
return result
|
330
|
+
end
|
331
|
+
|
264
332
|
private
|
333
|
+
UNESC_DELIMS={}
|
334
|
+
|
265
335
|
#simpler transform, preserves original exactly
|
266
336
|
def simple_transform(strfrag,starter,ender)
|
267
|
-
|
268
|
-
#strfrag.gsub!(
|
269
|
-
|
337
|
+
assert('[{/'[@char])
|
338
|
+
#strfrag.gsub!(/(\A|[^\\])(?:\\\\)*\#([{$@])/){$1+'\\#'+$2} unless @char=='['
|
339
|
+
delimchars=Regexp.quote starter+ender
|
340
|
+
delimchars+=Regexp.quote("#") unless @char=='[' #escape beginning of string iterpolations
|
341
|
+
|
342
|
+
#i think most or all of this method is useless now...
|
343
|
+
|
344
|
+
#escape curly brace in string interpolations (%W only)
|
345
|
+
strfrag.gsub!('#{', '#\\{') if @char=='{'
|
346
|
+
|
347
|
+
ckey=starter+ender
|
348
|
+
unesc_delim=
|
349
|
+
UNESC_DELIMS[ckey]||=
|
350
|
+
/(\A|[^\\](?:\\\\)*)([#{delimchars}]+)/
|
351
|
+
# /(\\)([^#{delimchars}#{RubyLexer::WHSPLF}]|\Z)/
|
352
|
+
|
353
|
+
#an even number (esp 0) of backslashes before delim becomes escaped delim
|
354
|
+
strfrag.gsub!(unesc_delim){
|
355
|
+
pre=$1; toesc=$2
|
356
|
+
pre+toesc.gsub(/(.)/){ "\\"+$1 }
|
357
|
+
}
|
358
|
+
|
359
|
+
#no need to double backslashes anymore... they should come pre-doubled
|
360
|
+
|
270
361
|
return strfrag
|
271
362
|
end
|
272
363
|
|
@@ -286,15 +377,21 @@ private
|
|
286
377
|
end
|
287
378
|
|
288
379
|
def append_str!(str)
|
289
|
-
|
290
|
-
|
380
|
+
if @elems.last.kind_of?(String)
|
381
|
+
@elems.last << str
|
382
|
+
else
|
383
|
+
@elems << str
|
384
|
+
end
|
291
385
|
@ident << str
|
292
386
|
assert @elems.last.kind_of?(String)
|
293
387
|
end
|
294
388
|
|
295
389
|
def append_code!(code)
|
296
|
-
|
297
|
-
|
390
|
+
if @elems.last.kind_of?(String)
|
391
|
+
else
|
392
|
+
@elems.push ''
|
393
|
+
end
|
394
|
+
@elems.push code,''
|
298
395
|
@ident << "\#{#{code}}"
|
299
396
|
assert @elems.last.kind_of?(String)
|
300
397
|
end
|
@@ -310,9 +407,10 @@ class HerePlaceholderToken < WToken
|
|
310
407
|
attr_reader :termex, :quote, :ender, :dash
|
311
408
|
attr_accessor :unsafe_to_use, :string
|
312
409
|
attr_accessor :bodyclass
|
410
|
+
attr_accessor :open, :close
|
313
411
|
|
314
|
-
def initialize(dash,quote,ender)
|
315
|
-
@dash,@quote,@ender=dash,quote,ender
|
412
|
+
def initialize(dash,quote,ender,quote_real=true)
|
413
|
+
@dash,@quote,@ender,@quote_real=dash,quote,ender,quote_real
|
316
414
|
@unsafe_to_use=true
|
317
415
|
@string=StringToken.new
|
318
416
|
|
@@ -325,17 +423,17 @@ class HerePlaceholderToken < WToken
|
|
325
423
|
def ===(bogus); false end
|
326
424
|
|
327
425
|
def to_s
|
328
|
-
if @bodyclass==OutlinedHereBodyToken
|
426
|
+
# if @bodyclass==OutlinedHereBodyToken
|
329
427
|
result=if/[^a-z_0-9]/i===@ender
|
330
|
-
|
428
|
+
@ender.gsub(/[\\"]/, '\\\\'+'\\&')
|
331
429
|
else
|
332
430
|
@ender
|
333
431
|
end
|
334
|
-
["<<",@quote,@
|
335
|
-
else
|
336
|
-
assert !unsafe_to_use
|
337
|
-
@string.to_s
|
338
|
-
end
|
432
|
+
return ["<<",@dash,@quote_real&&@quote,result,@quote_real&&@quote].to_s
|
433
|
+
# else
|
434
|
+
# assert !unsafe_to_use
|
435
|
+
# return @string.to_s
|
436
|
+
# end
|
339
437
|
end
|
340
438
|
|
341
439
|
def append s; @string.append s end
|
@@ -374,6 +472,7 @@ class ZwToken < IgnoreToken
|
|
374
472
|
def explicit_form_all; explicit_form end
|
375
473
|
end
|
376
474
|
|
475
|
+
#-------------------------
|
377
476
|
class NoWsToken < ZwToken
|
378
477
|
def explicit_form_all
|
379
478
|
"#nows#"
|
@@ -383,34 +482,41 @@ class NoWsToken < ZwToken
|
|
383
482
|
end
|
384
483
|
end
|
385
484
|
|
485
|
+
#-------------------------
|
386
486
|
class ImplicitParamListStartToken < KeywordToken
|
387
487
|
include StillIgnoreToken
|
388
488
|
def initialize(offset)
|
389
489
|
super("(",offset)
|
390
490
|
end
|
391
491
|
def to_s; '' end
|
492
|
+
def as; "(" end
|
392
493
|
end
|
393
494
|
|
495
|
+
#-------------------------
|
394
496
|
class ImplicitParamListEndToken < KeywordToken
|
395
497
|
include StillIgnoreToken
|
396
498
|
def initialize(offset)
|
397
499
|
super(")",offset)
|
398
500
|
end
|
399
501
|
def to_s; '' end
|
502
|
+
def as; ")" end
|
400
503
|
end
|
401
504
|
|
505
|
+
#-------------------------
|
402
506
|
class AssignmentRhsListStartToken < ZwToken
|
403
507
|
def explicit_form
|
404
508
|
'*['
|
405
509
|
end
|
406
510
|
end
|
407
511
|
|
512
|
+
#-------------------------
|
408
513
|
class AssignmentRhsListEndToken < ZwToken
|
409
514
|
def explicit_form
|
410
515
|
']'
|
411
516
|
end
|
412
517
|
end
|
413
518
|
|
519
|
+
#-------------------------
|
414
520
|
class KwParamListStartToken < ZwToken
|
415
521
|
def explicit_form_all
|
416
522
|
"#((#"
|
@@ -420,6 +526,7 @@ class KwParamListStartToken < ZwToken
|
|
420
526
|
end
|
421
527
|
end
|
422
528
|
|
529
|
+
#-------------------------
|
423
530
|
class KwParamListEndToken < ZwToken
|
424
531
|
def explicit_form_all
|
425
532
|
"#))#"
|
@@ -429,6 +536,11 @@ class KwParamListEndToken < ZwToken
|
|
429
536
|
end
|
430
537
|
end
|
431
538
|
|
539
|
+
#-------------------------
|
540
|
+
class EndDefHeaderToken < ZwToken
|
541
|
+
def as; ";" end
|
542
|
+
end
|
543
|
+
|
432
544
|
#-------------------------
|
433
545
|
class EscNlToken < IgnoreToken
|
434
546
|
def initialize(filename,linenum,ident="\\\n",offset=nil)
|
@@ -440,7 +552,7 @@ class EscNlToken < IgnoreToken
|
|
440
552
|
end
|
441
553
|
|
442
554
|
#-------------------------
|
443
|
-
class EoiToken <
|
555
|
+
class EoiToken < Token
|
444
556
|
attr :file
|
445
557
|
alias :pos :offset
|
446
558
|
|
@@ -453,18 +565,29 @@ end
|
|
453
565
|
#-------------------------
|
454
566
|
class HereBodyToken < IgnoreToken
|
455
567
|
#attr_accessor :ender
|
456
|
-
|
568
|
+
attr_accessor :open,:close
|
569
|
+
def initialize(headtok,linecount)
|
457
570
|
assert HerePlaceholderToken===headtok
|
458
571
|
super(headtok.string,headtok.string.offset)
|
459
572
|
@headtok=headtok
|
573
|
+
@linecount=linecount
|
574
|
+
end
|
575
|
+
|
576
|
+
def line
|
577
|
+
@ident.line
|
578
|
+
end
|
579
|
+
|
580
|
+
def to_s
|
581
|
+
@ident.to_s
|
460
582
|
end
|
461
583
|
|
462
584
|
attr :headtok
|
585
|
+
attr :linecount #num lines here body spans (including terminator)
|
463
586
|
end
|
464
587
|
|
465
588
|
#-------------------------
|
466
589
|
class FileAndLineToken < IgnoreToken
|
467
|
-
|
590
|
+
attr_accessor :line
|
468
591
|
|
469
592
|
def initialize(ident,line,offset=nil)
|
470
593
|
|
@@ -1,6 +1,6 @@
|
|
1
|
-
=begin
|
1
|
+
=begin legal crap
|
2
2
|
rubylexer - a ruby lexer written in ruby
|
3
|
-
Copyright (C) 2004,2005 Caleb Clausen
|
3
|
+
Copyright (C) 2004,2005,2008 Caleb Clausen
|
4
4
|
|
5
5
|
This library is free software; you can redistribute it and/or
|
6
6
|
modify it under the terms of the GNU Lesser General Public
|
@@ -79,6 +79,9 @@ end
|
|
79
79
|
|
80
80
|
return result
|
81
81
|
end end
|
82
|
+
class HereBodyToken; def ws_munge(tp) #experimental
|
83
|
+
nil
|
84
|
+
end end
|
82
85
|
class OutlinedHereBodyToken; def ws_munge(tp)
|
83
86
|
nil
|
84
87
|
end end
|
@@ -121,21 +124,26 @@ class KeepWsTokenPrinter
|
|
121
124
|
|
122
125
|
def aprint(tok)
|
123
126
|
if StringToken===tok or
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
+
HereBodyToken===tok
|
128
|
+
# (HerePlaceholderToken===tok and
|
129
|
+
# tok.bodyclass!=OutlinedHereBodyToken
|
130
|
+
# )
|
127
131
|
str_needs_escnls=(tok.line-@lastfal.line).nonzero?
|
128
|
-
end
|
132
|
+
end if false
|
129
133
|
result=tok.ws_munge(self) and return result
|
130
134
|
|
131
135
|
|
132
136
|
#insert extra ws unless an ambiguous op immediately follows
|
133
137
|
#id or num, in which case ws would change the meaning
|
134
|
-
result=
|
138
|
+
result=tok
|
139
|
+
result=
|
140
|
+
case tok
|
141
|
+
when ZwToken,EoiToken,NoWsToken, HereBodyToken, NewlineToken,
|
142
|
+
ImplicitParamListStartToken,ImplicitParamListEndToken:
|
135
143
|
tok
|
136
144
|
else
|
137
145
|
[@sep.dup,tok]
|
138
|
-
end
|
146
|
+
end unless NoWsToken===lasttok
|
139
147
|
|
140
148
|
if str_needs_escnls
|
141
149
|
result=result.to_s
|