rubylexer 0.7.0 → 0.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +90 -0
- data/Manifest.txt +54 -3
- data/README.txt +4 -7
- data/Rakefile +3 -2
- data/lib/rubylexer.rb +856 -323
- data/lib/rubylexer/0.7.0.rb +11 -2
- data/lib/rubylexer/0.7.1.rb +2 -0
- data/lib/rubylexer/charhandler.rb +4 -4
- data/lib/rubylexer/context.rb +86 -9
- data/lib/rubylexer/rulexer.rb +455 -101
- data/lib/rubylexer/token.rb +166 -43
- data/lib/rubylexer/tokenprinter.rb +16 -8
- data/lib/rubylexer/version.rb +1 -1
- data/rubylexer.vpj +98 -0
- data/test/code/all_the_gems.rb +33 -0
- data/test/code/all_the_raas.rb +226 -0
- data/test/code/all_the_rubies.rb +2 -0
- data/test/code/deletewarns.rb +19 -1
- data/test/code/dumptokens.rb +39 -8
- data/test/code/errscan +2 -0
- data/test/code/isolate_error.rb +72 -0
- data/test/code/lexloop +14 -0
- data/test/code/locatetest.rb +150 -8
- data/test/code/regression.rb +109 -0
- data/test/code/rubylexervsruby.rb +53 -15
- data/test/code/strgen.rb +138 -0
- data/test/code/tarball.rb +144 -0
- data/test/code/testcases.rb +11 -0
- data/test/code/tokentest.rb +115 -24
- data/test/data/__eof2.rb +1 -0
- data/test/data/__eof5.rb +2 -0
- data/test/data/__eof6.rb +2 -0
- data/test/data/cvtesc.rb +17 -0
- data/test/data/g.rb +6 -0
- data/test/data/hd0.rb +3 -0
- data/test/data/hdateof.rb +2 -0
- data/test/data/hdempty.rb +3 -0
- data/test/data/hdr.rb +9 -0
- data/test/data/hdr_dos.rb +13 -0
- data/test/data/hdr_dos2.rb +18 -0
- data/test/data/heart.rb +2 -0
- data/test/data/here_escnl.rb +25 -0
- data/test/data/here_escnl_dos.rb +20 -0
- data/test/data/here_squote.rb +3 -0
- data/test/data/heremonsters.rb +140 -0
- data/test/data/heremonsters.rb.broken +68 -0
- data/test/data/heremonsters.rb.broken.save +68 -0
- data/test/data/heremonsters_dos.rb +140 -0
- data/test/data/heremonsters_dos.rb.broken +68 -0
- data/test/data/illegal_oneliners.rb +1 -0
- data/test/data/illegal_stanzas.rb +0 -0
- data/test/data/make_ws_strdelim.rb +22 -0
- data/test/data/maven2_builer_test.rb +82 -0
- data/test/data/migration.rb +8944 -0
- data/test/data/modl.rb +6 -0
- data/test/data/modl_dos.rb +7 -0
- data/test/data/modl_fails.rb +10 -0
- data/test/data/multilinestring.rb +6 -0
- data/test/data/oneliners.rb +555 -0
- data/test/data/p-op.rb +2 -0
- data/test/data/p.rb +3 -1710
- data/test/data/s.rb +90 -21
- data/test/data/simple.rb +1 -0
- data/test/data/simple_dos.rb +1 -0
- data/test/data/stanzas.rb +1194 -0
- data/test/data/strdelim_crlf.rb +6 -0
- data/test/data/stuff.rb +6 -0
- data/test/data/stuff2.rb +5 -0
- data/test/data/stuff3.rb +6 -0
- data/test/data/stuff4.rb +6 -0
- data/test/data/tkweird.rb +20 -0
- data/test/data/unending_stuff.rb +5 -0
- data/test/data/whatnot.rb +8 -0
- data/test/data/ws_strdelim.rb +0 -0
- data/test/test.sh +239 -0
- data/testing.txt +39 -50
- metadata +110 -12
- data/test/code/dl_all_gems.rb +0 -43
- data/test/code/unpack_all_gems.rb +0 -15
- data/test/data/gemlist.txt +0 -280
data/lib/rubylexer/token.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
|
-
=begin
|
1
|
+
=begin legal crap
|
2
2
|
rubylexer - a ruby lexer written in ruby
|
3
|
-
Copyright (C) 2004,2005 Caleb Clausen
|
3
|
+
Copyright (C) 2004,2005,2008 Caleb Clausen
|
4
4
|
|
5
5
|
This library is free software; you can redistribute it and/or
|
6
6
|
modify it under the terms of the GNU Lesser General Public
|
@@ -25,6 +25,8 @@ class Token
|
|
25
25
|
attr_accessor :ident
|
26
26
|
alias to_s ident
|
27
27
|
attr_accessor :offset #file offset of start of this token
|
28
|
+
attr_accessor :as #if defined, a KeywordToken which this token stands in for.
|
29
|
+
attr_accessor :allow_ooo_offset #hack
|
28
30
|
|
29
31
|
def initialize(ident,offset=nil)
|
30
32
|
@ident=ident
|
@@ -48,8 +50,8 @@ end
|
|
48
50
|
class KeywordToken < WToken #also some operators
|
49
51
|
|
50
52
|
#-----------------------------------
|
51
|
-
def set_callsite! #not needed
|
52
|
-
@callsite=
|
53
|
+
def set_callsite!(x=true) #not needed
|
54
|
+
@callsite=x
|
53
55
|
end
|
54
56
|
|
55
57
|
#-----------------------------------
|
@@ -79,6 +81,17 @@ class KeywordToken < WToken #also some operators
|
|
79
81
|
def has_end?
|
80
82
|
self===RubyLexer::BEGINWORDS and @has_end||=nil
|
81
83
|
end
|
84
|
+
|
85
|
+
attr_accessor :comma_type
|
86
|
+
|
87
|
+
def has_no_block!
|
88
|
+
@has_no_block=true
|
89
|
+
end
|
90
|
+
|
91
|
+
def has_no_block?
|
92
|
+
@has_no_block
|
93
|
+
end
|
94
|
+
|
82
95
|
end
|
83
96
|
|
84
97
|
#-------------------------
|
@@ -109,6 +122,8 @@ class ::Regexp; include TokenPat; end
|
|
109
122
|
|
110
123
|
#-------------------------
|
111
124
|
class VarNameToken < WToken
|
125
|
+
attr_accessor :lvar_type
|
126
|
+
attr_accessor :in_def
|
112
127
|
end
|
113
128
|
|
114
129
|
#-------------------------
|
@@ -118,15 +133,30 @@ end
|
|
118
133
|
|
119
134
|
#-------------------------
|
120
135
|
class SymbolToken < Token
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
136
|
+
attr_accessor :open,:close
|
137
|
+
attr :raw
|
138
|
+
def initialize(ident,offset=nil,starter=':')
|
139
|
+
@raw=ident
|
140
|
+
str=ident.to_s
|
141
|
+
str[0,2]='' if /\A%s/===str
|
142
|
+
super starter+str, offset
|
143
|
+
@open=":"
|
144
|
+
@close=""
|
145
|
+
# @char=':'
|
146
|
+
|
147
|
+
end
|
148
|
+
|
149
|
+
def to_s
|
150
|
+
return @ident
|
151
|
+
raw=@raw.to_s
|
152
|
+
raw=raw[1...-1] if StringToken===@raw
|
153
|
+
@open+raw+@close
|
154
|
+
end
|
125
155
|
end
|
126
156
|
|
127
157
|
#-------------------------
|
128
158
|
class MethNameToken < Token # < SymbolToken
|
129
|
-
def initialize(ident,offset=nil)
|
159
|
+
def initialize(ident,offset=nil,bogus=nil)
|
130
160
|
@ident= (VarNameToken===ident)? ident.ident : ident
|
131
161
|
@offset=offset
|
132
162
|
@has_no_block=false
|
@@ -155,6 +185,7 @@ class NewlineToken < Token
|
|
155
185
|
super(nlstr,offset)
|
156
186
|
#@char=''
|
157
187
|
end
|
188
|
+
def as; ';' end
|
158
189
|
end
|
159
190
|
|
160
191
|
#-------------------------
|
@@ -164,6 +195,10 @@ class StringToken < Token
|
|
164
195
|
attr_accessor :modifiers #for regex only
|
165
196
|
attr_accessor :elems
|
166
197
|
attr_accessor :line #line on which the string ENDS
|
198
|
+
attr_accessor :bs_handler
|
199
|
+
|
200
|
+
attr_accessor :open #exact sequence of chars used to start the str
|
201
|
+
attr_accessor :close #exact seq of (1) char to stop the str
|
167
202
|
|
168
203
|
def with_line(line)
|
169
204
|
@line=line
|
@@ -193,26 +228,39 @@ class StringToken < Token
|
|
193
228
|
PREFIXERS={ '['=>"%w[", '{'=>'%W{' }
|
194
229
|
SUFFIXERS={ '['=>"]", '{'=>'}' }
|
195
230
|
|
196
|
-
def
|
231
|
+
def has_str_inc?
|
232
|
+
elems.size>1 or RubyCode===elems.first
|
233
|
+
end
|
234
|
+
|
235
|
+
def to_s transname=:transform
|
197
236
|
assert @char[/[\[{"`\/]/] #"
|
198
237
|
#on output, all single-quoted strings become double-quoted
|
199
238
|
assert(@elems.length==1) if @char=='['
|
200
239
|
|
201
|
-
result=
|
240
|
+
result=open.dup
|
202
241
|
starter=result[-1,1]
|
203
|
-
ender=
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
242
|
+
ender=close
|
243
|
+
elems.each{|e|
|
244
|
+
case e
|
245
|
+
when String: result<<e
|
246
|
+
# strfrag=translate_escapes strfrag if RubyLexer::FASTER_STRING_ESCAPES
|
247
|
+
# result << send(transname,strfrag,starter,ender)
|
248
|
+
when VarNameToken:
|
249
|
+
if /^[$@]/===e.to_s
|
210
250
|
result << '#' + e.to_s
|
211
|
-
|
251
|
+
else
|
252
|
+
result << "\#{#{e}}"
|
253
|
+
end
|
254
|
+
when RubyCode: result << '#' + e.to_s
|
255
|
+
else fail
|
256
|
+
end
|
212
257
|
}
|
213
258
|
result << ender
|
214
259
|
|
215
|
-
|
260
|
+
if @char=='/'
|
261
|
+
result << modifiers if modifiers #regex only
|
262
|
+
result="%r"+result if RubyLexer::WHSPLF[result[1,1]]
|
263
|
+
end
|
216
264
|
|
217
265
|
return result
|
218
266
|
end
|
@@ -231,18 +279,18 @@ class StringToken < Token
|
|
231
279
|
end
|
232
280
|
|
233
281
|
def append(glob)
|
234
|
-
assert @elems.last.kind_of?(String)
|
282
|
+
#assert @elems.last.kind_of?(String)
|
235
283
|
case glob
|
236
284
|
when String,Integer then append_str! glob
|
237
285
|
when RubyCode then append_code! glob
|
238
286
|
else raise "bad string contents: #{glob}, a #{glob.class}"
|
239
287
|
end
|
240
|
-
assert @elems.last.kind_of?(String)
|
288
|
+
#assert @elems.last.kind_of?(String)
|
241
289
|
end
|
242
290
|
|
243
291
|
def append_token(strtok)
|
244
292
|
assert @elems.last.kind_of?(String)
|
245
|
-
assert strtok.elems.last.kind_of?(String)
|
293
|
+
#assert strtok.elems.last.kind_of?(String)
|
246
294
|
assert strtok.elems.first.kind_of?(String)
|
247
295
|
|
248
296
|
@elems.last << strtok.elems.shift
|
@@ -256,17 +304,60 @@ class StringToken < Token
|
|
256
304
|
assert((!@modifiers or !strtok.modifiers))
|
257
305
|
@modifiers||=strtok.modifiers
|
258
306
|
|
259
|
-
assert @elems.last.kind_of?(String)
|
307
|
+
#assert @elems.last.kind_of?(String)
|
308
|
+
|
309
|
+
@bs_handler ||=strtok.bs_handler
|
260
310
|
|
261
311
|
return self
|
262
312
|
end
|
263
313
|
|
314
|
+
def translate_escapes(str)
|
315
|
+
rl=RubyLexer.new("(string escape translation hack...)",'')
|
316
|
+
result=str.dup
|
317
|
+
seq=result.to_sequence
|
318
|
+
rl.instance_eval{@file=seq}
|
319
|
+
repls=[]
|
320
|
+
i=0
|
321
|
+
#ugly ugly ugly
|
322
|
+
while i<result.size and bs_at=result.index(/\\./m,i)
|
323
|
+
seq.pos=$~.end(0)-1
|
324
|
+
ch=rl.send(bs_handler,"\\",@open[-1,1],@close)
|
325
|
+
result[bs_at...seq.pos]=ch
|
326
|
+
i=bs_at+ch.size
|
327
|
+
end
|
328
|
+
|
329
|
+
return result
|
330
|
+
end
|
331
|
+
|
264
332
|
private
|
333
|
+
UNESC_DELIMS={}
|
334
|
+
|
265
335
|
#simpler transform, preserves original exactly
|
266
336
|
def simple_transform(strfrag,starter,ender)
|
267
|
-
|
268
|
-
#strfrag.gsub!(
|
269
|
-
|
337
|
+
assert('[{/'[@char])
|
338
|
+
#strfrag.gsub!(/(\A|[^\\])(?:\\\\)*\#([{$@])/){$1+'\\#'+$2} unless @char=='['
|
339
|
+
delimchars=Regexp.quote starter+ender
|
340
|
+
delimchars+=Regexp.quote("#") unless @char=='[' #escape beginning of string iterpolations
|
341
|
+
|
342
|
+
#i think most or all of this method is useless now...
|
343
|
+
|
344
|
+
#escape curly brace in string interpolations (%W only)
|
345
|
+
strfrag.gsub!('#{', '#\\{') if @char=='{'
|
346
|
+
|
347
|
+
ckey=starter+ender
|
348
|
+
unesc_delim=
|
349
|
+
UNESC_DELIMS[ckey]||=
|
350
|
+
/(\A|[^\\](?:\\\\)*)([#{delimchars}]+)/
|
351
|
+
# /(\\)([^#{delimchars}#{RubyLexer::WHSPLF}]|\Z)/
|
352
|
+
|
353
|
+
#an even number (esp 0) of backslashes before delim becomes escaped delim
|
354
|
+
strfrag.gsub!(unesc_delim){
|
355
|
+
pre=$1; toesc=$2
|
356
|
+
pre+toesc.gsub(/(.)/){ "\\"+$1 }
|
357
|
+
}
|
358
|
+
|
359
|
+
#no need to double backslashes anymore... they should come pre-doubled
|
360
|
+
|
270
361
|
return strfrag
|
271
362
|
end
|
272
363
|
|
@@ -286,15 +377,21 @@ private
|
|
286
377
|
end
|
287
378
|
|
288
379
|
def append_str!(str)
|
289
|
-
|
290
|
-
|
380
|
+
if @elems.last.kind_of?(String)
|
381
|
+
@elems.last << str
|
382
|
+
else
|
383
|
+
@elems << str
|
384
|
+
end
|
291
385
|
@ident << str
|
292
386
|
assert @elems.last.kind_of?(String)
|
293
387
|
end
|
294
388
|
|
295
389
|
def append_code!(code)
|
296
|
-
|
297
|
-
|
390
|
+
if @elems.last.kind_of?(String)
|
391
|
+
else
|
392
|
+
@elems.push ''
|
393
|
+
end
|
394
|
+
@elems.push code,''
|
298
395
|
@ident << "\#{#{code}}"
|
299
396
|
assert @elems.last.kind_of?(String)
|
300
397
|
end
|
@@ -310,9 +407,10 @@ class HerePlaceholderToken < WToken
|
|
310
407
|
attr_reader :termex, :quote, :ender, :dash
|
311
408
|
attr_accessor :unsafe_to_use, :string
|
312
409
|
attr_accessor :bodyclass
|
410
|
+
attr_accessor :open, :close
|
313
411
|
|
314
|
-
def initialize(dash,quote,ender)
|
315
|
-
@dash,@quote,@ender=dash,quote,ender
|
412
|
+
def initialize(dash,quote,ender,quote_real=true)
|
413
|
+
@dash,@quote,@ender,@quote_real=dash,quote,ender,quote_real
|
316
414
|
@unsafe_to_use=true
|
317
415
|
@string=StringToken.new
|
318
416
|
|
@@ -325,17 +423,17 @@ class HerePlaceholderToken < WToken
|
|
325
423
|
def ===(bogus); false end
|
326
424
|
|
327
425
|
def to_s
|
328
|
-
if @bodyclass==OutlinedHereBodyToken
|
426
|
+
# if @bodyclass==OutlinedHereBodyToken
|
329
427
|
result=if/[^a-z_0-9]/i===@ender
|
330
|
-
|
428
|
+
@ender.gsub(/[\\"]/, '\\\\'+'\\&')
|
331
429
|
else
|
332
430
|
@ender
|
333
431
|
end
|
334
|
-
["<<",@quote,@
|
335
|
-
else
|
336
|
-
assert !unsafe_to_use
|
337
|
-
@string.to_s
|
338
|
-
end
|
432
|
+
return ["<<",@dash,@quote_real&&@quote,result,@quote_real&&@quote].to_s
|
433
|
+
# else
|
434
|
+
# assert !unsafe_to_use
|
435
|
+
# return @string.to_s
|
436
|
+
# end
|
339
437
|
end
|
340
438
|
|
341
439
|
def append s; @string.append s end
|
@@ -374,6 +472,7 @@ class ZwToken < IgnoreToken
|
|
374
472
|
def explicit_form_all; explicit_form end
|
375
473
|
end
|
376
474
|
|
475
|
+
#-------------------------
|
377
476
|
class NoWsToken < ZwToken
|
378
477
|
def explicit_form_all
|
379
478
|
"#nows#"
|
@@ -383,34 +482,41 @@ class NoWsToken < ZwToken
|
|
383
482
|
end
|
384
483
|
end
|
385
484
|
|
485
|
+
#-------------------------
|
386
486
|
class ImplicitParamListStartToken < KeywordToken
|
387
487
|
include StillIgnoreToken
|
388
488
|
def initialize(offset)
|
389
489
|
super("(",offset)
|
390
490
|
end
|
391
491
|
def to_s; '' end
|
492
|
+
def as; "(" end
|
392
493
|
end
|
393
494
|
|
495
|
+
#-------------------------
|
394
496
|
class ImplicitParamListEndToken < KeywordToken
|
395
497
|
include StillIgnoreToken
|
396
498
|
def initialize(offset)
|
397
499
|
super(")",offset)
|
398
500
|
end
|
399
501
|
def to_s; '' end
|
502
|
+
def as; ")" end
|
400
503
|
end
|
401
504
|
|
505
|
+
#-------------------------
|
402
506
|
class AssignmentRhsListStartToken < ZwToken
|
403
507
|
def explicit_form
|
404
508
|
'*['
|
405
509
|
end
|
406
510
|
end
|
407
511
|
|
512
|
+
#-------------------------
|
408
513
|
class AssignmentRhsListEndToken < ZwToken
|
409
514
|
def explicit_form
|
410
515
|
']'
|
411
516
|
end
|
412
517
|
end
|
413
518
|
|
519
|
+
#-------------------------
|
414
520
|
class KwParamListStartToken < ZwToken
|
415
521
|
def explicit_form_all
|
416
522
|
"#((#"
|
@@ -420,6 +526,7 @@ class KwParamListStartToken < ZwToken
|
|
420
526
|
end
|
421
527
|
end
|
422
528
|
|
529
|
+
#-------------------------
|
423
530
|
class KwParamListEndToken < ZwToken
|
424
531
|
def explicit_form_all
|
425
532
|
"#))#"
|
@@ -429,6 +536,11 @@ class KwParamListEndToken < ZwToken
|
|
429
536
|
end
|
430
537
|
end
|
431
538
|
|
539
|
+
#-------------------------
|
540
|
+
class EndDefHeaderToken < ZwToken
|
541
|
+
def as; ";" end
|
542
|
+
end
|
543
|
+
|
432
544
|
#-------------------------
|
433
545
|
class EscNlToken < IgnoreToken
|
434
546
|
def initialize(filename,linenum,ident="\\\n",offset=nil)
|
@@ -440,7 +552,7 @@ class EscNlToken < IgnoreToken
|
|
440
552
|
end
|
441
553
|
|
442
554
|
#-------------------------
|
443
|
-
class EoiToken <
|
555
|
+
class EoiToken < Token
|
444
556
|
attr :file
|
445
557
|
alias :pos :offset
|
446
558
|
|
@@ -453,18 +565,29 @@ end
|
|
453
565
|
#-------------------------
|
454
566
|
class HereBodyToken < IgnoreToken
|
455
567
|
#attr_accessor :ender
|
456
|
-
|
568
|
+
attr_accessor :open,:close
|
569
|
+
def initialize(headtok,linecount)
|
457
570
|
assert HerePlaceholderToken===headtok
|
458
571
|
super(headtok.string,headtok.string.offset)
|
459
572
|
@headtok=headtok
|
573
|
+
@linecount=linecount
|
574
|
+
end
|
575
|
+
|
576
|
+
def line
|
577
|
+
@ident.line
|
578
|
+
end
|
579
|
+
|
580
|
+
def to_s
|
581
|
+
@ident.to_s
|
460
582
|
end
|
461
583
|
|
462
584
|
attr :headtok
|
585
|
+
attr :linecount #num lines here body spans (including terminator)
|
463
586
|
end
|
464
587
|
|
465
588
|
#-------------------------
|
466
589
|
class FileAndLineToken < IgnoreToken
|
467
|
-
|
590
|
+
attr_accessor :line
|
468
591
|
|
469
592
|
def initialize(ident,line,offset=nil)
|
470
593
|
|
@@ -1,6 +1,6 @@
|
|
1
|
-
=begin
|
1
|
+
=begin legal crap
|
2
2
|
rubylexer - a ruby lexer written in ruby
|
3
|
-
Copyright (C) 2004,2005 Caleb Clausen
|
3
|
+
Copyright (C) 2004,2005,2008 Caleb Clausen
|
4
4
|
|
5
5
|
This library is free software; you can redistribute it and/or
|
6
6
|
modify it under the terms of the GNU Lesser General Public
|
@@ -79,6 +79,9 @@ end
|
|
79
79
|
|
80
80
|
return result
|
81
81
|
end end
|
82
|
+
class HereBodyToken; def ws_munge(tp) #experimental
|
83
|
+
nil
|
84
|
+
end end
|
82
85
|
class OutlinedHereBodyToken; def ws_munge(tp)
|
83
86
|
nil
|
84
87
|
end end
|
@@ -121,21 +124,26 @@ class KeepWsTokenPrinter
|
|
121
124
|
|
122
125
|
def aprint(tok)
|
123
126
|
if StringToken===tok or
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
+
HereBodyToken===tok
|
128
|
+
# (HerePlaceholderToken===tok and
|
129
|
+
# tok.bodyclass!=OutlinedHereBodyToken
|
130
|
+
# )
|
127
131
|
str_needs_escnls=(tok.line-@lastfal.line).nonzero?
|
128
|
-
end
|
132
|
+
end if false
|
129
133
|
result=tok.ws_munge(self) and return result
|
130
134
|
|
131
135
|
|
132
136
|
#insert extra ws unless an ambiguous op immediately follows
|
133
137
|
#id or num, in which case ws would change the meaning
|
134
|
-
result=
|
138
|
+
result=tok
|
139
|
+
result=
|
140
|
+
case tok
|
141
|
+
when ZwToken,EoiToken,NoWsToken, HereBodyToken, NewlineToken,
|
142
|
+
ImplicitParamListStartToken,ImplicitParamListEndToken:
|
135
143
|
tok
|
136
144
|
else
|
137
145
|
[@sep.dup,tok]
|
138
|
-
end
|
146
|
+
end unless NoWsToken===lasttok
|
139
147
|
|
140
148
|
if str_needs_escnls
|
141
149
|
result=result.to_s
|