rubylexer 0.7.0 → 0.7.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (80) hide show
  1. data/History.txt +90 -0
  2. data/Manifest.txt +54 -3
  3. data/README.txt +4 -7
  4. data/Rakefile +3 -2
  5. data/lib/rubylexer.rb +856 -323
  6. data/lib/rubylexer/0.7.0.rb +11 -2
  7. data/lib/rubylexer/0.7.1.rb +2 -0
  8. data/lib/rubylexer/charhandler.rb +4 -4
  9. data/lib/rubylexer/context.rb +86 -9
  10. data/lib/rubylexer/rulexer.rb +455 -101
  11. data/lib/rubylexer/token.rb +166 -43
  12. data/lib/rubylexer/tokenprinter.rb +16 -8
  13. data/lib/rubylexer/version.rb +1 -1
  14. data/rubylexer.vpj +98 -0
  15. data/test/code/all_the_gems.rb +33 -0
  16. data/test/code/all_the_raas.rb +226 -0
  17. data/test/code/all_the_rubies.rb +2 -0
  18. data/test/code/deletewarns.rb +19 -1
  19. data/test/code/dumptokens.rb +39 -8
  20. data/test/code/errscan +2 -0
  21. data/test/code/isolate_error.rb +72 -0
  22. data/test/code/lexloop +14 -0
  23. data/test/code/locatetest.rb +150 -8
  24. data/test/code/regression.rb +109 -0
  25. data/test/code/rubylexervsruby.rb +53 -15
  26. data/test/code/strgen.rb +138 -0
  27. data/test/code/tarball.rb +144 -0
  28. data/test/code/testcases.rb +11 -0
  29. data/test/code/tokentest.rb +115 -24
  30. data/test/data/__eof2.rb +1 -0
  31. data/test/data/__eof5.rb +2 -0
  32. data/test/data/__eof6.rb +2 -0
  33. data/test/data/cvtesc.rb +17 -0
  34. data/test/data/g.rb +6 -0
  35. data/test/data/hd0.rb +3 -0
  36. data/test/data/hdateof.rb +2 -0
  37. data/test/data/hdempty.rb +3 -0
  38. data/test/data/hdr.rb +9 -0
  39. data/test/data/hdr_dos.rb +13 -0
  40. data/test/data/hdr_dos2.rb +18 -0
  41. data/test/data/heart.rb +2 -0
  42. data/test/data/here_escnl.rb +25 -0
  43. data/test/data/here_escnl_dos.rb +20 -0
  44. data/test/data/here_squote.rb +3 -0
  45. data/test/data/heremonsters.rb +140 -0
  46. data/test/data/heremonsters.rb.broken +68 -0
  47. data/test/data/heremonsters.rb.broken.save +68 -0
  48. data/test/data/heremonsters_dos.rb +140 -0
  49. data/test/data/heremonsters_dos.rb.broken +68 -0
  50. data/test/data/illegal_oneliners.rb +1 -0
  51. data/test/data/illegal_stanzas.rb +0 -0
  52. data/test/data/make_ws_strdelim.rb +22 -0
  53. data/test/data/maven2_builer_test.rb +82 -0
  54. data/test/data/migration.rb +8944 -0
  55. data/test/data/modl.rb +6 -0
  56. data/test/data/modl_dos.rb +7 -0
  57. data/test/data/modl_fails.rb +10 -0
  58. data/test/data/multilinestring.rb +6 -0
  59. data/test/data/oneliners.rb +555 -0
  60. data/test/data/p-op.rb +2 -0
  61. data/test/data/p.rb +3 -1710
  62. data/test/data/s.rb +90 -21
  63. data/test/data/simple.rb +1 -0
  64. data/test/data/simple_dos.rb +1 -0
  65. data/test/data/stanzas.rb +1194 -0
  66. data/test/data/strdelim_crlf.rb +6 -0
  67. data/test/data/stuff.rb +6 -0
  68. data/test/data/stuff2.rb +5 -0
  69. data/test/data/stuff3.rb +6 -0
  70. data/test/data/stuff4.rb +6 -0
  71. data/test/data/tkweird.rb +20 -0
  72. data/test/data/unending_stuff.rb +5 -0
  73. data/test/data/whatnot.rb +8 -0
  74. data/test/data/ws_strdelim.rb +0 -0
  75. data/test/test.sh +239 -0
  76. data/testing.txt +39 -50
  77. metadata +110 -12
  78. data/test/code/dl_all_gems.rb +0 -43
  79. data/test/code/unpack_all_gems.rb +0 -15
  80. data/test/data/gemlist.txt +0 -280
@@ -1,6 +1,6 @@
1
- =begin copyright
1
+ =begin legal crap
2
2
  rubylexer - a ruby lexer written in ruby
3
- Copyright (C) 2004,2005 Caleb Clausen
3
+ Copyright (C) 2004,2005,2008 Caleb Clausen
4
4
 
5
5
  This library is free software; you can redistribute it and/or
6
6
  modify it under the terms of the GNU Lesser General Public
@@ -25,6 +25,8 @@ class Token
25
25
  attr_accessor :ident
26
26
  alias to_s ident
27
27
  attr_accessor :offset #file offset of start of this token
28
+ attr_accessor :as #if defined, a KeywordToken which this token stands in for.
29
+ attr_accessor :allow_ooo_offset #hack
28
30
 
29
31
  def initialize(ident,offset=nil)
30
32
  @ident=ident
@@ -48,8 +50,8 @@ end
48
50
  class KeywordToken < WToken #also some operators
49
51
 
50
52
  #-----------------------------------
51
- def set_callsite! #not needed
52
- @callsite=true
53
+ def set_callsite!(x=true) #not needed
54
+ @callsite=x
53
55
  end
54
56
 
55
57
  #-----------------------------------
@@ -79,6 +81,17 @@ class KeywordToken < WToken #also some operators
79
81
  def has_end?
80
82
  self===RubyLexer::BEGINWORDS and @has_end||=nil
81
83
  end
84
+
85
+ attr_accessor :comma_type
86
+
87
+ def has_no_block!
88
+ @has_no_block=true
89
+ end
90
+
91
+ def has_no_block?
92
+ @has_no_block
93
+ end
94
+
82
95
  end
83
96
 
84
97
  #-------------------------
@@ -109,6 +122,8 @@ class ::Regexp; include TokenPat; end
109
122
 
110
123
  #-------------------------
111
124
  class VarNameToken < WToken
125
+ attr_accessor :lvar_type
126
+ attr_accessor :in_def
112
127
  end
113
128
 
114
129
  #-------------------------
@@ -118,15 +133,30 @@ end
118
133
 
119
134
  #-------------------------
120
135
  class SymbolToken < Token
121
- def initialize(ident,offset=nil)
122
- super ":#{ident}", offset
123
- # @char=':'
124
- end
136
+ attr_accessor :open,:close
137
+ attr :raw
138
+ def initialize(ident,offset=nil,starter=':')
139
+ @raw=ident
140
+ str=ident.to_s
141
+ str[0,2]='' if /\A%s/===str
142
+ super starter+str, offset
143
+ @open=":"
144
+ @close=""
145
+ # @char=':'
146
+
147
+ end
148
+
149
+ def to_s
150
+ return @ident
151
+ raw=@raw.to_s
152
+ raw=raw[1...-1] if StringToken===@raw
153
+ @open+raw+@close
154
+ end
125
155
  end
126
156
 
127
157
  #-------------------------
128
158
  class MethNameToken < Token # < SymbolToken
129
- def initialize(ident,offset=nil)
159
+ def initialize(ident,offset=nil,bogus=nil)
130
160
  @ident= (VarNameToken===ident)? ident.ident : ident
131
161
  @offset=offset
132
162
  @has_no_block=false
@@ -155,6 +185,7 @@ class NewlineToken < Token
155
185
  super(nlstr,offset)
156
186
  #@char=''
157
187
  end
188
+ def as; ';' end
158
189
  end
159
190
 
160
191
  #-------------------------
@@ -164,6 +195,10 @@ class StringToken < Token
164
195
  attr_accessor :modifiers #for regex only
165
196
  attr_accessor :elems
166
197
  attr_accessor :line #line on which the string ENDS
198
+ attr_accessor :bs_handler
199
+
200
+ attr_accessor :open #exact sequence of chars used to start the str
201
+ attr_accessor :close #exact seq of (1) char to stop the str
167
202
 
168
203
  def with_line(line)
169
204
  @line=line
@@ -193,26 +228,39 @@ class StringToken < Token
193
228
  PREFIXERS={ '['=>"%w[", '{'=>'%W{' }
194
229
  SUFFIXERS={ '['=>"]", '{'=>'}' }
195
230
 
196
- def to_s(transname=:transform)
231
+ def has_str_inc?
232
+ elems.size>1 or RubyCode===elems.first
233
+ end
234
+
235
+ def to_s transname=:transform
197
236
  assert @char[/[\[{"`\/]/] #"
198
237
  #on output, all single-quoted strings become double-quoted
199
238
  assert(@elems.length==1) if @char=='['
200
239
 
201
- result=(PREFIXERS[@char] or @char).dup
240
+ result=open.dup
202
241
  starter=result[-1,1]
203
- ender=(SUFFIXERS[@char] or @char).dup
204
- 0.step(@elems.length-1,2) { |i|
205
- strfrag=@elems[i].dup
206
- result << send(transname,strfrag,starter,ender)
207
-
208
- if e=@elems[i+1]
209
- assert(e.kind_of?(RubyCode))
242
+ ender=close
243
+ elems.each{|e|
244
+ case e
245
+ when String: result<<e
246
+ # strfrag=translate_escapes strfrag if RubyLexer::FASTER_STRING_ESCAPES
247
+ # result << send(transname,strfrag,starter,ender)
248
+ when VarNameToken:
249
+ if /^[$@]/===e.to_s
210
250
  result << '#' + e.to_s
211
- end
251
+ else
252
+ result << "\#{#{e}}"
253
+ end
254
+ when RubyCode: result << '#' + e.to_s
255
+ else fail
256
+ end
212
257
  }
213
258
  result << ender
214
259
 
215
- modifiers and result << modifiers #regex only
260
+ if @char=='/'
261
+ result << modifiers if modifiers #regex only
262
+ result="%r"+result if RubyLexer::WHSPLF[result[1,1]]
263
+ end
216
264
 
217
265
  return result
218
266
  end
@@ -231,18 +279,18 @@ class StringToken < Token
231
279
  end
232
280
 
233
281
  def append(glob)
234
- assert @elems.last.kind_of?(String)
282
+ #assert @elems.last.kind_of?(String)
235
283
  case glob
236
284
  when String,Integer then append_str! glob
237
285
  when RubyCode then append_code! glob
238
286
  else raise "bad string contents: #{glob}, a #{glob.class}"
239
287
  end
240
- assert @elems.last.kind_of?(String)
288
+ #assert @elems.last.kind_of?(String)
241
289
  end
242
290
 
243
291
  def append_token(strtok)
244
292
  assert @elems.last.kind_of?(String)
245
- assert strtok.elems.last.kind_of?(String)
293
+ #assert strtok.elems.last.kind_of?(String)
246
294
  assert strtok.elems.first.kind_of?(String)
247
295
 
248
296
  @elems.last << strtok.elems.shift
@@ -256,17 +304,60 @@ class StringToken < Token
256
304
  assert((!@modifiers or !strtok.modifiers))
257
305
  @modifiers||=strtok.modifiers
258
306
 
259
- assert @elems.last.kind_of?(String)
307
+ #assert @elems.last.kind_of?(String)
308
+
309
+ @bs_handler ||=strtok.bs_handler
260
310
 
261
311
  return self
262
312
  end
263
313
 
314
+ def translate_escapes(str)
315
+ rl=RubyLexer.new("(string escape translation hack...)",'')
316
+ result=str.dup
317
+ seq=result.to_sequence
318
+ rl.instance_eval{@file=seq}
319
+ repls=[]
320
+ i=0
321
+ #ugly ugly ugly
322
+ while i<result.size and bs_at=result.index(/\\./m,i)
323
+ seq.pos=$~.end(0)-1
324
+ ch=rl.send(bs_handler,"\\",@open[-1,1],@close)
325
+ result[bs_at...seq.pos]=ch
326
+ i=bs_at+ch.size
327
+ end
328
+
329
+ return result
330
+ end
331
+
264
332
  private
333
+ UNESC_DELIMS={}
334
+
265
335
  #simpler transform, preserves original exactly
266
336
  def simple_transform(strfrag,starter,ender)
267
- #assert('[{/'[@char])
268
- #strfrag.gsub!(/#([{$@])/,'\\#\\1') unless @char=='['
269
- strfrag.gsub!(Regexp.new("[\\"+starter+"\\"+ender+"]"), '\\\\\&')
337
+ assert('[{/'[@char])
338
+ #strfrag.gsub!(/(\A|[^\\])(?:\\\\)*\#([{$@])/){$1+'\\#'+$2} unless @char=='['
339
+ delimchars=Regexp.quote starter+ender
340
+ delimchars+=Regexp.quote("#") unless @char=='[' #escape beginning of string iterpolations
341
+
342
+ #i think most or all of this method is useless now...
343
+
344
+ #escape curly brace in string interpolations (%W only)
345
+ strfrag.gsub!('#{', '#\\{') if @char=='{'
346
+
347
+ ckey=starter+ender
348
+ unesc_delim=
349
+ UNESC_DELIMS[ckey]||=
350
+ /(\A|[^\\](?:\\\\)*)([#{delimchars}]+)/
351
+ # /(\\)([^#{delimchars}#{RubyLexer::WHSPLF}]|\Z)/
352
+
353
+ #an even number (esp 0) of backslashes before delim becomes escaped delim
354
+ strfrag.gsub!(unesc_delim){
355
+ pre=$1; toesc=$2
356
+ pre+toesc.gsub(/(.)/){ "\\"+$1 }
357
+ }
358
+
359
+ #no need to double backslashes anymore... they should come pre-doubled
360
+
270
361
  return strfrag
271
362
  end
272
363
 
@@ -286,15 +377,21 @@ private
286
377
  end
287
378
 
288
379
  def append_str!(str)
289
- assert @elems.last.kind_of?(String)
290
- @elems.last << str
380
+ if @elems.last.kind_of?(String)
381
+ @elems.last << str
382
+ else
383
+ @elems << str
384
+ end
291
385
  @ident << str
292
386
  assert @elems.last.kind_of?(String)
293
387
  end
294
388
 
295
389
  def append_code!(code)
296
- assert @elems.last.kind_of?(String)
297
- @elems.concat [code, '']
390
+ if @elems.last.kind_of?(String)
391
+ else
392
+ @elems.push ''
393
+ end
394
+ @elems.push code,''
298
395
  @ident << "\#{#{code}}"
299
396
  assert @elems.last.kind_of?(String)
300
397
  end
@@ -310,9 +407,10 @@ class HerePlaceholderToken < WToken
310
407
  attr_reader :termex, :quote, :ender, :dash
311
408
  attr_accessor :unsafe_to_use, :string
312
409
  attr_accessor :bodyclass
410
+ attr_accessor :open, :close
313
411
 
314
- def initialize(dash,quote,ender)
315
- @dash,@quote,@ender=dash,quote,ender
412
+ def initialize(dash,quote,ender,quote_real=true)
413
+ @dash,@quote,@ender,@quote_real=dash,quote,ender,quote_real
316
414
  @unsafe_to_use=true
317
415
  @string=StringToken.new
318
416
 
@@ -325,17 +423,17 @@ class HerePlaceholderToken < WToken
325
423
  def ===(bogus); false end
326
424
 
327
425
  def to_s
328
- if @bodyclass==OutlinedHereBodyToken
426
+ # if @bodyclass==OutlinedHereBodyToken
329
427
  result=if/[^a-z_0-9]/i===@ender
330
- %["#{@ender.gsub(/[\\"]/, '\\\\'+'\\&')}"]
428
+ @ender.gsub(/[\\"]/, '\\\\'+'\\&')
331
429
  else
332
430
  @ender
333
431
  end
334
- ["<<",@quote,@ender,@quote].to_s
335
- else
336
- assert !unsafe_to_use
337
- @string.to_s
338
- end
432
+ return ["<<",@dash,@quote_real&&@quote,result,@quote_real&&@quote].to_s
433
+ # else
434
+ # assert !unsafe_to_use
435
+ # return @string.to_s
436
+ # end
339
437
  end
340
438
 
341
439
  def append s; @string.append s end
@@ -374,6 +472,7 @@ class ZwToken < IgnoreToken
374
472
  def explicit_form_all; explicit_form end
375
473
  end
376
474
 
475
+ #-------------------------
377
476
  class NoWsToken < ZwToken
378
477
  def explicit_form_all
379
478
  "#nows#"
@@ -383,34 +482,41 @@ class NoWsToken < ZwToken
383
482
  end
384
483
  end
385
484
 
485
+ #-------------------------
386
486
  class ImplicitParamListStartToken < KeywordToken
387
487
  include StillIgnoreToken
388
488
  def initialize(offset)
389
489
  super("(",offset)
390
490
  end
391
491
  def to_s; '' end
492
+ def as; "(" end
392
493
  end
393
494
 
495
+ #-------------------------
394
496
  class ImplicitParamListEndToken < KeywordToken
395
497
  include StillIgnoreToken
396
498
  def initialize(offset)
397
499
  super(")",offset)
398
500
  end
399
501
  def to_s; '' end
502
+ def as; ")" end
400
503
  end
401
504
 
505
+ #-------------------------
402
506
  class AssignmentRhsListStartToken < ZwToken
403
507
  def explicit_form
404
508
  '*['
405
509
  end
406
510
  end
407
511
 
512
+ #-------------------------
408
513
  class AssignmentRhsListEndToken < ZwToken
409
514
  def explicit_form
410
515
  ']'
411
516
  end
412
517
  end
413
518
 
519
+ #-------------------------
414
520
  class KwParamListStartToken < ZwToken
415
521
  def explicit_form_all
416
522
  "#((#"
@@ -420,6 +526,7 @@ class KwParamListStartToken < ZwToken
420
526
  end
421
527
  end
422
528
 
529
+ #-------------------------
423
530
  class KwParamListEndToken < ZwToken
424
531
  def explicit_form_all
425
532
  "#))#"
@@ -429,6 +536,11 @@ class KwParamListEndToken < ZwToken
429
536
  end
430
537
  end
431
538
 
539
+ #-------------------------
540
+ class EndDefHeaderToken < ZwToken
541
+ def as; ";" end
542
+ end
543
+
432
544
  #-------------------------
433
545
  class EscNlToken < IgnoreToken
434
546
  def initialize(filename,linenum,ident="\\\n",offset=nil)
@@ -440,7 +552,7 @@ class EscNlToken < IgnoreToken
440
552
  end
441
553
 
442
554
  #-------------------------
443
- class EoiToken < IgnoreToken
555
+ class EoiToken < Token
444
556
  attr :file
445
557
  alias :pos :offset
446
558
 
@@ -453,18 +565,29 @@ end
453
565
  #-------------------------
454
566
  class HereBodyToken < IgnoreToken
455
567
  #attr_accessor :ender
456
- def initialize(headtok)
568
+ attr_accessor :open,:close
569
+ def initialize(headtok,linecount)
457
570
  assert HerePlaceholderToken===headtok
458
571
  super(headtok.string,headtok.string.offset)
459
572
  @headtok=headtok
573
+ @linecount=linecount
574
+ end
575
+
576
+ def line
577
+ @ident.line
578
+ end
579
+
580
+ def to_s
581
+ @ident.to_s
460
582
  end
461
583
 
462
584
  attr :headtok
585
+ attr :linecount #num lines here body spans (including terminator)
463
586
  end
464
587
 
465
588
  #-------------------------
466
589
  class FileAndLineToken < IgnoreToken
467
- attr :line
590
+ attr_accessor :line
468
591
 
469
592
  def initialize(ident,line,offset=nil)
470
593
 
@@ -1,6 +1,6 @@
1
- =begin copyright
1
+ =begin legal crap
2
2
  rubylexer - a ruby lexer written in ruby
3
- Copyright (C) 2004,2005 Caleb Clausen
3
+ Copyright (C) 2004,2005,2008 Caleb Clausen
4
4
 
5
5
  This library is free software; you can redistribute it and/or
6
6
  modify it under the terms of the GNU Lesser General Public
@@ -79,6 +79,9 @@ end
79
79
 
80
80
  return result
81
81
  end end
82
+ class HereBodyToken; def ws_munge(tp) #experimental
83
+ nil
84
+ end end
82
85
  class OutlinedHereBodyToken; def ws_munge(tp)
83
86
  nil
84
87
  end end
@@ -121,21 +124,26 @@ class KeepWsTokenPrinter
121
124
 
122
125
  def aprint(tok)
123
126
  if StringToken===tok or
124
- (HerePlaceholderToken===tok and
125
- tok.bodyclass!=OutlinedHereBodyToken
126
- )
127
+ HereBodyToken===tok
128
+ # (HerePlaceholderToken===tok and
129
+ # tok.bodyclass!=OutlinedHereBodyToken
130
+ # )
127
131
  str_needs_escnls=(tok.line-@lastfal.line).nonzero?
128
- end
132
+ end if false
129
133
  result=tok.ws_munge(self) and return result
130
134
 
131
135
 
132
136
  #insert extra ws unless an ambiguous op immediately follows
133
137
  #id or num, in which case ws would change the meaning
134
- result=if (ZwToken===tok or NoWsToken===@lasttok or ImplicitParamListStartToken===tok or ImplicitParamListEndToken===tok)
138
+ result=tok
139
+ result=
140
+ case tok
141
+ when ZwToken,EoiToken,NoWsToken, HereBodyToken, NewlineToken,
142
+ ImplicitParamListStartToken,ImplicitParamListEndToken:
135
143
  tok
136
144
  else
137
145
  [@sep.dup,tok]
138
- end
146
+ end unless NoWsToken===lasttok
139
147
 
140
148
  if str_needs_escnls
141
149
  result=result.to_s