rubylexer 0.7.0 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80)
  1. data/History.txt +90 -0
  2. data/Manifest.txt +54 -3
  3. data/README.txt +4 -7
  4. data/Rakefile +3 -2
  5. data/lib/rubylexer.rb +856 -323
  6. data/lib/rubylexer/0.7.0.rb +11 -2
  7. data/lib/rubylexer/0.7.1.rb +2 -0
  8. data/lib/rubylexer/charhandler.rb +4 -4
  9. data/lib/rubylexer/context.rb +86 -9
  10. data/lib/rubylexer/rulexer.rb +455 -101
  11. data/lib/rubylexer/token.rb +166 -43
  12. data/lib/rubylexer/tokenprinter.rb +16 -8
  13. data/lib/rubylexer/version.rb +1 -1
  14. data/rubylexer.vpj +98 -0
  15. data/test/code/all_the_gems.rb +33 -0
  16. data/test/code/all_the_raas.rb +226 -0
  17. data/test/code/all_the_rubies.rb +2 -0
  18. data/test/code/deletewarns.rb +19 -1
  19. data/test/code/dumptokens.rb +39 -8
  20. data/test/code/errscan +2 -0
  21. data/test/code/isolate_error.rb +72 -0
  22. data/test/code/lexloop +14 -0
  23. data/test/code/locatetest.rb +150 -8
  24. data/test/code/regression.rb +109 -0
  25. data/test/code/rubylexervsruby.rb +53 -15
  26. data/test/code/strgen.rb +138 -0
  27. data/test/code/tarball.rb +144 -0
  28. data/test/code/testcases.rb +11 -0
  29. data/test/code/tokentest.rb +115 -24
  30. data/test/data/__eof2.rb +1 -0
  31. data/test/data/__eof5.rb +2 -0
  32. data/test/data/__eof6.rb +2 -0
  33. data/test/data/cvtesc.rb +17 -0
  34. data/test/data/g.rb +6 -0
  35. data/test/data/hd0.rb +3 -0
  36. data/test/data/hdateof.rb +2 -0
  37. data/test/data/hdempty.rb +3 -0
  38. data/test/data/hdr.rb +9 -0
  39. data/test/data/hdr_dos.rb +13 -0
  40. data/test/data/hdr_dos2.rb +18 -0
  41. data/test/data/heart.rb +2 -0
  42. data/test/data/here_escnl.rb +25 -0
  43. data/test/data/here_escnl_dos.rb +20 -0
  44. data/test/data/here_squote.rb +3 -0
  45. data/test/data/heremonsters.rb +140 -0
  46. data/test/data/heremonsters.rb.broken +68 -0
  47. data/test/data/heremonsters.rb.broken.save +68 -0
  48. data/test/data/heremonsters_dos.rb +140 -0
  49. data/test/data/heremonsters_dos.rb.broken +68 -0
  50. data/test/data/illegal_oneliners.rb +1 -0
  51. data/test/data/illegal_stanzas.rb +0 -0
  52. data/test/data/make_ws_strdelim.rb +22 -0
  53. data/test/data/maven2_builer_test.rb +82 -0
  54. data/test/data/migration.rb +8944 -0
  55. data/test/data/modl.rb +6 -0
  56. data/test/data/modl_dos.rb +7 -0
  57. data/test/data/modl_fails.rb +10 -0
  58. data/test/data/multilinestring.rb +6 -0
  59. data/test/data/oneliners.rb +555 -0
  60. data/test/data/p-op.rb +2 -0
  61. data/test/data/p.rb +3 -1710
  62. data/test/data/s.rb +90 -21
  63. data/test/data/simple.rb +1 -0
  64. data/test/data/simple_dos.rb +1 -0
  65. data/test/data/stanzas.rb +1194 -0
  66. data/test/data/strdelim_crlf.rb +6 -0
  67. data/test/data/stuff.rb +6 -0
  68. data/test/data/stuff2.rb +5 -0
  69. data/test/data/stuff3.rb +6 -0
  70. data/test/data/stuff4.rb +6 -0
  71. data/test/data/tkweird.rb +20 -0
  72. data/test/data/unending_stuff.rb +5 -0
  73. data/test/data/whatnot.rb +8 -0
  74. data/test/data/ws_strdelim.rb +0 -0
  75. data/test/test.sh +239 -0
  76. data/testing.txt +39 -50
  77. metadata +110 -12
  78. data/test/code/dl_all_gems.rb +0 -43
  79. data/test/code/unpack_all_gems.rb +0 -15
  80. data/test/data/gemlist.txt +0 -280
@@ -1,6 +1,6 @@
1
- =begin copyright
1
+ =begin legal crap
2
2
  rubylexer - a ruby lexer written in ruby
3
- Copyright (C) 2004,2005 Caleb Clausen
3
+ Copyright (C) 2004,2005,2008 Caleb Clausen
4
4
 
5
5
  This library is free software; you can redistribute it and/or
6
6
  modify it under the terms of the GNU Lesser General Public
@@ -25,6 +25,8 @@ class Token
25
25
  attr_accessor :ident
26
26
  alias to_s ident
27
27
  attr_accessor :offset #file offset of start of this token
28
+ attr_accessor :as #if defined, a KeywordToken which this token stands in for.
29
+ attr_accessor :allow_ooo_offset #hack
28
30
 
29
31
  def initialize(ident,offset=nil)
30
32
  @ident=ident
@@ -48,8 +50,8 @@ end
48
50
  class KeywordToken < WToken #also some operators
49
51
 
50
52
  #-----------------------------------
51
- def set_callsite! #not needed
52
- @callsite=true
53
+ def set_callsite!(x=true) #not needed
54
+ @callsite=x
53
55
  end
54
56
 
55
57
  #-----------------------------------
@@ -79,6 +81,17 @@ class KeywordToken < WToken #also some operators
79
81
  def has_end?
80
82
  self===RubyLexer::BEGINWORDS and @has_end||=nil
81
83
  end
84
+
85
+ attr_accessor :comma_type
86
+
87
+ def has_no_block!
88
+ @has_no_block=true
89
+ end
90
+
91
+ def has_no_block?
92
+ @has_no_block
93
+ end
94
+
82
95
  end
83
96
 
84
97
  #-------------------------
@@ -109,6 +122,8 @@ class ::Regexp; include TokenPat; end
109
122
 
110
123
  #-------------------------
111
124
  class VarNameToken < WToken
125
+ attr_accessor :lvar_type
126
+ attr_accessor :in_def
112
127
  end
113
128
 
114
129
  #-------------------------
@@ -118,15 +133,30 @@ end
118
133
 
119
134
  #-------------------------
120
135
  class SymbolToken < Token
121
- def initialize(ident,offset=nil)
122
- super ":#{ident}", offset
123
- # @char=':'
124
- end
136
+ attr_accessor :open,:close
137
+ attr :raw
138
+ def initialize(ident,offset=nil,starter=':')
139
+ @raw=ident
140
+ str=ident.to_s
141
+ str[0,2]='' if /\A%s/===str
142
+ super starter+str, offset
143
+ @open=":"
144
+ @close=""
145
+ # @char=':'
146
+
147
+ end
148
+
149
+ def to_s
150
+ return @ident
151
+ raw=@raw.to_s
152
+ raw=raw[1...-1] if StringToken===@raw
153
+ @open+raw+@close
154
+ end
125
155
  end
126
156
 
127
157
  #-------------------------
128
158
  class MethNameToken < Token # < SymbolToken
129
- def initialize(ident,offset=nil)
159
+ def initialize(ident,offset=nil,bogus=nil)
130
160
  @ident= (VarNameToken===ident)? ident.ident : ident
131
161
  @offset=offset
132
162
  @has_no_block=false
@@ -155,6 +185,7 @@ class NewlineToken < Token
155
185
  super(nlstr,offset)
156
186
  #@char=''
157
187
  end
188
+ def as; ';' end
158
189
  end
159
190
 
160
191
  #-------------------------
@@ -164,6 +195,10 @@ class StringToken < Token
164
195
  attr_accessor :modifiers #for regex only
165
196
  attr_accessor :elems
166
197
  attr_accessor :line #line on which the string ENDS
198
+ attr_accessor :bs_handler
199
+
200
+ attr_accessor :open #exact sequence of chars used to start the str
201
+ attr_accessor :close #exact seq of (1) char to stop the str
167
202
 
168
203
  def with_line(line)
169
204
  @line=line
@@ -193,26 +228,39 @@ class StringToken < Token
193
228
  PREFIXERS={ '['=>"%w[", '{'=>'%W{' }
194
229
  SUFFIXERS={ '['=>"]", '{'=>'}' }
195
230
 
196
- def to_s(transname=:transform)
231
+ def has_str_inc?
232
+ elems.size>1 or RubyCode===elems.first
233
+ end
234
+
235
+ def to_s transname=:transform
197
236
  assert @char[/[\[{"`\/]/] #"
198
237
  #on output, all single-quoted strings become double-quoted
199
238
  assert(@elems.length==1) if @char=='['
200
239
 
201
- result=(PREFIXERS[@char] or @char).dup
240
+ result=open.dup
202
241
  starter=result[-1,1]
203
- ender=(SUFFIXERS[@char] or @char).dup
204
- 0.step(@elems.length-1,2) { |i|
205
- strfrag=@elems[i].dup
206
- result << send(transname,strfrag,starter,ender)
207
-
208
- if e=@elems[i+1]
209
- assert(e.kind_of?(RubyCode))
242
+ ender=close
243
+ elems.each{|e|
244
+ case e
245
+ when String: result<<e
246
+ # strfrag=translate_escapes strfrag if RubyLexer::FASTER_STRING_ESCAPES
247
+ # result << send(transname,strfrag,starter,ender)
248
+ when VarNameToken:
249
+ if /^[$@]/===e.to_s
210
250
  result << '#' + e.to_s
211
- end
251
+ else
252
+ result << "\#{#{e}}"
253
+ end
254
+ when RubyCode: result << '#' + e.to_s
255
+ else fail
256
+ end
212
257
  }
213
258
  result << ender
214
259
 
215
- modifiers and result << modifiers #regex only
260
+ if @char=='/'
261
+ result << modifiers if modifiers #regex only
262
+ result="%r"+result if RubyLexer::WHSPLF[result[1,1]]
263
+ end
216
264
 
217
265
  return result
218
266
  end
@@ -231,18 +279,18 @@ class StringToken < Token
231
279
  end
232
280
 
233
281
  def append(glob)
234
- assert @elems.last.kind_of?(String)
282
+ #assert @elems.last.kind_of?(String)
235
283
  case glob
236
284
  when String,Integer then append_str! glob
237
285
  when RubyCode then append_code! glob
238
286
  else raise "bad string contents: #{glob}, a #{glob.class}"
239
287
  end
240
- assert @elems.last.kind_of?(String)
288
+ #assert @elems.last.kind_of?(String)
241
289
  end
242
290
 
243
291
  def append_token(strtok)
244
292
  assert @elems.last.kind_of?(String)
245
- assert strtok.elems.last.kind_of?(String)
293
+ #assert strtok.elems.last.kind_of?(String)
246
294
  assert strtok.elems.first.kind_of?(String)
247
295
 
248
296
  @elems.last << strtok.elems.shift
@@ -256,17 +304,60 @@ class StringToken < Token
256
304
  assert((!@modifiers or !strtok.modifiers))
257
305
  @modifiers||=strtok.modifiers
258
306
 
259
- assert @elems.last.kind_of?(String)
307
+ #assert @elems.last.kind_of?(String)
308
+
309
+ @bs_handler ||=strtok.bs_handler
260
310
 
261
311
  return self
262
312
  end
263
313
 
314
+ def translate_escapes(str)
315
+ rl=RubyLexer.new("(string escape translation hack...)",'')
316
+ result=str.dup
317
+ seq=result.to_sequence
318
+ rl.instance_eval{@file=seq}
319
+ repls=[]
320
+ i=0
321
+ #ugly ugly ugly
322
+ while i<result.size and bs_at=result.index(/\\./m,i)
323
+ seq.pos=$~.end(0)-1
324
+ ch=rl.send(bs_handler,"\\",@open[-1,1],@close)
325
+ result[bs_at...seq.pos]=ch
326
+ i=bs_at+ch.size
327
+ end
328
+
329
+ return result
330
+ end
331
+
264
332
  private
333
+ UNESC_DELIMS={}
334
+
265
335
  #simpler transform, preserves original exactly
266
336
  def simple_transform(strfrag,starter,ender)
267
- #assert('[{/'[@char])
268
- #strfrag.gsub!(/#([{$@])/,'\\#\\1') unless @char=='['
269
- strfrag.gsub!(Regexp.new("[\\"+starter+"\\"+ender+"]"), '\\\\\&')
337
+ assert('[{/'[@char])
338
+ #strfrag.gsub!(/(\A|[^\\])(?:\\\\)*\#([{$@])/){$1+'\\#'+$2} unless @char=='['
339
+ delimchars=Regexp.quote starter+ender
340
+ delimchars+=Regexp.quote("#") unless @char=='[' #escape beginning of string iterpolations
341
+
342
+ #i think most or all of this method is useless now...
343
+
344
+ #escape curly brace in string interpolations (%W only)
345
+ strfrag.gsub!('#{', '#\\{') if @char=='{'
346
+
347
+ ckey=starter+ender
348
+ unesc_delim=
349
+ UNESC_DELIMS[ckey]||=
350
+ /(\A|[^\\](?:\\\\)*)([#{delimchars}]+)/
351
+ # /(\\)([^#{delimchars}#{RubyLexer::WHSPLF}]|\Z)/
352
+
353
+ #an even number (esp 0) of backslashes before delim becomes escaped delim
354
+ strfrag.gsub!(unesc_delim){
355
+ pre=$1; toesc=$2
356
+ pre+toesc.gsub(/(.)/){ "\\"+$1 }
357
+ }
358
+
359
+ #no need to double backslashes anymore... they should come pre-doubled
360
+
270
361
  return strfrag
271
362
  end
272
363
 
@@ -286,15 +377,21 @@ private
286
377
  end
287
378
 
288
379
  def append_str!(str)
289
- assert @elems.last.kind_of?(String)
290
- @elems.last << str
380
+ if @elems.last.kind_of?(String)
381
+ @elems.last << str
382
+ else
383
+ @elems << str
384
+ end
291
385
  @ident << str
292
386
  assert @elems.last.kind_of?(String)
293
387
  end
294
388
 
295
389
  def append_code!(code)
296
- assert @elems.last.kind_of?(String)
297
- @elems.concat [code, '']
390
+ if @elems.last.kind_of?(String)
391
+ else
392
+ @elems.push ''
393
+ end
394
+ @elems.push code,''
298
395
  @ident << "\#{#{code}}"
299
396
  assert @elems.last.kind_of?(String)
300
397
  end
@@ -310,9 +407,10 @@ class HerePlaceholderToken < WToken
310
407
  attr_reader :termex, :quote, :ender, :dash
311
408
  attr_accessor :unsafe_to_use, :string
312
409
  attr_accessor :bodyclass
410
+ attr_accessor :open, :close
313
411
 
314
- def initialize(dash,quote,ender)
315
- @dash,@quote,@ender=dash,quote,ender
412
+ def initialize(dash,quote,ender,quote_real=true)
413
+ @dash,@quote,@ender,@quote_real=dash,quote,ender,quote_real
316
414
  @unsafe_to_use=true
317
415
  @string=StringToken.new
318
416
 
@@ -325,17 +423,17 @@ class HerePlaceholderToken < WToken
325
423
  def ===(bogus); false end
326
424
 
327
425
  def to_s
328
- if @bodyclass==OutlinedHereBodyToken
426
+ # if @bodyclass==OutlinedHereBodyToken
329
427
  result=if/[^a-z_0-9]/i===@ender
330
- %["#{@ender.gsub(/[\\"]/, '\\\\'+'\\&')}"]
428
+ @ender.gsub(/[\\"]/, '\\\\'+'\\&')
331
429
  else
332
430
  @ender
333
431
  end
334
- ["<<",@quote,@ender,@quote].to_s
335
- else
336
- assert !unsafe_to_use
337
- @string.to_s
338
- end
432
+ return ["<<",@dash,@quote_real&&@quote,result,@quote_real&&@quote].to_s
433
+ # else
434
+ # assert !unsafe_to_use
435
+ # return @string.to_s
436
+ # end
339
437
  end
340
438
 
341
439
  def append s; @string.append s end
@@ -374,6 +472,7 @@ class ZwToken < IgnoreToken
374
472
  def explicit_form_all; explicit_form end
375
473
  end
376
474
 
475
+ #-------------------------
377
476
  class NoWsToken < ZwToken
378
477
  def explicit_form_all
379
478
  "#nows#"
@@ -383,34 +482,41 @@ class NoWsToken < ZwToken
383
482
  end
384
483
  end
385
484
 
485
+ #-------------------------
386
486
  class ImplicitParamListStartToken < KeywordToken
387
487
  include StillIgnoreToken
388
488
  def initialize(offset)
389
489
  super("(",offset)
390
490
  end
391
491
  def to_s; '' end
492
+ def as; "(" end
392
493
  end
393
494
 
495
+ #-------------------------
394
496
  class ImplicitParamListEndToken < KeywordToken
395
497
  include StillIgnoreToken
396
498
  def initialize(offset)
397
499
  super(")",offset)
398
500
  end
399
501
  def to_s; '' end
502
+ def as; ")" end
400
503
  end
401
504
 
505
+ #-------------------------
402
506
  class AssignmentRhsListStartToken < ZwToken
403
507
  def explicit_form
404
508
  '*['
405
509
  end
406
510
  end
407
511
 
512
+ #-------------------------
408
513
  class AssignmentRhsListEndToken < ZwToken
409
514
  def explicit_form
410
515
  ']'
411
516
  end
412
517
  end
413
518
 
519
+ #-------------------------
414
520
  class KwParamListStartToken < ZwToken
415
521
  def explicit_form_all
416
522
  "#((#"
@@ -420,6 +526,7 @@ class KwParamListStartToken < ZwToken
420
526
  end
421
527
  end
422
528
 
529
+ #-------------------------
423
530
  class KwParamListEndToken < ZwToken
424
531
  def explicit_form_all
425
532
  "#))#"
@@ -429,6 +536,11 @@ class KwParamListEndToken < ZwToken
429
536
  end
430
537
  end
431
538
 
539
+ #-------------------------
540
+ class EndDefHeaderToken < ZwToken
541
+ def as; ";" end
542
+ end
543
+
432
544
  #-------------------------
433
545
  class EscNlToken < IgnoreToken
434
546
  def initialize(filename,linenum,ident="\\\n",offset=nil)
@@ -440,7 +552,7 @@ class EscNlToken < IgnoreToken
440
552
  end
441
553
 
442
554
  #-------------------------
443
- class EoiToken < IgnoreToken
555
+ class EoiToken < Token
444
556
  attr :file
445
557
  alias :pos :offset
446
558
 
@@ -453,18 +565,29 @@ end
453
565
  #-------------------------
454
566
  class HereBodyToken < IgnoreToken
455
567
  #attr_accessor :ender
456
- def initialize(headtok)
568
+ attr_accessor :open,:close
569
+ def initialize(headtok,linecount)
457
570
  assert HerePlaceholderToken===headtok
458
571
  super(headtok.string,headtok.string.offset)
459
572
  @headtok=headtok
573
+ @linecount=linecount
574
+ end
575
+
576
+ def line
577
+ @ident.line
578
+ end
579
+
580
+ def to_s
581
+ @ident.to_s
460
582
  end
461
583
 
462
584
  attr :headtok
585
+ attr :linecount #num lines here body spans (including terminator)
463
586
  end
464
587
 
465
588
  #-------------------------
466
589
  class FileAndLineToken < IgnoreToken
467
- attr :line
590
+ attr_accessor :line
468
591
 
469
592
  def initialize(ident,line,offset=nil)
470
593
 
@@ -1,6 +1,6 @@
1
- =begin copyright
1
+ =begin legal crap
2
2
  rubylexer - a ruby lexer written in ruby
3
- Copyright (C) 2004,2005 Caleb Clausen
3
+ Copyright (C) 2004,2005,2008 Caleb Clausen
4
4
 
5
5
  This library is free software; you can redistribute it and/or
6
6
  modify it under the terms of the GNU Lesser General Public
@@ -79,6 +79,9 @@ end
79
79
 
80
80
  return result
81
81
  end end
82
+ class HereBodyToken; def ws_munge(tp) #experimental
83
+ nil
84
+ end end
82
85
  class OutlinedHereBodyToken; def ws_munge(tp)
83
86
  nil
84
87
  end end
@@ -121,21 +124,26 @@ class KeepWsTokenPrinter
121
124
 
122
125
  def aprint(tok)
123
126
  if StringToken===tok or
124
- (HerePlaceholderToken===tok and
125
- tok.bodyclass!=OutlinedHereBodyToken
126
- )
127
+ HereBodyToken===tok
128
+ # (HerePlaceholderToken===tok and
129
+ # tok.bodyclass!=OutlinedHereBodyToken
130
+ # )
127
131
  str_needs_escnls=(tok.line-@lastfal.line).nonzero?
128
- end
132
+ end if false
129
133
  result=tok.ws_munge(self) and return result
130
134
 
131
135
 
132
136
  #insert extra ws unless an ambiguous op immediately follows
133
137
  #id or num, in which case ws would change the meaning
134
- result=if (ZwToken===tok or NoWsToken===@lasttok or ImplicitParamListStartToken===tok or ImplicitParamListEndToken===tok)
138
+ result=tok
139
+ result=
140
+ case tok
141
+ when ZwToken,EoiToken,NoWsToken, HereBodyToken, NewlineToken,
142
+ ImplicitParamListStartToken,ImplicitParamListEndToken:
135
143
  tok
136
144
  else
137
145
  [@sep.dup,tok]
138
- end
146
+ end unless NoWsToken===lasttok
139
147
 
140
148
  if str_needs_escnls
141
149
  result=result.to_s