rubylexer 0.7.7 → 0.8.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (47)
  1. checksums.yaml +4 -0
  2. data/History.txt +64 -0
  3. data/Makefile +2 -2
  4. data/README.txt +13 -9
  5. data/bin/rubylexer +113 -0
  6. data/lib/assert.rb +1 -1
  7. data/lib/rubylexer.rb +856 -305
  8. data/lib/rubylexer/charhandler.rb +1 -1
  9. data/lib/rubylexer/charset.rb +15 -7
  10. data/lib/rubylexer/context.rb +10 -2
  11. data/lib/rubylexer/lextable.rb +1 -0
  12. data/lib/rubylexer/rubycode.rb +1 -1
  13. data/lib/rubylexer/rulexer.rb +106 -32
  14. data/lib/rubylexer/symboltable.rb +1 -1
  15. data/lib/rubylexer/test/oneliners.rb +15 -5
  16. data/lib/rubylexer/test/oneliners_1.9.rb +116 -92
  17. data/lib/rubylexer/test/stanzas.rb +49 -27
  18. data/lib/rubylexer/test/testcases.rb +2 -2
  19. data/lib/rubylexer/token.rb +153 -23
  20. data/lib/rubylexer/tokenprinter.rb +9 -6
  21. data/lib/rubylexer/version.rb +1 -1
  22. data/rubylexer.gemspec +12 -8
  23. data/test/bad/ruby_lexer.rb +7 -0
  24. data/test/code/deletewarns.rb +1 -1
  25. data/test/code/dumptokens.rb +1 -81
  26. data/test/code/heredoc_blast_test.rb +112 -0
  27. data/test/code/locatetest.rb +1 -1
  28. data/test/code/regression.rb +23 -23
  29. data/test/code/rubylexervsruby.rb +59 -12
  30. data/test/code/tokentest.rb +62 -52
  31. data/test/data/23.rb +0 -1
  32. data/test/data/g.rb +0 -1
  33. data/test/data/heremonsters.rb +1 -1
  34. data/test/data/heremonsters_dos.rb +1 -1
  35. data/test/data/pre.rb +0 -1
  36. data/test/data/pre.unix.rb +0 -1
  37. data/test/data/putstext.rb +4 -0
  38. data/test/data/regtest.rb +0 -1
  39. data/test/data/stuffydog.rb +5 -0
  40. data/test/data/stuffydog2.rb +5 -0
  41. data/test/data/wsdlDriver.rb +0 -1
  42. data/test/test.sh +1 -1
  43. data/test/test_all.rb +3 -0
  44. data/test/test_bad_rubylexer.rb +16 -0
  45. data/test/test_rubylexer_bad.rb +12 -0
  46. data/testing.txt +40 -20
  47. metadata +51 -38
@@ -1,6 +1,6 @@
1
1
  =begin
2
2
  rubylexer - a ruby lexer written in ruby
3
- Copyright (C) 2004,2005,2008 Caleb Clausen
3
+ Copyright (C) 2004,2005,2008, 2011 Caleb Clausen
4
4
 
5
5
  This library is free software; you can redistribute it and/or
6
6
  modify it under the terms of the GNU Lesser General Public
@@ -1,6 +1,6 @@
1
1
  =begin copyright
2
2
  rubylexer - a ruby lexer written in ruby
3
- Copyright (C) 2004,2005 Caleb Clausen
3
+ Copyright (C) 2004,2005, 2011 Caleb Clausen
4
4
 
5
5
  This library is free software; you can redistribute it and/or
6
6
  modify it under the terms of the GNU Lesser General Public
@@ -34,7 +34,7 @@ class CharSet
34
34
  case chars
35
35
  when ::String
36
36
  chars.each_byte {|c| @bitset |= (1<<c) }
37
- when ::Fixnum then @bitset |= (1<<chars)
37
+ when ::Fixnum then @bitset |= (1<<chars)
38
38
  else chars.each {|c| @bitset |= (1<<c) }
39
39
  end
40
40
  end
@@ -49,17 +49,25 @@ class CharSet
49
49
  #this math works right with bignums... (i'm pretty sure)
50
50
  end
51
51
 
52
- def ===(c) #c is String|Fixnum|nil
53
- c.nil? and return false
54
- c.kind_of? String and c=c[0]
55
- return ( @bitset[c] != 0 )
52
+ if String==="a"[0]
53
+ def ===(c) #c is String|Fixnum|nil
54
+ c.nil? and return false
55
+ c.kind_of? String and c=c.getbyte(0)
56
+ return ( @bitset[c] != 0 )
57
+ end
58
+ else
59
+ def ===(c) #c is String|Fixnum|nil
60
+ c.nil? and return false
61
+ c.kind_of? String and c=c[0]
62
+ return ( @bitset[c] != 0 )
63
+ end
56
64
  end
57
65
 
58
66
  #enumerate the chars in n AS INTEGERS
59
67
  def each_byte(&block)
60
68
  #should use ffs... not available in ruby
61
69
  (0..255).each { |n|
62
- @bitset[n] and block[n]
70
+ @bitset[n].nonzero? and block[n]
63
71
  }
64
72
  end
65
73
 
@@ -1,6 +1,6 @@
1
1
  =begin
2
2
  rubylexer - a ruby lexer written in ruby
3
- Copyright (C) 2008 Caleb Clausen
3
+ Copyright (C) 2008, 2011 Caleb Clausen
4
4
 
5
5
  This library is free software; you can redistribute it and/or
6
6
  modify it under the terms of the GNU Lesser General Public
@@ -36,6 +36,7 @@ module NestedContexts
36
36
 
37
37
  def see lxr,msg; end
38
38
  def lhs=*x; end #do nothing
39
+ def lhs; false end
39
40
  def wantarrow; false end
40
41
  end
41
42
 
@@ -51,6 +52,12 @@ module NestedContexts
51
52
  end
52
53
  def wantarrow; true end
53
54
  end
55
+
56
+ class StringInclusionContext < NestedContext
57
+ def initialize(linenum)
58
+ super("{","}" ,linenum)
59
+ end
60
+ end
54
61
 
55
62
  class ParenContext < NestedContext
56
63
  def initialize(linenum)
@@ -120,7 +127,7 @@ module NestedContexts
120
127
  def starter; huh end #" " ???
121
128
  def ender; huh end #; or \n when from method def, { or do when from stabby block
122
129
  def endtoken offset
123
- KwParamListEndToken.new offset
130
+ ImplicitParamListEndToken.new offset
124
131
  end
125
132
  end
126
133
 
@@ -324,5 +331,6 @@ module NestedContexts
324
331
  dflt_initialize('?',':',linenum)
325
332
  end
326
333
  end
334
+
327
335
  end
328
336
  end
@@ -1,3 +1,4 @@
1
+ #Copyright (c) 2011 Caleb Clausen
1
2
  class RubyLexer
2
3
  class Rule
3
4
  def initialize(lead,matcher,*actions)
@@ -1,6 +1,6 @@
1
1
  =begin copyright
2
2
  rubylexer - a ruby lexer written in ruby
3
- Copyright (C) 2004,2005 Caleb Clausen
3
+ Copyright (C) 2004,2005, 2011 Caleb Clausen
4
4
 
5
5
  This library is free software; you can redistribute it and/or
6
6
  modify it under the terms of the GNU Lesser General Public
@@ -1,6 +1,7 @@
1
+ #encoding: binary
1
2
  =begin
2
3
  rubylexer - a ruby lexer written in ruby
3
- Copyright (C) 2004,2005,2008 Caleb Clausen
4
+ Copyright (C) 2004,2005,2008, 2011 Caleb Clausen
4
5
 
5
6
  This library is free software; you can redistribute it and/or
6
7
  modify it under the terms of the GNU Lesser General Public
@@ -17,9 +18,9 @@
17
18
  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
19
  =end
19
20
 
20
- #warn "hacking $LOAD_PATH to find latest sequence"
21
- #$:<<"../sequence/lib"
22
-
21
+ if defined? RubyLexer #sigh
22
+ Object.send :remove_const, :RubyLexer
23
+ end
23
24
 
24
25
  require "assert"
25
26
  #require "charhandler"
@@ -32,10 +33,18 @@ rescue LoadError=>e
32
33
  raise unless /rubygems/===e.message
33
34
  #hope we don't need it
34
35
  end
35
- #require 'sequence'
36
- require 'sequence/indexed'
37
- require 'sequence/file'
38
- require 'sequence/list'
36
+ begin
37
+ #require 'sequence'
38
+ require 'sequence/indexed'
39
+ require 'sequence/file'
40
+ require 'sequence/list'
41
+ rescue LoadError
42
+ trydir=File.expand_path File.dirname(__FILE__)+"/../../../sequence/lib"
43
+ raise if $:.include? trydir
44
+ warn "hacking $LOAD_PATH to find latest sequence"
45
+ $:<<trydir
46
+ retry
47
+ end
39
48
  #-----------------------------------
40
49
  assert !defined? ::RubyLexer
41
50
  class RubyLexer
@@ -46,7 +55,7 @@ class RubyLexer
46
55
  WHSPLF=WHSP+"\n"
47
56
  #maybe \r should be in WHSPLF instead
48
57
 
49
- LEGALCHARS=/[ -~#{WHSPLF}\x80-\xFF]/
58
+ LEGALCHARS=/[!-~#{WHSPLF}\x80-\xFF]/
50
59
 
51
60
  PAIRS={ '{'=>'}', '['=>']', '('=>')', '<'=>'>'}
52
61
 
@@ -72,20 +81,23 @@ class RubyLexer
72
81
 
73
82
  #-----------------------------------
74
83
  def endoffile_detected s=''
75
- EoiToken.new(s,@original_file, input_position-s.size)
84
+ EoiToken.new(s,@original_file, input_position-s.size,@linenum)
76
85
  end
77
86
  alias rulexer_endoffile_detected endoffile_detected
78
87
 
79
88
  #-----------------------------------
80
89
  def get1token
81
- @moretokens.empty? or return @moretokens.shift
90
+ @moretokens.empty? or return result=@moretokens.shift
82
91
 
83
92
  if eof?
84
93
  #@moretokens<<nil
85
- return endoffile_detected()
94
+ return result=endoffile_detected()
86
95
  end
87
96
 
88
- @toptable.go( nextchar )
97
+ return result=@toptable.go( nextchar )
98
+ ensure
99
+ #hacky: result.endline should already be set
100
+ result.endline||=@linenum if result
89
101
  end
90
102
  alias rulexer_get1token get1token
91
103
 
@@ -135,8 +147,8 @@ private
135
147
 
136
148
  #-----------------------------------
137
149
  def regex(ch=nil)
138
- result=RenderExactlyStringToken.new('/').append_token double_quote("/")
139
- if @rubyversion>=1.9
150
+ result= double_quote("/")
151
+ if false and @rubyversion>=1.9
140
152
  named_brs=[]
141
153
  if result.elems.size==1 and String===result.elems.first
142
154
  elem=result.elems.first
@@ -219,7 +231,7 @@ private
219
231
  when /^#{LCLETTER().gsub('_','')}$/o
220
232
  error= "unrecognized %string type: "+ch; '"'
221
233
  when ''
222
- result= lexerror( StringToken.new('', oldpos), "unexpected eof in %string")
234
+ result= lexerror( assign_encoding!(StringToken.new('', oldpos)), "unexpected eof in %string")
223
235
  result.line=@linenum
224
236
  return result
225
237
 
@@ -239,7 +251,6 @@ end
239
251
  case ch
240
252
  when /^[Wwr]$/
241
253
  str=result
242
- result=RenderExactlyStringToken.new(type).append_token(result)
243
254
  result.open=str.open; result.close=str.close
244
255
  result.line=@linenum
245
256
  when 's'
@@ -269,6 +280,11 @@ end
269
280
  return result
270
281
  end
271
282
 
283
+ #-----------------------------------
284
+ def assign_encoding! str
285
+ str
286
+ end
287
+
272
288
  #-----------------------------------
273
289
  INTERIOR_REX_CACHE={}
274
290
  EVEN_BS_S=/
@@ -316,14 +332,14 @@ if FASTER_STRING_ESCAPES
316
332
  str=StringToken.new type
317
333
  str.bs_handler ||= case type
318
334
  when '/' then :regex_esc_seq
319
- when '{' then @rubyversion>=1.9 ? :Wquote19_esc_seq : :Wquote_esc_seq
320
- when '"','`',':' then @rubyversion>=1.9 ? :dquote19_esc_seq : :dquote_esc_seq
335
+ when '{' then Wquote_handler_name() #@rubyversion>=1.9 ? :Wquote19_esc_seq : :Wquote_esc_seq
336
+ when '"','`',':' then dquote_handler_name #@rubyversion>=1.9 ? :dquote19_esc_seq : :dquote_esc_seq
321
337
  when "'" then :squote_esc_seq
322
338
  when "[" then :wquote_esc_seq
323
339
  else raise "unknown quote type: #{type}"
324
340
  end
325
341
 
326
- old_linenum=@linenum
342
+ str.startline=old_linenum=@linenum
327
343
  nestlevel=1
328
344
  loop{
329
345
  str.append(@file.scan( interior ))
@@ -404,7 +420,7 @@ if FASTER_STRING_ESCAPES
404
420
  /ox #and this?
405
421
 
406
422
  #shouldn't tolerate ILLEGAL_ESCAPED in str (unless single quotish)....
407
- lexerror str, "illegal escape sequence" if /#{@@ILLEGAL_CRUNCH}|#{ILLEGAL_ESCAPED}/===b
423
+ lexerror str, "illegal escape sequence" if /#{@@ILLEGAL_CRUNCH}|#{ILLEGAL_ESCAPED}/o===b
408
424
  end
409
425
 
410
426
  str.append b
@@ -429,8 +445,8 @@ else
429
445
 
430
446
  bs_handler ||= case type
431
447
  when '/' then :regex_esc_seq
432
- when '{' then @rubyversion>=1.9 ? :Wquote19_esc_seq : :Wquote_esc_seq
433
- when '"','`',':' then @rubyversion>=1.9 ? :dquote19_esc_seq : :dquote_esc_seq
448
+ when '{' then Wquote_handler_name #@rubyversion>=1.9 ? :Wquote19_esc_seq : :Wquote_esc_seq
449
+ when '"','`',':' then dquote_handler_name #@rubyversion>=1.9 ? :dquote19_esc_seq : :dquote_esc_seq
434
450
  when "'" then :squote_esc_seq
435
451
  when "[" then :wquote_esc_seq
436
452
  else raise "unknown quote type: #{type}"
@@ -506,6 +522,24 @@ else
506
522
  str.line=@linenum
507
523
  str
508
524
  end
525
+ ensure
526
+ assign_encoding!(str) if str
527
+ end
528
+
529
+ #-----------------------------------
530
+ def dquote_handle(ch)
531
+ @rubyversion >= 1.9 ? dquote19_esc_seq(ch) : dquote_esc_seq(ch)
532
+ #factored
533
+ end
534
+ #-----------------------------------
535
+ def dquote_handler_name
536
+ @rubyversion>=1.9 ? :dquote19_esc_seq : :dquote_esc_seq
537
+ #factored
538
+ end
539
+ #-----------------------------------
540
+ def Wquote_handler_name
541
+ @rubyversion>=1.9 ? :Wquote19_esc_seq : :Wquote_esc_seq
542
+ #factored
509
543
  end
510
544
 
511
545
  #-----------------------------------
@@ -570,13 +604,14 @@ end
570
604
  when 'u'
571
605
  case ch=getchar
572
606
  when /[a-f0-9]/i
573
- u=read(4)
607
+ u=ch+read(3)
574
608
  raise "bad unicode escape" unless /[0-9a-f]{4}/i===u
575
609
  [u.hex].pack "U"
576
610
  when '{'
577
611
  result=[]
578
612
  until eat_next_if '}'
579
- u=@file.scan(/\A[0-9a-f]{1,6}[ \t]?/i,7)
613
+ u=@file.scan( /\A[0-9a-f]{1,6}[ \t]?/i )
614
+ raise "bad unicode escape" unless u
580
615
  result<<u.hex
581
616
  end
582
617
  result=result.pack "U*"
@@ -607,7 +642,7 @@ end
607
642
  case ch=getchar
608
643
  when "\n"; @linenum+=1; ch
609
644
  when nester,delimiter; ch
610
- when /[\s\v\\]/; ch
645
+ when /[#@@WSCHARS\\]/o; ch
611
646
  else
612
647
  back1char
613
648
  result=dquote_esc_seq('\\',nester,delimiter)
@@ -622,7 +657,7 @@ end
622
657
  case ch=getchar
623
658
  when "\n"; @linenum+=1; ch
624
659
  when nester,delimiter; ch
625
- when /[\s\v\\]/; ch
660
+ when /[#@@WSCHARS\\]/o; ch
626
661
  else
627
662
  back1char
628
663
  result=dquote19_esc_seq('\\',nester,delimiter)
@@ -644,7 +679,7 @@ end
644
679
  when delimiter,nester,'\\'; escchar
645
680
  # when delimiter,nester; escchar
646
681
  when "\n"; @linenum+=1; escchar
647
- when /[\s\v]/; escchar
682
+ when /[#@@WSCHARS]/o; escchar
648
683
  else "\\"+escchar
649
684
  end
650
685
  end
@@ -755,6 +790,12 @@ end
755
790
  assert ch[/^[{(@$]$/]
756
791
  klass= RubyLexer===self ? self.class : RubyLexer
757
792
  rl=klass.new(@filename,@file,@linenum,offset_adjust(),:rubyversion=>@rubyversion)
793
+ modules=[]
794
+ class<<self;ancestors;end.each{|anc|
795
+ break if Class===anc
796
+ modules<<anc
797
+ }
798
+ modules.reverse.each{|m| rl.extend m }
758
799
  rl.extend RecursiveRubyLexer
759
800
  rl.enable_macros! if @enable_macro
760
801
  rl.in_def=true if inside_method_def?
@@ -927,12 +968,12 @@ if (defined? DEBUGGER__ or defined? Debugger)
927
968
  #assert str == '#'
928
969
  Process.kill("INT",0) if readahead(11)==%/#breakpoint/
929
970
 
930
- IgnoreToken.new(til_charset(/[\r\n]/))
971
+ IgnoreToken.new(til_charset(/\n/))
931
972
  end
932
973
  else
933
974
  #-----------------------------------
934
975
  def comment(str=nil)
935
- IgnoreToken.new(til_charset(/[\r\n]/))
976
+ IgnoreToken.new(til_charset(/\n/))
936
977
  end
937
978
  end
938
979
  alias rulexer_comment comment
@@ -956,6 +997,7 @@ end
956
997
  nl or return nil
957
998
  assert((1..2)===nl.length)
958
999
  @linenum+=1
1000
+ @offset_adjust2=0
959
1001
  read nl.length
960
1002
  end
961
1003
 
@@ -975,7 +1017,7 @@ end
975
1017
  c=getc.chr
976
1018
 
977
1019
  if c == "\\"
978
- c = @rubyversion >= 1.9 ? dquote19_esc_seq('\\') : dquote_esc_seq('\\')
1020
+ c = dquote_handle('\\') #@rubyversion >= 1.9 ? dquote19_esc_seq('\\') : dquote_esc_seq('\\')
979
1021
  c = "\n" if c.empty?
980
1022
  end
981
1023
  return c
@@ -1025,6 +1067,18 @@ protected
1025
1067
  #-----------------------------------
1026
1068
  def input_position_set x; @file.pos=x end
1027
1069
 
1070
+ #-----------------------------------
1071
+ def adjust_linenums_in_moretokens!(tok2)
1072
+ line=tok2.endline
1073
+ @moretokens.each{|tok|
1074
+ if tok.linecount.zero?
1075
+ tok.endline||=line
1076
+ else
1077
+ line+=tok.linecount
1078
+ end
1079
+ }
1080
+ end
1081
+
1028
1082
  #-----------------------------------
1029
1083
  def self.save_offsets_in(*funcnames)
1030
1084
  eval funcnames.collect{|fn| <<-endeval }.join
@@ -1032,15 +1086,35 @@ protected
1032
1086
  alias #{fn}__no_offset #{fn} #rename old ver of fn
1033
1087
  def #{fn}(*args) #create new version
1034
1088
  pos= input_position
1089
+ ln=@linenum
1035
1090
  result=#{fn}__no_offset(*args)
1036
- assert Token===result
1091
+ assert Token===result, "lexer output was not a Token"
1037
1092
  result.offset||=pos
1093
+ result.endline||=ln
1094
+ adjust_linenums_in_moretokens!(result)
1038
1095
  return result
1039
1096
  end
1040
1097
  end
1041
1098
  endeval
1042
1099
  end
1043
1100
 
1101
+ #-----------------------------------
1102
+ def self.save_linenums_in(*funcnames)
1103
+ eval funcnames.collect{|fn| <<-endeval }.join
1104
+ class ::#{self}
1105
+ alias #{fn}__no_linenum #{fn} #rename old ver of fn
1106
+ def #{fn}(*args) #create new version
1107
+ ln=@linenum
1108
+ result=#{fn}__no_linenum(*args)
1109
+ assert Token===result
1110
+ result.endline||=ln
1111
+ adjust_linenums_in_moretokens!(result)
1112
+ return result
1113
+ end
1114
+ end
1115
+ endeval
1116
+ end
1117
+
1044
1118
 
1045
1119
  end
1046
1120
 
@@ -1,6 +1,6 @@
1
1
  =begin copyright
2
2
  rubylexer - a ruby lexer written in ruby
3
- Copyright (C) 2004,2005 Caleb Clausen
3
+ Copyright (C) 2004,2005, 2011 Caleb Clausen
4
4
 
5
5
  This library is free software; you can redistribute it and/or
6
6
  modify it under the terms of the GNU Lesser General Public
@@ -65,10 +65,8 @@ wwww,eeee=1,2
65
65
  x{a.b,c.d=1,2}
66
66
  x{proc{|a.b,c.d|}}
67
67
 
68
- p % foo
69
68
  p % foo
70
69
 
71
- p(% foo )
72
70
  p(% foo )
73
71
 
74
72
  p eval "%\sfoo\s"
@@ -250,8 +248,6 @@ p 0x123456789abcdefABCDEF01
250
248
  p "Hi, my name is #{"Slim #{(4)>2?"Whitman":"Shady"} "}."
251
249
  p "Hi, my name is #{"Slim #{(4)<2?"Whitman":"Shady"} "}."
252
250
 
253
- p(String *Class)
254
-
255
251
  def String.*(right) [self,right] end
256
252
  def String.<<(right) [self,:<<,right] end
257
253
  def String./(right) [self,:/,right] end
@@ -463,7 +459,6 @@ p 0x80
463
459
  p ?p
464
460
  p 0.1
465
461
  p 0.8
466
- p 0.9
467
462
  p(-1)
468
463
  p %/p/
469
464
  p %Q[<LI>]
@@ -576,3 +571,18 @@ def a.b; end rescue b0
576
571
  def maybe(chance = 0.5)end
577
572
  return rval / precision
578
573
  0e0
574
+
575
+ while false do end
576
+ while false do; end
577
+ until false do; end
578
+ for i in [] do; end
579
+ while (((((((((((((((((((((((((((((((false))))))))))))))))))))))))))))))) do; end
580
+ while (((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((false))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))) do; end
581
+
582
+ * = z
583
+ tuf while buf=sread 4096
584
+ {:n?=>1}
585
+ {:n!=>1}
586
+ {:n==>1}
587
+ {:n=>1}
588
+ case;when I; JIS;else case; when sjis__length; EJP ;else 55; end;end