rubylexer 0.7.7 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. checksums.yaml +4 -0
  2. data/History.txt +64 -0
  3. data/Makefile +2 -2
  4. data/README.txt +13 -9
  5. data/bin/rubylexer +113 -0
  6. data/lib/assert.rb +1 -1
  7. data/lib/rubylexer.rb +856 -305
  8. data/lib/rubylexer/charhandler.rb +1 -1
  9. data/lib/rubylexer/charset.rb +15 -7
  10. data/lib/rubylexer/context.rb +10 -2
  11. data/lib/rubylexer/lextable.rb +1 -0
  12. data/lib/rubylexer/rubycode.rb +1 -1
  13. data/lib/rubylexer/rulexer.rb +106 -32
  14. data/lib/rubylexer/symboltable.rb +1 -1
  15. data/lib/rubylexer/test/oneliners.rb +15 -5
  16. data/lib/rubylexer/test/oneliners_1.9.rb +116 -92
  17. data/lib/rubylexer/test/stanzas.rb +49 -27
  18. data/lib/rubylexer/test/testcases.rb +2 -2
  19. data/lib/rubylexer/token.rb +153 -23
  20. data/lib/rubylexer/tokenprinter.rb +9 -6
  21. data/lib/rubylexer/version.rb +1 -1
  22. data/rubylexer.gemspec +12 -8
  23. data/test/bad/ruby_lexer.rb +7 -0
  24. data/test/code/deletewarns.rb +1 -1
  25. data/test/code/dumptokens.rb +1 -81
  26. data/test/code/heredoc_blast_test.rb +112 -0
  27. data/test/code/locatetest.rb +1 -1
  28. data/test/code/regression.rb +23 -23
  29. data/test/code/rubylexervsruby.rb +59 -12
  30. data/test/code/tokentest.rb +62 -52
  31. data/test/data/23.rb +0 -1
  32. data/test/data/g.rb +0 -1
  33. data/test/data/heremonsters.rb +1 -1
  34. data/test/data/heremonsters_dos.rb +1 -1
  35. data/test/data/pre.rb +0 -1
  36. data/test/data/pre.unix.rb +0 -1
  37. data/test/data/putstext.rb +4 -0
  38. data/test/data/regtest.rb +0 -1
  39. data/test/data/stuffydog.rb +5 -0
  40. data/test/data/stuffydog2.rb +5 -0
  41. data/test/data/wsdlDriver.rb +0 -1
  42. data/test/test.sh +1 -1
  43. data/test/test_all.rb +3 -0
  44. data/test/test_bad_rubylexer.rb +16 -0
  45. data/test/test_rubylexer_bad.rb +12 -0
  46. data/testing.txt +40 -20
  47. metadata +51 -38
@@ -1,6 +1,6 @@
1
1
  =begin
2
2
  rubylexer - a ruby lexer written in ruby
3
- Copyright (C) 2004,2005,2008 Caleb Clausen
3
+ Copyright (C) 2004,2005,2008, 2011 Caleb Clausen
4
4
 
5
5
  This library is free software; you can redistribute it and/or
6
6
  modify it under the terms of the GNU Lesser General Public
@@ -1,6 +1,6 @@
1
1
  =begin copyright
2
2
  rubylexer - a ruby lexer written in ruby
3
- Copyright (C) 2004,2005 Caleb Clausen
3
+ Copyright (C) 2004,2005, 2011 Caleb Clausen
4
4
 
5
5
  This library is free software; you can redistribute it and/or
6
6
  modify it under the terms of the GNU Lesser General Public
@@ -34,7 +34,7 @@ class CharSet
34
34
  case chars
35
35
  when ::String
36
36
  chars.each_byte {|c| @bitset |= (1<<c) }
37
- when ::Fixnum then @bitset |= (1<<chars)
37
+ when ::Fixnum then @bitset |= (1<<chars)
38
38
  else chars.each {|c| @bitset |= (1<<c) }
39
39
  end
40
40
  end
@@ -49,17 +49,25 @@ class CharSet
49
49
  #this math works right with bignums... (i'm pretty sure)
50
50
  end
51
51
 
52
- def ===(c) #c is String|Fixnum|nil
53
- c.nil? and return false
54
- c.kind_of? String and c=c[0]
55
- return ( @bitset[c] != 0 )
52
+ if String==="a"[0]
53
+ def ===(c) #c is String|Fixnum|nil
54
+ c.nil? and return false
55
+ c.kind_of? String and c=c.getbyte(0)
56
+ return ( @bitset[c] != 0 )
57
+ end
58
+ else
59
+ def ===(c) #c is String|Fixnum|nil
60
+ c.nil? and return false
61
+ c.kind_of? String and c=c[0]
62
+ return ( @bitset[c] != 0 )
63
+ end
56
64
  end
57
65
 
58
66
  #enumerate the chars in n AS INTEGERS
59
67
  def each_byte(&block)
60
68
  #should use ffs... not available in ruby
61
69
  (0..255).each { |n|
62
- @bitset[n] and block[n]
70
+ @bitset[n].nonzero? and block[n]
63
71
  }
64
72
  end
65
73
 
@@ -1,6 +1,6 @@
1
1
  =begin
2
2
  rubylexer - a ruby lexer written in ruby
3
- Copyright (C) 2008 Caleb Clausen
3
+ Copyright (C) 2008, 2011 Caleb Clausen
4
4
 
5
5
  This library is free software; you can redistribute it and/or
6
6
  modify it under the terms of the GNU Lesser General Public
@@ -36,6 +36,7 @@ module NestedContexts
36
36
 
37
37
  def see lxr,msg; end
38
38
  def lhs=*x; end #do nothing
39
+ def lhs; false end
39
40
  def wantarrow; false end
40
41
  end
41
42
 
@@ -51,6 +52,12 @@ module NestedContexts
51
52
  end
52
53
  def wantarrow; true end
53
54
  end
55
+
56
+ class StringInclusionContext < NestedContext
57
+ def initialize(linenum)
58
+ super("{","}" ,linenum)
59
+ end
60
+ end
54
61
 
55
62
  class ParenContext < NestedContext
56
63
  def initialize(linenum)
@@ -120,7 +127,7 @@ module NestedContexts
120
127
  def starter; huh end #" " ???
121
128
  def ender; huh end #; or \n when from method def, { or do when from stabby block
122
129
  def endtoken offset
123
- KwParamListEndToken.new offset
130
+ ImplicitParamListEndToken.new offset
124
131
  end
125
132
  end
126
133
 
@@ -324,5 +331,6 @@ module NestedContexts
324
331
  dflt_initialize('?',':',linenum)
325
332
  end
326
333
  end
334
+
327
335
  end
328
336
  end
@@ -1,3 +1,4 @@
1
+ #Copyright (c) 2011 Caleb Clausen
1
2
  class RubyLexer
2
3
  class Rule
3
4
  def initialize(lead,matcher,*actions)
@@ -1,6 +1,6 @@
1
1
  =begin copyright
2
2
  rubylexer - a ruby lexer written in ruby
3
- Copyright (C) 2004,2005 Caleb Clausen
3
+ Copyright (C) 2004,2005, 2011 Caleb Clausen
4
4
 
5
5
  This library is free software; you can redistribute it and/or
6
6
  modify it under the terms of the GNU Lesser General Public
@@ -1,6 +1,7 @@
1
+ #encoding: binary
1
2
  =begin
2
3
  rubylexer - a ruby lexer written in ruby
3
- Copyright (C) 2004,2005,2008 Caleb Clausen
4
+ Copyright (C) 2004,2005,2008, 2011 Caleb Clausen
4
5
 
5
6
  This library is free software; you can redistribute it and/or
6
7
  modify it under the terms of the GNU Lesser General Public
@@ -17,9 +18,9 @@
17
18
  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
19
  =end
19
20
 
20
- #warn "hacking $LOAD_PATH to find latest sequence"
21
- #$:<<"../sequence/lib"
22
-
21
+ if defined? RubyLexer #sigh
22
+ Object.send :remove_const, :RubyLexer
23
+ end
23
24
 
24
25
  require "assert"
25
26
  #require "charhandler"
@@ -32,10 +33,18 @@ rescue LoadError=>e
32
33
  raise unless /rubygems/===e.message
33
34
  #hope we don't need it
34
35
  end
35
- #require 'sequence'
36
- require 'sequence/indexed'
37
- require 'sequence/file'
38
- require 'sequence/list'
36
+ begin
37
+ #require 'sequence'
38
+ require 'sequence/indexed'
39
+ require 'sequence/file'
40
+ require 'sequence/list'
41
+ rescue LoadError
42
+ trydir=File.expand_path File.dirname(__FILE__)+"/../../../sequence/lib"
43
+ raise if $:.include? trydir
44
+ warn "hacking $LOAD_PATH to find latest sequence"
45
+ $:<<trydir
46
+ retry
47
+ end
39
48
  #-----------------------------------
40
49
  assert !defined? ::RubyLexer
41
50
  class RubyLexer
@@ -46,7 +55,7 @@ class RubyLexer
46
55
  WHSPLF=WHSP+"\n"
47
56
  #maybe \r should be in WHSPLF instead
48
57
 
49
- LEGALCHARS=/[ -~#{WHSPLF}\x80-\xFF]/
58
+ LEGALCHARS=/[!-~#{WHSPLF}\x80-\xFF]/
50
59
 
51
60
  PAIRS={ '{'=>'}', '['=>']', '('=>')', '<'=>'>'}
52
61
 
@@ -72,20 +81,23 @@ class RubyLexer
72
81
 
73
82
  #-----------------------------------
74
83
  def endoffile_detected s=''
75
- EoiToken.new(s,@original_file, input_position-s.size)
84
+ EoiToken.new(s,@original_file, input_position-s.size,@linenum)
76
85
  end
77
86
  alias rulexer_endoffile_detected endoffile_detected
78
87
 
79
88
  #-----------------------------------
80
89
  def get1token
81
- @moretokens.empty? or return @moretokens.shift
90
+ @moretokens.empty? or return result=@moretokens.shift
82
91
 
83
92
  if eof?
84
93
  #@moretokens<<nil
85
- return endoffile_detected()
94
+ return result=endoffile_detected()
86
95
  end
87
96
 
88
- @toptable.go( nextchar )
97
+ return result=@toptable.go( nextchar )
98
+ ensure
99
+ #hacky: result.endline should already be set
100
+ result.endline||=@linenum if result
89
101
  end
90
102
  alias rulexer_get1token get1token
91
103
 
@@ -135,8 +147,8 @@ private
135
147
 
136
148
  #-----------------------------------
137
149
  def regex(ch=nil)
138
- result=RenderExactlyStringToken.new('/').append_token double_quote("/")
139
- if @rubyversion>=1.9
150
+ result= double_quote("/")
151
+ if false and @rubyversion>=1.9
140
152
  named_brs=[]
141
153
  if result.elems.size==1 and String===result.elems.first
142
154
  elem=result.elems.first
@@ -219,7 +231,7 @@ private
219
231
  when /^#{LCLETTER().gsub('_','')}$/o
220
232
  error= "unrecognized %string type: "+ch; '"'
221
233
  when ''
222
- result= lexerror( StringToken.new('', oldpos), "unexpected eof in %string")
234
+ result= lexerror( assign_encoding!(StringToken.new('', oldpos)), "unexpected eof in %string")
223
235
  result.line=@linenum
224
236
  return result
225
237
 
@@ -239,7 +251,6 @@ end
239
251
  case ch
240
252
  when /^[Wwr]$/
241
253
  str=result
242
- result=RenderExactlyStringToken.new(type).append_token(result)
243
254
  result.open=str.open; result.close=str.close
244
255
  result.line=@linenum
245
256
  when 's'
@@ -269,6 +280,11 @@ end
269
280
  return result
270
281
  end
271
282
 
283
+ #-----------------------------------
284
+ def assign_encoding! str
285
+ str
286
+ end
287
+
272
288
  #-----------------------------------
273
289
  INTERIOR_REX_CACHE={}
274
290
  EVEN_BS_S=/
@@ -316,14 +332,14 @@ if FASTER_STRING_ESCAPES
316
332
  str=StringToken.new type
317
333
  str.bs_handler ||= case type
318
334
  when '/' then :regex_esc_seq
319
- when '{' then @rubyversion>=1.9 ? :Wquote19_esc_seq : :Wquote_esc_seq
320
- when '"','`',':' then @rubyversion>=1.9 ? :dquote19_esc_seq : :dquote_esc_seq
335
+ when '{' then Wquote_handler_name() #@rubyversion>=1.9 ? :Wquote19_esc_seq : :Wquote_esc_seq
336
+ when '"','`',':' then dquote_handler_name #@rubyversion>=1.9 ? :dquote19_esc_seq : :dquote_esc_seq
321
337
  when "'" then :squote_esc_seq
322
338
  when "[" then :wquote_esc_seq
323
339
  else raise "unknown quote type: #{type}"
324
340
  end
325
341
 
326
- old_linenum=@linenum
342
+ str.startline=old_linenum=@linenum
327
343
  nestlevel=1
328
344
  loop{
329
345
  str.append(@file.scan( interior ))
@@ -404,7 +420,7 @@ if FASTER_STRING_ESCAPES
404
420
  /ox #and this?
405
421
 
406
422
  #shouldn't tolerate ILLEGAL_ESCAPED in str (unless single quotish)....
407
- lexerror str, "illegal escape sequence" if /#{@@ILLEGAL_CRUNCH}|#{ILLEGAL_ESCAPED}/===b
423
+ lexerror str, "illegal escape sequence" if /#{@@ILLEGAL_CRUNCH}|#{ILLEGAL_ESCAPED}/o===b
408
424
  end
409
425
 
410
426
  str.append b
@@ -429,8 +445,8 @@ else
429
445
 
430
446
  bs_handler ||= case type
431
447
  when '/' then :regex_esc_seq
432
- when '{' then @rubyversion>=1.9 ? :Wquote19_esc_seq : :Wquote_esc_seq
433
- when '"','`',':' then @rubyversion>=1.9 ? :dquote19_esc_seq : :dquote_esc_seq
448
+ when '{' then Wquote_handler_name #@rubyversion>=1.9 ? :Wquote19_esc_seq : :Wquote_esc_seq
449
+ when '"','`',':' then dquote_handler_name #@rubyversion>=1.9 ? :dquote19_esc_seq : :dquote_esc_seq
434
450
  when "'" then :squote_esc_seq
435
451
  when "[" then :wquote_esc_seq
436
452
  else raise "unknown quote type: #{type}"
@@ -506,6 +522,24 @@ else
506
522
  str.line=@linenum
507
523
  str
508
524
  end
525
+ ensure
526
+ assign_encoding!(str) if str
527
+ end
528
+
529
+ #-----------------------------------
530
+ def dquote_handle(ch)
531
+ @rubyversion >= 1.9 ? dquote19_esc_seq(ch) : dquote_esc_seq(ch)
532
+ #factored
533
+ end
534
+ #-----------------------------------
535
+ def dquote_handler_name
536
+ @rubyversion>=1.9 ? :dquote19_esc_seq : :dquote_esc_seq
537
+ #factored
538
+ end
539
+ #-----------------------------------
540
+ def Wquote_handler_name
541
+ @rubyversion>=1.9 ? :Wquote19_esc_seq : :Wquote_esc_seq
542
+ #factored
509
543
  end
510
544
 
511
545
  #-----------------------------------
@@ -570,13 +604,14 @@ end
570
604
  when 'u'
571
605
  case ch=getchar
572
606
  when /[a-f0-9]/i
573
- u=read(4)
607
+ u=ch+read(3)
574
608
  raise "bad unicode escape" unless /[0-9a-f]{4}/i===u
575
609
  [u.hex].pack "U"
576
610
  when '{'
577
611
  result=[]
578
612
  until eat_next_if '}'
579
- u=@file.scan(/\A[0-9a-f]{1,6}[ \t]?/i,7)
613
+ u=@file.scan( /\A[0-9a-f]{1,6}[ \t]?/i )
614
+ raise "bad unicode escape" unless u
580
615
  result<<u.hex
581
616
  end
582
617
  result=result.pack "U*"
@@ -607,7 +642,7 @@ end
607
642
  case ch=getchar
608
643
  when "\n"; @linenum+=1; ch
609
644
  when nester,delimiter; ch
610
- when /[\s\v\\]/; ch
645
+ when /[#@@WSCHARS\\]/o; ch
611
646
  else
612
647
  back1char
613
648
  result=dquote_esc_seq('\\',nester,delimiter)
@@ -622,7 +657,7 @@ end
622
657
  case ch=getchar
623
658
  when "\n"; @linenum+=1; ch
624
659
  when nester,delimiter; ch
625
- when /[\s\v\\]/; ch
660
+ when /[#@@WSCHARS\\]/o; ch
626
661
  else
627
662
  back1char
628
663
  result=dquote19_esc_seq('\\',nester,delimiter)
@@ -644,7 +679,7 @@ end
644
679
  when delimiter,nester,'\\'; escchar
645
680
  # when delimiter,nester; escchar
646
681
  when "\n"; @linenum+=1; escchar
647
- when /[\s\v]/; escchar
682
+ when /[#@@WSCHARS]/o; escchar
648
683
  else "\\"+escchar
649
684
  end
650
685
  end
@@ -755,6 +790,12 @@ end
755
790
  assert ch[/^[{(@$]$/]
756
791
  klass= RubyLexer===self ? self.class : RubyLexer
757
792
  rl=klass.new(@filename,@file,@linenum,offset_adjust(),:rubyversion=>@rubyversion)
793
+ modules=[]
794
+ class<<self;ancestors;end.each{|anc|
795
+ break if Class===anc
796
+ modules<<anc
797
+ }
798
+ modules.reverse.each{|m| rl.extend m }
758
799
  rl.extend RecursiveRubyLexer
759
800
  rl.enable_macros! if @enable_macro
760
801
  rl.in_def=true if inside_method_def?
@@ -927,12 +968,12 @@ if (defined? DEBUGGER__ or defined? Debugger)
927
968
  #assert str == '#'
928
969
  Process.kill("INT",0) if readahead(11)==%/#breakpoint/
929
970
 
930
- IgnoreToken.new(til_charset(/[\r\n]/))
971
+ IgnoreToken.new(til_charset(/\n/))
931
972
  end
932
973
  else
933
974
  #-----------------------------------
934
975
  def comment(str=nil)
935
- IgnoreToken.new(til_charset(/[\r\n]/))
976
+ IgnoreToken.new(til_charset(/\n/))
936
977
  end
937
978
  end
938
979
  alias rulexer_comment comment
@@ -956,6 +997,7 @@ end
956
997
  nl or return nil
957
998
  assert((1..2)===nl.length)
958
999
  @linenum+=1
1000
+ @offset_adjust2=0
959
1001
  read nl.length
960
1002
  end
961
1003
 
@@ -975,7 +1017,7 @@ end
975
1017
  c=getc.chr
976
1018
 
977
1019
  if c == "\\"
978
- c = @rubyversion >= 1.9 ? dquote19_esc_seq('\\') : dquote_esc_seq('\\')
1020
+ c = dquote_handle('\\') #@rubyversion >= 1.9 ? dquote19_esc_seq('\\') : dquote_esc_seq('\\')
979
1021
  c = "\n" if c.empty?
980
1022
  end
981
1023
  return c
@@ -1025,6 +1067,18 @@ protected
1025
1067
  #-----------------------------------
1026
1068
  def input_position_set x; @file.pos=x end
1027
1069
 
1070
+ #-----------------------------------
1071
+ def adjust_linenums_in_moretokens!(tok2)
1072
+ line=tok2.endline
1073
+ @moretokens.each{|tok|
1074
+ if tok.linecount.zero?
1075
+ tok.endline||=line
1076
+ else
1077
+ line+=tok.linecount
1078
+ end
1079
+ }
1080
+ end
1081
+
1028
1082
  #-----------------------------------
1029
1083
  def self.save_offsets_in(*funcnames)
1030
1084
  eval funcnames.collect{|fn| <<-endeval }.join
@@ -1032,15 +1086,35 @@ protected
1032
1086
  alias #{fn}__no_offset #{fn} #rename old ver of fn
1033
1087
  def #{fn}(*args) #create new version
1034
1088
  pos= input_position
1089
+ ln=@linenum
1035
1090
  result=#{fn}__no_offset(*args)
1036
- assert Token===result
1091
+ assert Token===result, "lexer output was not a Token"
1037
1092
  result.offset||=pos
1093
+ result.endline||=ln
1094
+ adjust_linenums_in_moretokens!(result)
1038
1095
  return result
1039
1096
  end
1040
1097
  end
1041
1098
  endeval
1042
1099
  end
1043
1100
 
1101
+ #-----------------------------------
1102
+ def self.save_linenums_in(*funcnames)
1103
+ eval funcnames.collect{|fn| <<-endeval }.join
1104
+ class ::#{self}
1105
+ alias #{fn}__no_linenum #{fn} #rename old ver of fn
1106
+ def #{fn}(*args) #create new version
1107
+ ln=@linenum
1108
+ result=#{fn}__no_linenum(*args)
1109
+ assert Token===result
1110
+ result.endline||=ln
1111
+ adjust_linenums_in_moretokens!(result)
1112
+ return result
1113
+ end
1114
+ end
1115
+ endeval
1116
+ end
1117
+
1044
1118
 
1045
1119
  end
1046
1120
 
@@ -1,6 +1,6 @@
1
1
  =begin copyright
2
2
  rubylexer - a ruby lexer written in ruby
3
- Copyright (C) 2004,2005 Caleb Clausen
3
+ Copyright (C) 2004,2005, 2011 Caleb Clausen
4
4
 
5
5
  This library is free software; you can redistribute it and/or
6
6
  modify it under the terms of the GNU Lesser General Public
@@ -65,10 +65,8 @@ wwww,eeee=1,2
65
65
  x{a.b,c.d=1,2}
66
66
  x{proc{|a.b,c.d|}}
67
67
 
68
- p % foo
69
68
  p % foo
70
69
 
71
- p(% foo )
72
70
  p(% foo )
73
71
 
74
72
  p eval "%\sfoo\s"
@@ -250,8 +248,6 @@ p 0x123456789abcdefABCDEF01
250
248
  p "Hi, my name is #{"Slim #{(4)>2?"Whitman":"Shady"} "}."
251
249
  p "Hi, my name is #{"Slim #{(4)<2?"Whitman":"Shady"} "}."
252
250
 
253
- p(String *Class)
254
-
255
251
  def String.*(right) [self,right] end
256
252
  def String.<<(right) [self,:<<,right] end
257
253
  def String./(right) [self,:/,right] end
@@ -463,7 +459,6 @@ p 0x80
463
459
  p ?p
464
460
  p 0.1
465
461
  p 0.8
466
- p 0.9
467
462
  p(-1)
468
463
  p %/p/
469
464
  p %Q[<LI>]
@@ -576,3 +571,18 @@ def a.b; end rescue b0
576
571
  def maybe(chance = 0.5)end
577
572
  return rval / precision
578
573
  0e0
574
+
575
+ while false do end
576
+ while false do; end
577
+ until false do; end
578
+ for i in [] do; end
579
+ while (((((((((((((((((((((((((((((((false))))))))))))))))))))))))))))))) do; end
580
+ while (((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((false))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))) do; end
581
+
582
+ * = z
583
+ tuf while buf=sread 4096
584
+ {:n?=>1}
585
+ {:n!=>1}
586
+ {:n==>1}
587
+ {:n=>1}
588
+ case;when I; JIS;else case; when sjis__length; EJP ;else 55; end;end