rubylexer 0.6.2 → 0.7.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (76) hide show
  1. data/History.txt +55 -0
  2. data/Manifest.txt +67 -0
  3. data/README.txt +103 -0
  4. data/Rakefile +24 -0
  5. data/howtouse.txt +9 -6
  6. data/{assert.rb → lib/assert.rb} +11 -11
  7. data/{rubylexer.rb → lib/rubylexer.rb} +645 -342
  8. data/lib/rubylexer/0.6.2.rb +39 -0
  9. data/lib/rubylexer/0.6.rb +5 -0
  10. data/lib/rubylexer/0.7.0.rb +2 -0
  11. data/{charhandler.rb → lib/rubylexer/charhandler.rb} +4 -2
  12. data/{charset.rb → lib/rubylexer/charset.rb} +4 -3
  13. data/{context.rb → lib/rubylexer/context.rb} +48 -18
  14. data/{rubycode.rb → lib/rubylexer/rubycode.rb} +5 -3
  15. data/{rulexer.rb → lib/rubylexer/rulexer.rb} +180 -102
  16. data/{symboltable.rb → lib/rubylexer/symboltable.rb} +10 -1
  17. data/{token.rb → lib/rubylexer/token.rb} +72 -20
  18. data/{tokenprinter.rb → lib/rubylexer/tokenprinter.rb} +39 -16
  19. data/lib/rubylexer/version.rb +3 -0
  20. data/{testcode → test/code}/deletewarns.rb +0 -0
  21. data/test/code/dl_all_gems.rb +43 -0
  22. data/{testcode → test/code}/dumptokens.rb +12 -9
  23. data/test/code/locatetest +30 -0
  24. data/test/code/locatetest.rb +49 -0
  25. data/test/code/rubylexervsruby.rb +173 -0
  26. data/{testcode → test/code}/tokentest.rb +62 -51
  27. data/{testcode → test/code}/torment +8 -8
  28. data/test/code/unpack_all_gems.rb +15 -0
  29. data/{testdata → test/data}/1.rb.broken +0 -0
  30. data/{testdata → test/data}/23.rb +0 -0
  31. data/test/data/__end__.rb +2 -0
  32. data/test/data/__end__2.rb +3 -0
  33. data/test/data/and.rb +5 -0
  34. data/test/data/blockassigntest.rb +23 -0
  35. data/test/data/chunky.plain.rb +75 -0
  36. data/test/data/chunky_bacon.rb +112 -0
  37. data/test/data/chunky_bacon2.rb +112 -0
  38. data/test/data/chunky_bacon3.rb +112 -0
  39. data/test/data/chunky_bacon4.rb +112 -0
  40. data/test/data/for.rb +45 -0
  41. data/test/data/format.rb +6 -0
  42. data/{testdata → test/data}/g.rb +0 -0
  43. data/test/data/gemlist.txt +280 -0
  44. data/test/data/heart.rb +7 -0
  45. data/test/data/if.rb +6 -0
  46. data/test/data/jarh.rb +369 -0
  47. data/test/data/lbrace.rb +4 -0
  48. data/test/data/lbrack.rb +4 -0
  49. data/{testdata → test/data}/newsyntax.rb +0 -0
  50. data/{testdata → test/data}/noeolatend.rb +0 -0
  51. data/test/data/p-op.rb +8 -0
  52. data/{testdata → test/data}/p.rb +671 -79
  53. data/{testdata → test/data}/pleac.rb.broken +0 -0
  54. data/{testdata → test/data}/pre.rb +0 -0
  55. data/{testdata → test/data}/pre.unix.rb +0 -0
  56. data/{testdata → test/data}/regtest.rb +0 -0
  57. data/test/data/rescue.rb +35 -0
  58. data/test/data/s.rb +186 -0
  59. data/test/data/strinc.rb +2 -0
  60. data/{testdata → test/data}/tokentest.assert.rb.can +0 -0
  61. data/test/data/untermed_here.rb.broken +2 -0
  62. data/test/data/untermed_string.rb.broken +1 -0
  63. data/{testdata → test/data}/untitled1.rb +0 -0
  64. data/{testdata → test/data}/w.rb +0 -0
  65. data/{testdata → test/data}/wsdlDriver.rb +0 -0
  66. data/testing.txt +6 -4
  67. metadata +163 -59
  68. data/README +0 -134
  69. data/Rantfile +0 -37
  70. data/io.each_til_charset.rb +0 -247
  71. data/require.rb +0 -103
  72. data/rlold.rb +0 -12
  73. data/testcode/locatetest +0 -12
  74. data/testcode/rubylexervsruby.rb +0 -104
  75. data/testcode/rubylexervsruby.sh +0 -51
  76. data/testresults/placeholder +0 -0
@@ -0,0 +1,30 @@
1
+ test $RUBY || export RUBY=ruby
2
+ export PWD=`pwd`
3
+ export RUBYLEXERVSRUBY="$RUBY -Ilib test/code/rubylexervsruby.rb"
4
+
5
+ if $RUBY --version|grep '^ruby 1\.6'; then
6
+ echo 'error: need ruby 1.8'; exit
7
+ fi
8
+
9
+ mkdir test/results
10
+
11
+ ruby <<END > test/results/rubyscripts.txt
12
+ require 'open3'
13
+ binfiles=ENV['PATH'].split(':').map{|dir| Dir[dir+'/*']}.compact
14
+ rubyscripts=[]
15
+ Open3.popen3('file -f -'){|cin,cout,cerr|
16
+ cin.puts *binfiles
17
+ cin.flush
18
+ cin.close
19
+ cout.each{|line|
20
+ name=line[/\A([^:]+):.*ruby.*text/i,1] and rubyscripts<<name and p name
21
+ }
22
+ }
23
+ puts rubyscripts
24
+ END
25
+
26
+ #cd `dirname -- $0`
27
+
28
+ for i in test/data/p.rb `(locate tk.rb;locate examples/examples_test.rb ron.rb /generator.rb ipaddr.rb date/format.rb /optparse.rb ferret/browser.rb;locate .rb; locate rakefile; locate Rakefile; locate RAKEFILE)|egrep -v '/test/results/'; cat test/results/rubyscripts.txt; find test/data/gems -name "*.rb"`; do
29
+ $RUBYLEXERVSRUBY $i
30
+ done
@@ -0,0 +1,49 @@
1
+ require 'test/code/rubylexervsruby'
2
+ #ENV['RUBY']||='ruby'
3
+ $RUBY=ENV['RUBY']||'ruby'
4
+ #test $RUBY || export RUBY=ruby
5
+
6
+ #$RUBYLEXERVSRUBY="#$RUBY test/code/rubylexervsruby.rb"
7
+
8
+ RUBY_VERSION[/^1\.[0-7]\./] and raise 'need ruby>= 1.8'
9
+
10
+
11
+
12
+ #if RUBY_VERSION --version|grep '^ruby 1\.6'; then
13
+ # echo 'error: need ruby 1.8'; exit
14
+ #fi
15
+
16
+
17
+ RLROOT= (File.dirname $0)+'/../..'
18
+ #cd `dirname -- $0`
19
+
20
+ =begin if locate fails, we should use the algorithm from this sh code
21
+
22
+ #also look in bin and lib directories
23
+ file -L `echo $PATH":/sbin:/usr/sbin"|tr : "\n"|sort -u|xargs -i echo "{}/*"`| \
24
+ grep "ruby[^:]*script"|cut -d: -f1 > test/results/rubyexelibs
25
+
26
+ ruby -e 'print ($:.sort.uniq+[""]).join"\n"'|xargs -i ls "{}/*.rb" >> test/results/rubyexelibs
27
+
28
+ for i in `cat test/results/rubyexelibs`; do
29
+ $RUBYLEXERVSRUBY $i;
30
+ done
31
+
32
+ =end
33
+
34
+ for i in [
35
+ RLROOT+"/test/data/p.rb", *Dir["test/data/*.rb"]+`(locate /tk.rb;
36
+ locate examples/examples_test.rb;locate .rb; locate rakefile;
37
+ locate Rakefile; locate RAKEFILE)|egrep -v '/test/(results|data)/'`.
38
+ split("\n")
39
+ ] do
40
+ # system $RUBYLEXERVSRUBY, i
41
+ #hmm, rubylexervsruby needs to be upgraded to not regard an output
42
+ #consisting entirely of warnings as a failure.
43
+ #if no 'warning' (in any capitalization) for 4 or more lines
44
+ RubyLexerVsRuby.rubylexervsruby i #or fail "failed in #{i}"
45
+ end
46
+
47
+ #for i in test/data/p.rb `(locate /tk.rb;locate examples/examples_test.rb;#locate .rb; locate rakefile; locate Rakefile; locate RAKEFILE)|egrep -v '/#test/results/'`; do
48
+ # $RUBYLEXERVSRUBY $i
49
+ #done
@@ -0,0 +1,173 @@
1
+ #!/usr/bin/ruby
2
+ #$DEBUG=$VERBOSE=true
3
+ $Debug=true
4
+ require "getoptlong"
5
+ require "test/code/tokentest"
6
+ require "test/code/deletewarns"
7
+
8
+
9
+ module RubyLexerVsRuby;end
10
+ class<<RubyLexerVsRuby
11
+ ENABLEMD5=false
12
+ def nop_ruby(cmd,input,output,stringdata)
13
+ # system %[echo "BEGIN{exit};">#{output}]
14
+ File.open(output,'w'){|f| f.write "BEGIN{exit};\n" }
15
+ if stringdata
16
+ File.open(output,'a'){|f| f.write stringdata }
17
+ else
18
+ system [cmd,'"'+input+'"','>>',output].join(' ')
19
+ end
20
+ end
21
+
22
+ def ruby_parsedump(input,output,ruby)
23
+ #todo: use ruby's md5 lib
24
+ #recursive ruby call here is unavoidable because -y flag has to be set
25
+
26
+ #do nothing if input unchanged
27
+ ENABLEMD5 and system "md5sum -c #{input}.md5 2>/dev/null" and return
28
+
29
+ status=0
30
+ IO.popen("#{ruby} -w -y < #{input} 2>&1"){ |pipe|
31
+ File.open(output,"w") { |outfd|
32
+ pipe.each{ |line|
33
+ outfd.print(line) \
34
+ if /^Shifting|^#{DeleteWarns::WARNERRREX}/o===line
35
+ #elsif /(warning|error)/i===line
36
+ # raise("a warning or error, appearently, not caught by rex above: "+line)
37
+ }
38
+ pid,status=Process.waitpid2 pipe.pid #get err status of subprocess
39
+ }
40
+ }
41
+ ENABLEMD5 and status==0 and system "md5sum #{input} > #{input}.md5" #compute sum only if no errors
42
+ end
43
+
44
+ def head(fname)
45
+ print "foobaaaaaaaaar\n\n\n\n\n\n"
46
+ File.open(fname){|fd| print(fd.read(512)+"\n") }
47
+ end
48
+
49
+ def rubylexervsruby(input,stringdata=nil,&ignore_it)
50
+
51
+ #cmdpath= `which #$0`
52
+ cmddir=Dir.getwd+"/test/code/"
53
+ base='test/results/'+File.basename(input)
54
+ _ttfile=base+'.tt'
55
+ mttfile=base+'.mtt'
56
+ p_ttfile=_ttfile+'.prs'
57
+ pmttfile=mttfile+'.prs'
58
+ p_ttdiff=p_ttfile+'.diff'
59
+ pmttdiff=pmttfile+'.diff'
60
+ nopfile=base+'.nop'
61
+ origfile=nopfile+'.prs'
62
+ ruby=ENV['RUBY'] || 'ruby'
63
+ expected_failures=Dir.getwd+"/test/code/"+File.basename(input)+".expected_failures"
64
+
65
+ #olddir=Dir.pwd
66
+ #Dir.chdir cmddir + '/../..'
67
+
68
+ nop_ruby "#{input[/\.gz$/]&&'z'}cat", input, nopfile, stringdata
69
+
70
+ print "executing: #{ruby} -Ilib test/code/tokentest.rb --keepws #{input}\n"
71
+
72
+ ruby_parsedump nopfile, origfile, ruby
73
+
74
+
75
+
76
+ tokentest nopfile, RubyLexer, RubyLexer::KeepWsTokenPrinter.new, nil, _ttfile
77
+ tokentest nopfile, RubyLexer, RubyLexer::KeepWsTokenPrinter.new(' '), nil, mttfile
78
+
79
+
80
+ ruby_parsedump _ttfile, p_ttfile, ruby
81
+ ruby_parsedump mttfile, pmttfile, ruby
82
+
83
+ if File.exists?(p_ttfile)
84
+ IO.popen("diff -u1 -b #{origfile} #{p_ttfile}"){ |pipe|
85
+ File.open(p_ttdiff,"w") { |diff|
86
+ DeleteWarns.deletewarns(pipe){|s| diff.print s}
87
+ }
88
+ }
89
+ # File.unlink p_ttfile
90
+ end
91
+
92
+ if File.exists?(pmttfile)
93
+ IO.popen("diff -u1 -b #{origfile} #{pmttfile}"){ |pipe|
94
+ File.open(pmttdiff,"w") { |diff|
95
+ DeleteWarns.deletewarns(pipe){|s| diff.print s}
96
+ }
97
+ }
98
+ # File.unlink pmttfile
99
+ end
100
+
101
+ list=[]
102
+ #nonwarn4=/(^(?![^\n]*warning[^\n]*)[^\n]*\n){4,}/im
103
+ #4 or more non-warning lines:
104
+ nonwarn4=/^(?:(?![^\r\n]*warning)[^\r\n]+(?:\r\n?|\n\r?)){4,}/mi
105
+ result=true
106
+ for name in [p_ttdiff,pmttdiff] do
107
+ i=File.read(name)
108
+ # i.tr("\r","\n")
109
+ # i.gsub!(/^\n/m, '')
110
+ i.sub!(/\A([^\r\n]+(\r\n?|\n\r?)){2}/, '') #remove 1st 2 lines
111
+ i.scan nonwarn4 do |j|
112
+ unless ignore_it && ignore_it[j]
113
+ list.push( *j.split(/\r\n?|\n\r?/) ) #unless list.size>=10
114
+ end
115
+ end
116
+
117
+ unless list.empty?
118
+ list=list.join("\n") +"\n"
119
+ unless (File.exists?(expected_failures) and File.read(expected_failures))==list
120
+ print list
121
+ result=false
122
+ end
123
+ list=[]
124
+ end
125
+ end
126
+
127
+ #print( list.join("\n") +"\n")
128
+ #Dir.chdir olddir
129
+ return result
130
+
131
+ =begin
132
+ case File.zero?(p_ttdiff).to_s +
133
+ File.zero?(pmttdiff).to_s
134
+ when 'falsefalse' then
135
+ head p_ttdiff
136
+ print "omitting #{pmttdiff}\n"
137
+ when 'falsetrue'
138
+ head p_ttdiff
139
+ when 'truefalse'
140
+ head pmttdiff
141
+ when 'truetrue'
142
+ #File.unlink origfile
143
+ return true
144
+ default
145
+ raise "unexpected 2bool val"
146
+ end
147
+ return false
148
+ =end
149
+
150
+ rescue Exception
151
+ system "ruby -c #{input} >/dev/null 2>&1" or expected="(expected) "
152
+ print "#{expected}error in: #{input}\n"
153
+ raise unless expected
154
+ end
155
+ end
156
+
157
+ if __FILE__==$0
158
+ #allow -e
159
+ stringdata=input=nil
160
+ opts=GetoptLong.new(["--eval", "-e", GetoptLong::REQUIRED_ARGUMENT])
161
+ opts.each{|opt,arg|
162
+ opt=='--eval' or raise :impossible
163
+ stringdata=arg
164
+ input='-e'
165
+ }
166
+
167
+ input||=ARGV[0]
168
+ RubyLexerVsRuby.rubylexervsruby(input,stringdata) and exit 0
169
+
170
+ exit 1
171
+ end
172
+
173
+
@@ -1,24 +1,31 @@
1
- #!/usr/bin/ruby -dw
1
+ #!/usr/bin/ruby
2
+ $Debug=true
2
3
  require "rubylexer"
3
- # require "rumalexer"
4
- require "token"
5
- require "tokenprinter"
6
4
  require "getoptlong"
7
5
  require "pp"
8
6
 
7
+ class RubyLexer
9
8
  class Token
10
9
  def verify_offset(fd); false end
11
10
 
12
11
  def check_for_error; end
13
12
  end
14
13
 
14
+ class LexerError<Exception; end
15
+
15
16
  module ErrorToken
16
- def check_for_error; raise @error end
17
+ def check_for_error; raise LexerError,@error end
17
18
  end
18
19
 
19
20
  class FileAndLineToken
20
21
  def verify_offset(fd); true end
21
22
  end
23
+ class ImplicitParamListStartToken
24
+ def verify_offset(fd); true end
25
+ end
26
+ class ImplicitParamListEndToken
27
+ def verify_offset(fd); true end
28
+ end
22
29
 
23
30
  module SimpleVerify
24
31
  def verify_offset(fd)
@@ -33,8 +40,13 @@ class MethNameToken; include SimpleVerify; end
33
40
 
34
41
  class SymbolToken
35
42
  def verify_offset(fd)
36
- readsym=fd.read(@ident.length)
37
- @ident[1]==?" or @ident[1]==?' or readsym==@ident
43
+ la=fd.read(2)
44
+ case la
45
+ when '%s': #stay right here
46
+ when /^:/: fd.pos-=1
47
+ else raise 'unrecognized symbol type'
48
+ end
49
+ @ident[1]==?" or @ident[1]==?' or fd.read(@ident.length-1)==@ident[1..-1]
38
50
 
39
51
  end
40
52
  end
@@ -42,7 +54,7 @@ end
42
54
  class EoiToken
43
55
  def verify_offset(fd)
44
56
  result=super(fd)
45
- fd.pos=fd.stat.size
57
+ fd.eof?
46
58
  return result
47
59
  end
48
60
  end
@@ -66,12 +78,12 @@ end
66
78
  class HerePlaceholderToken
67
79
  def verify_offset(fd)
68
80
  '<<'==fd.read(2) or return false
69
- @dash and (?-==fd.getc or return false)
70
- case ch=fd.getc
81
+ @dash and (?-==fd.read1 or return false)
82
+ case ch=fd.read1
71
83
  when ?', ?`, ?"
72
- @quote==ch.chr or return false
73
- fd.read(@ender.size)==@ender or return false
74
- return fd.getc.chr==@quote
84
+ @quote==ch.chr and
85
+ fd.read(@ender.size)==@ender and
86
+ return fd.read(1)==@quote
75
87
  when ?a..?z, ?A..?Z, ?_, ?0..?9
76
88
  @quote=='"' or return false
77
89
  fd.pos-=1
@@ -93,7 +105,7 @@ class StringToken
93
105
 
94
106
  def verify_subtoken_offsets(fd)
95
107
  #verify offsets of subtokens
96
- 1.step(@elems.length-1,2) { |i| @elems[i].verify_offset(fd) }
108
+ 1.step(@elems.length-1,2) { |i| @elems[i].verify_offset(fd) or raise LexerError}
97
109
  return true
98
110
  end
99
111
 
@@ -137,28 +149,28 @@ end
137
149
  # $ShowImplicit ? explicit_form : super
138
150
  # end
139
151
  #end
140
-
141
- class RuLexer
142
- def check_offset(tok,file=@file)
143
- endpos=(@moretokens.empty?)? file.pos : @moretokens[0].offset
144
- super(tok,file,endpos)
145
- end
146
152
  end
153
+ public
147
154
 
148
- def check_offset(tok,file,endpos)
149
- oldpos=file.pos
155
+ def check_offset(tok,file=nil,endpos=nil)
156
+ file||=@file
157
+ endpos||=(@moretokens.empty?)? file.pos : @moretokens[0].offset
158
+ oldpos=file.pos
150
159
 
151
- assert Integer===tok.offset
152
- assert Integer===endpos
153
- assert endpos>=tok.offset
160
+ assert Integer===tok.offset
161
+ assert Integer===endpos
162
+ endpos>=tok.offset or
163
+ raise RubyLexer::LexerError, "expected >=#{tok.offset}, got #{endpos}, "\
164
+ "token #{tok}:#{tok.class}"
154
165
 
155
- file.pos=tok.offset
156
- assert tok.verify_offset(file)
157
- case tok
158
- when StringToken,NumberToken,HereBodyToken
159
- else assert(file.pos==endpos)
160
- end
161
- file.pos=oldpos
166
+ file.pos=tok.offset
167
+ tok.verify_offset(file) or raise RubyLexer::LexerError, "couldn't check offset of token #{tok.class}: #{tok}"
168
+ case tok
169
+ when RubyLexer::StringToken,RubyLexer::NumberToken,
170
+ RubyLexer::HereBodyToken,RubyLexer::SymbolToken: #do nothing
171
+ else (file.pos==endpos) or raise RubyLexer::LexerError, "positions don't line up, expected #{endpos}, got #{file.pos}, token: #{tok}"
172
+ end
173
+ file.pos=oldpos
162
174
  end
163
175
 
164
176
 
@@ -166,34 +178,33 @@ end
166
178
 
167
179
 
168
180
 
169
- def tokentest(name,lexertype,pprinter,input=File.open(name),output=nil)
181
+ def tokentest(name,lexertype,pprinter,input=File.open(name),output=$stdout)
170
182
  input ||= File.open(name)
171
- if output
172
- old_stdout=$stdout
173
- $stdout=File.open(output,'w')
183
+ if output!=$stdout
184
+ output=File.open(output,'w')
174
185
  end
175
- File.open(name) {|fd|
186
+
187
+ fd=input
188
+ #File.open(name) {|fd|
176
189
  lxr=lexertype.new(name,fd,1)
177
190
 
178
191
  begin
179
192
  tok=lxr.get1token
180
193
  lxr.check_offset(tok)
181
194
  tok.check_for_error
182
- pprinter.pprint(tok)
183
- end until EoiToken===tok
195
+ pprinter.pprint(tok,output)
196
+ end until RubyLexer::EoiToken===tok
184
197
 
185
198
  #hack for SimpleTokenPrinter....
186
- print "\n" if NewlineToken===lxr.last_operative_token and
187
- SimpleTokenPrinter===pprinter
199
+ print "\n" if RubyLexer::NewlineToken===lxr.last_operative_token and
200
+ RubyLexer::SimpleTokenPrinter===pprinter
188
201
 
189
202
  # unless lxr.balanced_braces?
190
203
  # raise "unbalanced braces at eof"
191
204
  # end
192
- }
193
- if output
194
- $stdout.close
195
- $stdout=old_stdout
196
- end
205
+ #}
206
+ output.close unless output==$stdout
207
+
197
208
  end
198
209
 
199
210
  #$ShowImplicit=false
@@ -203,7 +214,7 @@ if __FILE__==$0
203
214
  # lexertype= RumaLexer if defined? RumaLexer
204
215
  lexertype=RubyLexer
205
216
  insertnils=fd=name=nil
206
- pprinter=SimpleTokenPrinter
217
+ pprinter=RubyLexer::SimpleTokenPrinter
207
218
 
208
219
  opts=GetoptLong.new \
209
220
  ["--eval","-e", GetoptLong::REQUIRED_ARGUMENT],
@@ -213,15 +224,15 @@ if __FILE__==$0
213
224
  ["--implicit","-i", GetoptLong::NO_ARGUMENT],
214
225
  ["--implicit-all", GetoptLong::NO_ARGUMENT]
215
226
 
227
+ saweval=nil
216
228
  opts.each do|opt,arg|
217
229
  case opt
218
230
  when '--eval' then
219
- pprinter =pprinter.new(sep,line,showzw)
220
- tokentest('-e',lexertype,pprinter,arg)
231
+ tokentest('-e',lexertype,pprinter.new(sep,line,showzw),arg)
221
232
  saweval=true
222
233
  # when '--ruby' then lexertype=RubyLexer
223
- when '--keepws' then pprinter= KeepWsTokenPrinter
224
- when '--maxws' then pprinter= KeepWsTokenPrinter;sep=' '
234
+ when '--keepws' then pprinter= RubyLexer::KeepWsTokenPrinter
235
+ when '--maxws' then pprinter= RubyLexer::KeepWsTokenPrinter;sep=' '
225
236
  when '--implicit' then showzw=1
226
237
  when '--implicit-all' then showzw=2
227
238
  else raise :impossible