rubylexer 0.6.2 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. data/History.txt +55 -0
  2. data/Manifest.txt +67 -0
  3. data/README.txt +103 -0
  4. data/Rakefile +24 -0
  5. data/howtouse.txt +9 -6
  6. data/{assert.rb → lib/assert.rb} +11 -11
  7. data/{rubylexer.rb → lib/rubylexer.rb} +645 -342
  8. data/lib/rubylexer/0.6.2.rb +39 -0
  9. data/lib/rubylexer/0.6.rb +5 -0
  10. data/lib/rubylexer/0.7.0.rb +2 -0
  11. data/{charhandler.rb → lib/rubylexer/charhandler.rb} +4 -2
  12. data/{charset.rb → lib/rubylexer/charset.rb} +4 -3
  13. data/{context.rb → lib/rubylexer/context.rb} +48 -18
  14. data/{rubycode.rb → lib/rubylexer/rubycode.rb} +5 -3
  15. data/{rulexer.rb → lib/rubylexer/rulexer.rb} +180 -102
  16. data/{symboltable.rb → lib/rubylexer/symboltable.rb} +10 -1
  17. data/{token.rb → lib/rubylexer/token.rb} +72 -20
  18. data/{tokenprinter.rb → lib/rubylexer/tokenprinter.rb} +39 -16
  19. data/lib/rubylexer/version.rb +3 -0
  20. data/{testcode → test/code}/deletewarns.rb +0 -0
  21. data/test/code/dl_all_gems.rb +43 -0
  22. data/{testcode → test/code}/dumptokens.rb +12 -9
  23. data/test/code/locatetest +30 -0
  24. data/test/code/locatetest.rb +49 -0
  25. data/test/code/rubylexervsruby.rb +173 -0
  26. data/{testcode → test/code}/tokentest.rb +62 -51
  27. data/{testcode → test/code}/torment +8 -8
  28. data/test/code/unpack_all_gems.rb +15 -0
  29. data/{testdata → test/data}/1.rb.broken +0 -0
  30. data/{testdata → test/data}/23.rb +0 -0
  31. data/test/data/__end__.rb +2 -0
  32. data/test/data/__end__2.rb +3 -0
  33. data/test/data/and.rb +5 -0
  34. data/test/data/blockassigntest.rb +23 -0
  35. data/test/data/chunky.plain.rb +75 -0
  36. data/test/data/chunky_bacon.rb +112 -0
  37. data/test/data/chunky_bacon2.rb +112 -0
  38. data/test/data/chunky_bacon3.rb +112 -0
  39. data/test/data/chunky_bacon4.rb +112 -0
  40. data/test/data/for.rb +45 -0
  41. data/test/data/format.rb +6 -0
  42. data/{testdata → test/data}/g.rb +0 -0
  43. data/test/data/gemlist.txt +280 -0
  44. data/test/data/heart.rb +7 -0
  45. data/test/data/if.rb +6 -0
  46. data/test/data/jarh.rb +369 -0
  47. data/test/data/lbrace.rb +4 -0
  48. data/test/data/lbrack.rb +4 -0
  49. data/{testdata → test/data}/newsyntax.rb +0 -0
  50. data/{testdata → test/data}/noeolatend.rb +0 -0
  51. data/test/data/p-op.rb +8 -0
  52. data/{testdata → test/data}/p.rb +671 -79
  53. data/{testdata → test/data}/pleac.rb.broken +0 -0
  54. data/{testdata → test/data}/pre.rb +0 -0
  55. data/{testdata → test/data}/pre.unix.rb +0 -0
  56. data/{testdata → test/data}/regtest.rb +0 -0
  57. data/test/data/rescue.rb +35 -0
  58. data/test/data/s.rb +186 -0
  59. data/test/data/strinc.rb +2 -0
  60. data/{testdata → test/data}/tokentest.assert.rb.can +0 -0
  61. data/test/data/untermed_here.rb.broken +2 -0
  62. data/test/data/untermed_string.rb.broken +1 -0
  63. data/{testdata → test/data}/untitled1.rb +0 -0
  64. data/{testdata → test/data}/w.rb +0 -0
  65. data/{testdata → test/data}/wsdlDriver.rb +0 -0
  66. data/testing.txt +6 -4
  67. metadata +163 -59
  68. data/README +0 -134
  69. data/Rantfile +0 -37
  70. data/io.each_til_charset.rb +0 -247
  71. data/require.rb +0 -103
  72. data/rlold.rb +0 -12
  73. data/testcode/locatetest +0 -12
  74. data/testcode/rubylexervsruby.rb +0 -104
  75. data/testcode/rubylexervsruby.sh +0 -51
  76. data/testresults/placeholder +0 -0
@@ -0,0 +1,30 @@
1
+ test -n "$RUBY" || export RUBY=ruby
2
+ export PWD=`pwd`
3
+ export RUBYLEXERVSRUBY="$RUBY -Ilib test/code/rubylexervsruby.rb"
4
+ # ruby 1.6 is rejected below; exit non-zero so a calling script sees the failure
5
+ if $RUBY --version|grep '^ruby 1\.6'; then
6
+ echo 'error: need ruby 1.8'; exit 1
7
+ fi
8
+ # -p: an existing test/results directory (left by an earlier run) is not an error
9
+ mkdir -p test/results
10
+ # list the files on $PATH that file(1) describes as ruby text; END is quoted so sh leaves $stderr alone
11
+ $RUBY <<'END' > test/results/rubyscripts.txt
12
+ require 'open3'
13
+ binfiles=ENV['PATH'].split(':').map{|dir| Dir[dir+'/*']}.compact
14
+ rubyscripts=[]
15
+ Open3.popen3('file -f -'){|cin,cout,cerr|
16
+ cin.puts *binfiles
17
+ cin.flush
18
+ cin.close
19
+ cout.each{|line|
20
+ name=line[/\A([^:]+):.*ruby.*text/i,1] and rubyscripts<<name and $stderr.puts name.inspect
21
+ }
22
+ }
23
+ puts rubyscripts
24
+ END
25
+
26
+ #cd `dirname -- $0`
27
+ # run the comparison on p.rb, the locate(1) hits, the $PATH scripts found above and *.rb under test/data/gems
28
+ for i in test/data/p.rb `(locate tk.rb;locate examples/examples_test.rb ron.rb /generator.rb ipaddr.rb date/format.rb /optparse.rb ferret/browser.rb;locate .rb; locate rakefile; locate Rakefile; locate RAKEFILE)|egrep -v '/test/results/'; cat test/results/rubyscripts.txt; find test/data/gems -name "*.rb"`; do
29
+ $RUBYLEXERVSRUBY "$i"
30
+ done
@@ -0,0 +1,49 @@
1
+ require 'test/code/rubylexervsruby'
2
+ #ENV['RUBY']||='ruby'
3
+ $RUBY=ENV['RUBY']||'ruby'
4
+ #test $RUBY || export RUBY=ruby
5
+
6
+ #$RUBYLEXERVSRUBY="#$RUBY test/code/rubylexervsruby.rb"
7
+
8
+ RUBY_VERSION[/^1\.[0-7]\./] and raise 'need ruby>= 1.8'
9
+
10
+
11
+
12
+ #if RUBY_VERSION --version|grep '^ruby 1\.6'; then
13
+ # echo 'error: need ruby 1.8'; exit
14
+ #fi
15
+
16
+
17
+ RLROOT= (File.dirname $0)+'/../..'
18
+ #cd `dirname -- $0`
19
+
20
+ =begin if locate fails, we should use the algorithm from this sh code
21
+
22
+ #also look in bin and lib directories
23
+ file -L `echo $PATH":/sbin:/usr/sbin"|tr : "\n"|sort -u|xargs -i echo "{}/*"`| \
24
+ grep "ruby[^:]*script"|cut -d: -f1 > test/results/rubyexelibs
25
+
26
+ ruby -e 'print ($:.sort.uniq+[""]).join"\n"'|xargs -i ls "{}/*.rb" >> test/results/rubyexelibs
27
+
28
+ for i in `cat test/results/rubyexelibs`; do
29
+ $RUBYLEXERVSRUBY $i;
30
+ done
31
+
32
+ =end
33
+
34
+ for i in [
35
+ RLROOT+"/test/data/p.rb", *Dir["test/data/*.rb"]+`(locate /tk.rb;
36
+ locate examples/examples_test.rb;locate .rb; locate rakefile;
37
+ locate Rakefile; locate RAKEFILE)|egrep -v '/test/(results|data)/'`.
38
+ split("\n")
39
+ ] do
40
+ # system $RUBYLEXERVSRUBY, i
41
+ #hmm, rubylexervsruby needs to be upgraded to not regard an output
42
+ #consisting entirely of warnings as a failure.
43
+ #if no 'warning' (in any capitalization) for 4 or more lines
44
+ RubyLexerVsRuby.rubylexervsruby i #or fail "failed in #{i}"
45
+ end
46
+
47
+ #for i in test/data/p.rb `(locate /tk.rb;locate examples/examples_test.rb;#locate .rb; locate rakefile; locate Rakefile; locate RAKEFILE)|egrep -v '/#test/results/'`; do
48
+ # $RUBYLEXERVSRUBY $i
49
+ #done
@@ -0,0 +1,173 @@
1
+ #!/usr/bin/ruby
2
+ #$DEBUG=$VERBOSE=true
3
+ $Debug=true
4
+ require "getoptlong"
5
+ require "test/code/tokentest"
6
+ require "test/code/deletewarns"
7
+
8
+
9
+ module RubyLexerVsRuby;end
10
+ class<<RubyLexerVsRuby
11
+ ENABLEMD5=false
12
+ def nop_ruby(cmd,input,output,stringdata)
13
+ # system %[echo "BEGIN{exit};">#{output}]
14
+ File.open(output,'w'){|f| f.write "BEGIN{exit};\n" }
15
+ if stringdata
16
+ File.open(output,'a'){|f| f.write stringdata }
17
+ else
18
+ system [cmd,'"'+input+'"','>>',output].join(' ')
19
+ end
20
+ end
21
+
22
+ def ruby_parsedump(input,output,ruby)
23
+ #todo: use ruby's md5 lib
24
+ #recursive ruby call here is unavoidable because -y flag has to be set
25
+
26
+ #do nothing if input unchanged
27
+ ENABLEMD5 and system "md5sum -c #{input}.md5 2>/dev/null" and return
28
+
29
+ status=0
30
+ IO.popen("#{ruby} -w -y < #{input} 2>&1"){ |pipe|
31
+ File.open(output,"w") { |outfd|
32
+ pipe.each{ |line|
33
+ outfd.print(line) \
34
+ if /^Shifting|^#{DeleteWarns::WARNERRREX}/o===line
35
+ #elsif /(warning|error)/i===line
36
+ # raise("a warning or error, appearently, not caught by rex above: "+line)
37
+ }
38
+ pid,status=Process.waitpid2 pipe.pid #get err status of subprocess
39
+ }
40
+ }
41
+ ENABLEMD5 and status==0 and system "md5sum #{input} > #{input}.md5" #compute sum only if no errors
42
+ end
43
+
44
+ def head(fname)
45
+ print "foobaaaaaaaaar\n\n\n\n\n\n"
46
+ File.open(fname){|fd| print(fd.read(512)+"\n") }
47
+ end
48
+
49
+ def rubylexervsruby(input,stringdata=nil,&ignore_it)
50
+
51
+ #cmdpath= `which #$0`
52
+ cmddir=Dir.getwd+"/test/code/"
53
+ base='test/results/'+File.basename(input)
54
+ _ttfile=base+'.tt'
55
+ mttfile=base+'.mtt'
56
+ p_ttfile=_ttfile+'.prs'
57
+ pmttfile=mttfile+'.prs'
58
+ p_ttdiff=p_ttfile+'.diff'
59
+ pmttdiff=pmttfile+'.diff'
60
+ nopfile=base+'.nop'
61
+ origfile=nopfile+'.prs'
62
+ ruby=ENV['RUBY'] || 'ruby'
63
+ expected_failures=Dir.getwd+"/test/code/"+File.basename(input)+".expected_failures"
64
+
65
+ #olddir=Dir.pwd
66
+ #Dir.chdir cmddir + '/../..'
67
+
68
+ nop_ruby "#{input[/\.gz$/]&&'z'}cat", input, nopfile, stringdata
69
+
70
+ print "executing: #{ruby} -Ilib test/code/tokentest.rb --keepws #{input}\n"
71
+
72
+ ruby_parsedump nopfile, origfile, ruby
73
+
74
+
75
+
76
+ tokentest nopfile, RubyLexer, RubyLexer::KeepWsTokenPrinter.new, nil, _ttfile
77
+ tokentest nopfile, RubyLexer, RubyLexer::KeepWsTokenPrinter.new(' '), nil, mttfile
78
+
79
+
80
+ ruby_parsedump _ttfile, p_ttfile, ruby
81
+ ruby_parsedump mttfile, pmttfile, ruby
82
+
83
+ if File.exists?(p_ttfile)
84
+ IO.popen("diff -u1 -b #{origfile} #{p_ttfile}"){ |pipe|
85
+ File.open(p_ttdiff,"w") { |diff|
86
+ DeleteWarns.deletewarns(pipe){|s| diff.print s}
87
+ }
88
+ }
89
+ # File.unlink p_ttfile
90
+ end
91
+
92
+ if File.exists?(pmttfile)
93
+ IO.popen("diff -u1 -b #{origfile} #{pmttfile}"){ |pipe|
94
+ File.open(pmttdiff,"w") { |diff|
95
+ DeleteWarns.deletewarns(pipe){|s| diff.print s}
96
+ }
97
+ }
98
+ # File.unlink pmttfile
99
+ end
100
+
101
+ list=[]
102
+ #nonwarn4=/(^(?![^\n]*warning[^\n]*)[^\n]*\n){4,}/im
103
+ #4 or more non-warning lines:
104
+ nonwarn4=/^(?:(?![^\r\n]*warning)[^\r\n]+(?:\r\n?|\n\r?)){4,}/mi
105
+ result=true
106
+ for name in [p_ttdiff,pmttdiff] do
107
+ i=File.read(name)
108
+ # i.tr("\r","\n")
109
+ # i.gsub!(/^\n/m, '')
110
+ i.sub!(/\A([^\r\n]+(\r\n?|\n\r?)){2}/, '') #remove 1st 2 lines
111
+ i.scan nonwarn4 do |j|
112
+ unless ignore_it && ignore_it[j]
113
+ list.push( *j.split(/\r\n?|\n\r?/) ) #unless list.size>=10
114
+ end
115
+ end
116
+
117
+ unless list.empty?
118
+ list=list.join("\n") +"\n"
119
+ unless (File.exists?(expected_failures) and File.read(expected_failures))==list
120
+ print list
121
+ result=false
122
+ end
123
+ list=[]
124
+ end
125
+ end
126
+
127
+ #print( list.join("\n") +"\n")
128
+ #Dir.chdir olddir
129
+ return result
130
+
131
+ =begin
132
+ case File.zero?(p_ttdiff).to_s +
133
+ File.zero?(pmttdiff).to_s
134
+ when 'falsefalse' then
135
+ head p_ttdiff
136
+ print "omitting #{pmttdiff}\n"
137
+ when 'falsetrue'
138
+ head p_ttdiff
139
+ when 'truefalse'
140
+ head pmttdiff
141
+ when 'truetrue'
142
+ #File.unlink origfile
143
+ return true
144
+ default
145
+ raise "unexpected 2bool val"
146
+ end
147
+ return false
148
+ =end
149
+
150
+ rescue Exception
151
+ system "ruby -c #{input} >/dev/null 2>&1" or expected="(expected) "
152
+ print "#{expected}error in: #{input}\n"
153
+ raise unless expected
154
+ end
155
+ end
156
+
157
+ if __FILE__==$0
158
+ #allow -e
159
+ stringdata=input=nil
160
+ opts=GetoptLong.new(["--eval", "-e", GetoptLong::REQUIRED_ARGUMENT])
161
+ opts.each{|opt,arg|
162
+ opt=='--eval' or raise :impossible
163
+ stringdata=arg
164
+ input='-e'
165
+ }
166
+
167
+ input||=ARGV[0]
168
+ RubyLexerVsRuby.rubylexervsruby(input,stringdata) and exit 0
169
+
170
+ exit 1
171
+ end
172
+
173
+
@@ -1,24 +1,31 @@
1
- #!/usr/bin/ruby -dw
1
+ #!/usr/bin/ruby
2
+ $Debug=true
2
3
  require "rubylexer"
3
- # require "rumalexer"
4
- require "token"
5
- require "tokenprinter"
6
4
  require "getoptlong"
7
5
  require "pp"
8
6
 
7
+ class RubyLexer
9
8
  class Token
10
9
  def verify_offset(fd); false end
11
10
 
12
11
  def check_for_error; end
13
12
  end
14
13
 
14
+ class LexerError<Exception; end
15
+
15
16
  module ErrorToken
16
- def check_for_error; raise @error end
17
+ def check_for_error; raise LexerError,@error end
17
18
  end
18
19
 
19
20
  class FileAndLineToken
20
21
  def verify_offset(fd); true end
21
22
  end
23
+ class ImplicitParamListStartToken
24
+ def verify_offset(fd); true end
25
+ end
26
+ class ImplicitParamListEndToken
27
+ def verify_offset(fd); true end
28
+ end
22
29
 
23
30
  module SimpleVerify
24
31
  def verify_offset(fd)
@@ -33,8 +40,13 @@ class MethNameToken; include SimpleVerify; end
33
40
 
34
41
  class SymbolToken
35
42
  def verify_offset(fd)
36
- readsym=fd.read(@ident.length)
37
- @ident[1]==?" or @ident[1]==?' or readsym==@ident
43
+ la=fd.read(2)
44
+ case la
45
+ when '%s': #stay right here
46
+ when /^:/: fd.pos-=1
47
+ else raise 'unrecognized symbol type'
48
+ end
49
+ @ident[1]==?" or @ident[1]==?' or fd.read(@ident.length-1)==@ident[1..-1]
38
50
 
39
51
  end
40
52
  end
@@ -42,7 +54,7 @@ end
42
54
  class EoiToken
43
55
  def verify_offset(fd)
44
56
  result=super(fd)
45
- fd.pos=fd.stat.size
57
+ fd.eof?
46
58
  return result
47
59
  end
48
60
  end
@@ -66,12 +78,12 @@ end
66
78
  class HerePlaceholderToken
67
79
  def verify_offset(fd)
68
80
  '<<'==fd.read(2) or return false
69
- @dash and (?-==fd.getc or return false)
70
- case ch=fd.getc
81
+ @dash and (?-==fd.read1 or return false)
82
+ case ch=fd.read1
71
83
  when ?', ?`, ?"
72
- @quote==ch.chr or return false
73
- fd.read(@ender.size)==@ender or return false
74
- return fd.getc.chr==@quote
84
+ @quote==ch.chr and
85
+ fd.read(@ender.size)==@ender and
86
+ return fd.read(1)==@quote
75
87
  when ?a..?z, ?A..?Z, ?_, ?0..?9
76
88
  @quote=='"' or return false
77
89
  fd.pos-=1
@@ -93,7 +105,7 @@ class StringToken
93
105
 
94
106
  def verify_subtoken_offsets(fd)
95
107
  #verify offsets of subtokens
96
- 1.step(@elems.length-1,2) { |i| @elems[i].verify_offset(fd) }
108
+ 1.step(@elems.length-1,2) { |i| @elems[i].verify_offset(fd) or raise LexerError}
97
109
  return true
98
110
  end
99
111
 
@@ -137,28 +149,28 @@ end
137
149
  # $ShowImplicit ? explicit_form : super
138
150
  # end
139
151
  #end
140
-
141
- class RuLexer
142
- def check_offset(tok,file=@file)
143
- endpos=(@moretokens.empty?)? file.pos : @moretokens[0].offset
144
- super(tok,file,endpos)
145
- end
146
152
  end
153
+ public
147
154
 
148
- def check_offset(tok,file,endpos)
149
- oldpos=file.pos
155
+ def check_offset(tok,file=nil,endpos=nil)
156
+ file||=@file
157
+ endpos||=(@moretokens.empty?)? file.pos : @moretokens[0].offset
158
+ oldpos=file.pos
150
159
 
151
- assert Integer===tok.offset
152
- assert Integer===endpos
153
- assert endpos>=tok.offset
160
+ assert Integer===tok.offset
161
+ assert Integer===endpos
162
+ endpos>=tok.offset or
163
+ raise RubyLexer::LexerError, "expected >=#{tok.offset}, got #{endpos}, "\
164
+ "token #{tok}:#{tok.class}"
154
165
 
155
- file.pos=tok.offset
156
- assert tok.verify_offset(file)
157
- case tok
158
- when StringToken,NumberToken,HereBodyToken
159
- else assert(file.pos==endpos)
160
- end
161
- file.pos=oldpos
166
+ file.pos=tok.offset
167
+ tok.verify_offset(file) or raise RubyLexer::LexerError, "couldn't check offset of token #{tok.class}: #{tok}"
168
+ case tok
169
+ when RubyLexer::StringToken,RubyLexer::NumberToken,
170
+ RubyLexer::HereBodyToken,RubyLexer::SymbolToken: #do nothing
171
+ else (file.pos==endpos) or raise RubyLexer::LexerError, "positions don't line up, expected #{endpos}, got #{file.pos}, token: #{tok}"
172
+ end
173
+ file.pos=oldpos
162
174
  end
163
175
 
164
176
 
@@ -166,34 +178,33 @@ end
166
178
 
167
179
 
168
180
 
169
- def tokentest(name,lexertype,pprinter,input=File.open(name),output=nil)
181
+ def tokentest(name,lexertype,pprinter,input=File.open(name),output=$stdout)
170
182
  input ||= File.open(name)
171
- if output
172
- old_stdout=$stdout
173
- $stdout=File.open(output,'w')
183
+ if output!=$stdout
184
+ output=File.open(output,'w')
174
185
  end
175
- File.open(name) {|fd|
186
+
187
+ fd=input
188
+ #File.open(name) {|fd|
176
189
  lxr=lexertype.new(name,fd,1)
177
190
 
178
191
  begin
179
192
  tok=lxr.get1token
180
193
  lxr.check_offset(tok)
181
194
  tok.check_for_error
182
- pprinter.pprint(tok)
183
- end until EoiToken===tok
195
+ pprinter.pprint(tok,output)
196
+ end until RubyLexer::EoiToken===tok
184
197
 
185
198
  #hack for SimpleTokenPrinter....
186
- print "\n" if NewlineToken===lxr.last_operative_token and
187
- SimpleTokenPrinter===pprinter
199
+ print "\n" if RubyLexer::NewlineToken===lxr.last_operative_token and
200
+ RubyLexer::SimpleTokenPrinter===pprinter
188
201
 
189
202
  # unless lxr.balanced_braces?
190
203
  # raise "unbalanced braces at eof"
191
204
  # end
192
- }
193
- if output
194
- $stdout.close
195
- $stdout=old_stdout
196
- end
205
+ #}
206
+ output.close unless output==$stdout
207
+
197
208
  end
198
209
 
199
210
  #$ShowImplicit=false
@@ -203,7 +214,7 @@ if __FILE__==$0
203
214
  # lexertype= RumaLexer if defined? RumaLexer
204
215
  lexertype=RubyLexer
205
216
  insertnils=fd=name=nil
206
- pprinter=SimpleTokenPrinter
217
+ pprinter=RubyLexer::SimpleTokenPrinter
207
218
 
208
219
  opts=GetoptLong.new \
209
220
  ["--eval","-e", GetoptLong::REQUIRED_ARGUMENT],
@@ -213,15 +224,15 @@ if __FILE__==$0
213
224
  ["--implicit","-i", GetoptLong::NO_ARGUMENT],
214
225
  ["--implicit-all", GetoptLong::NO_ARGUMENT]
215
226
 
227
+ saweval=nil
216
228
  opts.each do|opt,arg|
217
229
  case opt
218
230
  when '--eval' then
219
- pprinter =pprinter.new(sep,line,showzw)
220
- tokentest('-e',lexertype,pprinter,arg)
231
+ tokentest('-e',lexertype,pprinter.new(sep,line,showzw),arg)
221
232
  saweval=true
222
233
  # when '--ruby' then lexertype=RubyLexer
223
- when '--keepws' then pprinter= KeepWsTokenPrinter
224
- when '--maxws' then pprinter= KeepWsTokenPrinter;sep=' '
234
+ when '--keepws' then pprinter= RubyLexer::KeepWsTokenPrinter
235
+ when '--maxws' then pprinter= RubyLexer::KeepWsTokenPrinter;sep=' '
225
236
  when '--implicit' then showzw=1
226
237
  when '--implicit-all' then showzw=2
227
238
  else raise :impossible