rubylexer 0.6.2

Sign up to get free protection for your applications and to get access to all the features.
data/symboltable.rb ADDED
@@ -0,0 +1,65 @@
1
+ =begin copyright
2
+ rubylexer - a ruby lexer written in ruby
3
+ Copyright (C) 2004,2005 Caleb Clausen
4
+
5
+ This library is free software; you can redistribute it and/or
6
+ modify it under the terms of the GNU Lesser General Public
7
+ License as published by the Free Software Foundation; either
8
+ version 2.1 of the License, or (at your option) any later version.
9
+
10
+ This library is distributed in the hope that it will be useful,
11
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13
+ Lesser General Public License for more details.
14
+
15
+ You should have received a copy of the GNU Lesser General Public
16
+ License along with this library; if not, write to the Free Software
17
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
+ =end
19
+
20
+
21
#A table of named values with nested block scopes.
#Assigning a name inside a block shadows any outer value of that name;
#ending the block makes the outer value visible again.
class SymbolTable
  def initialize
    #note: below Stack means Array (used as a stack)
    @symbols={}         #Hash of String to Stack of Object(user-defined)
    @locals_lists=[{}]  #Stack of Hash of String to Boolean
  end

  #open a new (innermost) scope
  def start_block
    assert @locals_lists.last
    @locals_lists.push({})
    assert @locals_lists.last
  end

  #close the innermost scope, discarding every value assigned in it.
  #raises RuntimeError if only the outermost scope is left, in which
  #case the table is left untouched.
  def end_block
    assert @locals_lists.last
    #check before popping, so the outermost scope can never be removed
    @locals_lists.size>1 or raise "unbalanced end block"
    list=@locals_lists.pop
    list.each_key {|sym|
      stack=@symbols[sym]
      stack.pop
      #drop the entry entirely rather than leaving a nil value behind
      stack.empty? and @symbols.delete(sym)
    }
    assert @locals_lists.last
  end

  #innermost value currently bound to name, or nil if there is none
  def [](name)
    assert @locals_lists.last
    (stack=@symbols[name]) and stack.last
  end

  alias === []

  #bind name to val in the innermost scope; returns val
  def []=(name, val)
    assert @locals_lists.last
    if @locals_lists.last and @locals_lists.last[name]
      #already defined in this block
      @symbols[name][-1]=val   #overwrite current value
    else
      stack=(@symbols[name] ||= [])
      stack.push val
      @locals_lists.last[name]=true
    end
    assert @locals_lists.last
    return val
  end
end
@@ -0,0 +1,39 @@
1
+ #!/usr/bin/env ruby
2
+
3
#Filters unified diff text, replacing hunks that consist only of a
#warning/error line that moved, appeared or disappeared with a one-line
#summary.
module DeleteWarns
  FN='[^\n]+'
  DATETIME='\d+-\d\d?-\d\d? \d\d:\d\d:\d\d\.\d+ -?\d+'
  INDENTLINE='(?: [^\n]*\n)'

  WARNERRREX='(?:Reading a token: )?-:(\d+): (warning|(?:syntax )error)(?:: ([^\n]+))?'

  #hunk header line, common to all three patterns below
  hunk='^@@ -\d+,\d+ \+\d+,\d+ @@\n'

  #a warning/error removed at one line and re-added, unchanged, at another.
  #(matching of the "--- FN DATETIME" / "+++ FN DATETIME" file header
  #was disabled in the original pattern and stays disabled.)
  RE=Regexp.new(hunk+INDENTLINE+'+'+
       '-(?:Reading a token: )?-:(\d+): (warning|error): ([^\n]+)\n'+
       '\+(?:Reading a token: )?-:(\d+): \2: \3\n'+
       INDENTLINE+'+', Regexp::MULTILINE)

  #a warning/error present only in the new file
  RE2=Regexp.new(hunk+INDENTLINE+'*'+
        '\+'+WARNERRREX+'\n'+
        INDENTLINE+'*', Regexp::MULTILINE)

  #a warning/error present only in the old file
  RE3=Regexp.new(hunk+INDENTLINE+'+'+
        '-(?:Reading a token: )?-:(\d+): (warning|error): ([^\n]+)\n'+
        INDENTLINE+'+', Regexp::MULTILINE)

  #Reads input (anything with IO-style each(separator)) one per-file diff
  #at a time and yields each chunk with the substitutions applied.
  def DeleteWarns.deletewarns(input)
    #double quotes are required here: in single quotes \n is a literal
    #backslash followed by n, which never occurs, so all of input would
    #be read as a single record
    input.each("\n--- ") {|match|
      yield match.gsub(RE,"\\2 moved from \\1 to \\4: \\3\n").
                  gsub(RE2,"Created \\2(s) in new file, line \\1: \\3\n").
                  gsub(RE3,"Removed \\2(s) from old file (?!), line \\1: \\3\n")
    }
  end
end
36
+
37
#when run directly as a script, filter standard input to standard output
if $0==__FILE__
  DeleteWarns.deletewarns($stdin) do |chunk|
    $stdout.print chunk
  end
end
@@ -0,0 +1,38 @@
1
+ #!/usr/bin/env ruby -dw
2
+ require 'rubylexer'
3
+ require 'getoptlong'
4
+
5
+
6
+
7
#a Token#inspect that omits the object id
class Token
  #Returns "#<ClassName: @ivar=value ...>" with the instance variables in
  #sorted order, each value shown via its own inspect.
  def inspect
    #join flattens the nested arrays on every ruby version; Array#to_s
    #only did so on ruby 1.8
    ["#<",self.class,": ",instance_variables.sort.collect{|v|
      [v,"=",instance_variable_get(v).inspect," "]
    }].join.sub(/ $/,'>')
  end
end
15
+
16
file=nil
name=nil  #declared before the option loop so the block below can set it

#allow -e
opts=GetoptLong.new(["--eval", "-e", GetoptLong::REQUIRED_ARGUMENT])
opts.each{|opt,arg|
  opt=='--eval' or raise "impossible"
  file=arg
  name='-e'
}

#determine input file and its name if not already known
file||=if name=ARGV.first
    File.open(name)
  else
    name='-'
    $stdin  #no file given: read the program from standard input
  end

#print every token, one per line, up to and including the eoi token
lexer=RubyLexer.new(name, file)
until EoiToken===(tok=lexer.get1token)
  p tok
end
p tok  #print eoi token
@@ -0,0 +1,12 @@
1
# Runs testcode/rubylexervsruby.rb over testdata/p.rb and over every file
# that locate(1) reports for tk.rb, examples/examples_test.rb and .rb,
# skipping paths under /testresults/ or containing files/patch-.
test -n "$RUBY" || export RUBY=ruby
export RUBYLEXERVSRUBY="$RUBY testcode/rubylexervsruby.rb"

if $RUBY --version|grep '^ruby 1\.6'; then
  # report failure through the exit status as well as the message
  echo 'error: need ruby 1.8' >&2; exit 1
fi



for i in testdata/p.rb `(locate tk.rb;locate examples/examples_test.rb;locate .rb)|egrep -v '/testresults/|files/patch-'`; do
  # $RUBYLEXERVSRUBY is deliberately unquoted: it holds a command plus an argument
  $RUBYLEXERVSRUBY "$i"
done
@@ -0,0 +1,104 @@
1
+ #!/usr/bin/ruby -dw
2
+
3
+ require "testcode/tokentest"
4
+ require "testcode/deletewarns"
5
+
6
#when true, ruby_parsedump skips inputs whose md5sum is unchanged
ENABLEMD5=false

#these remain globals only for ease in interpolation
#absolute path of this script, taken from $0 directly: `which` prints
#nothing when the script is not executable or not on PATH, and its output
#ends in a newline
$cmdpath=File.expand_path($0)
$cmddir=File.dirname $cmdpath
$input=ARGV[0] or abort "usage: #$0 rubyfile"
$base='testresults/'+File.basename($input)
$_ttfile=$base+'.tt'            #tokens printed by KeepWsTokenPrinter.new
$mttfile=$base+'.mtt'           #tokens printed by KeepWsTokenPrinter.new(' ')
$p_ttfile=$_ttfile+'.prs'       #parse dump of $_ttfile
$pmttfile=$mttfile+'.prs'       #parse dump of $mttfile
$p_ttdiff=$p_ttfile+'.diff'
$pmttdiff=$pmttfile+'.diff'
$nopfile=$base+'.nop'           #input with a BEGIN{exit} line prepended
$origfile=$nopfile+'.prs'       #parse dump of $nopfile
$ruby=ENV['RUBY'] || 'ruby'
22
+
23
#Write "BEGIN{exit};" to output, then append whatever the shell command
#"cmd input" prints. Returns the result of Kernel#system.
def nop_ruby(cmd,input,output)
  File.open(output,'w') do |out|
    out.print "BEGIN{exit};\n"
  end
  system "#{cmd} #{input} >> #{output}"
end
27
+
28
#Run "$ruby -w -y" on input and write to output only those lines of its
#combined stdout/stderr that start with "Shifting" or match
#DeleteWarns::WARNERRREX.
#When ENABLEMD5 is set, does nothing if "md5sum -c input.md5" succeeds, and
#records a new checksum afterwards only if the subprocess succeeded.
def ruby_parsedump(input,output)
  #todo: use ruby's md5 lib
  #recursive ruby call here is unavoidable because -y flag has to be set

  #do nothing if input unchanged
  ENABLEMD5 and system "md5sum -c #{input}.md5 2>/dev/null" and return

  IO.popen("#$ruby -w -y < #{input} 2>&1"){ |pipe|
    File.open(output,"w") { |outfd|
      pipe.each{ |line|
        outfd.print(line) if /^Shifting|^#{DeleteWarns::WARNERRREX}/o===line
        #elsif /(warning|error)/i===line
        #  raise("a warning or error, apparently, not caught by rex above: "+line)
      }
    }
  }
  #the block form of popen closes the pipe, waits for the child and sets
  #$?; waiting on pipe.pid by hand inside the block would reap the child
  #before popen does
  status=$?
  ENABLEMD5 and status and status.success? and
    system "md5sum #{input} > #{input}.md5"  #compute sum only if no errors
end
47
+
48
#work from the directory above the one holding this script
Dir.chdir $cmddir + '/..'

#copy the input (through zcat if its name ends in .gz) behind a BEGIN{exit} line
nop_ruby "#{$input[/\.gz$/]&&'z'}cat", $input, $nopfile

print "executing: #$ruby testcode/tokentest.rb --keepws #$input\n"

#reference parse dump of the unmodified input
ruby_parsedump $nopfile, $origfile

#re-print the input from its tokens, with two different printers
tokentest $nopfile, RubyLexer, KeepWsTokenPrinter.new, nil, $_ttfile
tokentest $nopfile, RubyLexer, KeepWsTokenPrinter.new(' '), nil, $mttfile

#parse dumps of the two re-printed versions
ruby_parsedump $_ttfile, $p_ttfile
ruby_parsedump $mttfile, $pmttfile

#diff each parse dump against the reference, filter the diff through
#DeleteWarns, then remove the dump
[[$p_ttfile,$p_ttdiff],[$pmttfile,$pmttdiff]].each{|parsed,diffname|
  #File.exist? rather than File.exists?, which newer rubies no longer have
  File.exist?(parsed) or next
  IO.popen("diff -u1 -b #$origfile #{parsed}"){ |pipe|
    File.open(diffname,"w") { |diff|
      DeleteWarns.deletewarns(pipe){|s| diff.print s}
    }
  }
  File.unlink parsed
}
82
+
83
#print at most the first 10 lines of the file named fname
def head(fname)
  File.open(fname) do |fd|
    10.times do
      line=fd.gets
      line or break  #fewer than 10 lines in the file
      print line
    end
  end
end
86
+
87
#show the start of the first non-empty diff; exit 0 only if both are empty
case File.zero?($p_ttdiff).to_s +
     File.zero?($pmttdiff).to_s
when 'falsefalse' then
  head $p_ttdiff
  print "omitting #$pmttdiff\n"
when 'falsetrue'
  head $p_ttdiff
when 'truefalse'
  head $pmttdiff
when 'truetrue'
  #File.unlink $origfile
  exit(0)
else  #ruby has no 'default' keyword; as written it was part of the branch above
  raise "unexpected 2bool val"
end

exit 1
104
+
@@ -0,0 +1,51 @@
1
+ #!/bin/sh
2
+ #set -v
3
# Print a "BEGIN{exit};" line followed by the output of command $1 run on
# file $2. Defined with POSIX syntax: the 'function' keyword is not
# available in every /bin/sh.
nop_ruby() { (echo "BEGIN{exit};"; exec $1 "$2"); }

# Write to $2 the third space-separated field of every line starting with
# "Shift" in the output of "ruby -w -y" run on $1, then store the md5sum
# of $1 in $1.md5.
ruby_parsedump() {
  md5sum -c "$1.md5" 2>/dev/null && return  #do nothing if input unchanged
  ruby -w -y < "$1" 2>&1 | grep ^Shift|cut -d" " -f3 >"$2"
  md5sum "$1" > "$1.md5"
}
9
+
10
cmdpath=`which "$0"`
cmddir=`dirname "$cmdpath"`
cd "$cmddir/.."

base=testresults/`basename "$1"`
origfile=$base.nop.prs
_ttfile=$base.tt
mttfile=$base.mtt
p_ttfile=$_ttfile.prs
pmttfile=$mttfile.prs
nopfile=$base.nop

# gzipped inputs go through zcat, everything else through cat
case "$1" in
  *.gz) nop_ruby zcat "$1" > "$nopfile" ;;
  *)    nop_ruby cat "$1" > "$nopfile" ;;
esac



# NOTE(review): the option list visible in testcode/tokentest.rb has --ruby
# commented out; confirm tokentest.rb still accepts it.
echo executing: testcode/tokentest.rb --ruby --keepws $1 '\|'
testcode/tokentest.rb --ruby --keepws "$nopfile" > "$_ttfile"
testcode/tokentest.rb --ruby --maxws "$nopfile" > "$mttfile"
ruby_parsedump "$nopfile" "$origfile"
ruby_parsedump "$_ttfile" "$p_ttfile"
ruby_parsedump "$mttfile" "$pmttfile"

[ -e "$p_ttfile" ] && (diff -ub "$origfile" "$p_ttfile" | \
  testcode/deletewarns.rb > "$p_ttfile.diff"
  rm "$p_ttfile"
)
[ -e "$pmttfile" ] && (diff -ub "$origfile" "$pmttfile" | \
  testcode/deletewarns.rb > "$pmttfile.diff"
  rm "$pmttfile"
)
head "$p_ttfile.diff"
test -s "$p_ttfile.diff" || head "$pmttfile.diff"
test -s "$p_ttfile.diff" && test -s "$pmttfile.diff" && echo omitting "$pmttfile.diff"

# succeed only when both diff files exist and are empty. The previous
# 'test -z FILE' checked whether the file NAME was an empty string, which
# is never true, so the script could never exit 0.
if [ -e "$p_ttfile.diff" ] && [ ! -s "$p_ttfile.diff" ] &&
   [ -e "$pmttfile.diff" ] && [ ! -s "$pmttfile.diff" ]; then
  exit 0
fi
exit 1
@@ -0,0 +1,237 @@
1
+ #!/usr/bin/ruby -dw
2
+ require "rubylexer"
3
+ # require "rumalexer"
4
+ require "token"
5
+ require "tokenprinter"
6
+ require "getoptlong"
7
+ require "pp"
8
+
9
#defaults for every token class; specific classes override these below
class Token
  #no offset verification by default: always false
  def verify_offset(fd)
    false
  end

  #no error to report by default
  def check_for_error
  end
end
14
+
15
module ErrorToken
  #raise whatever is stored in @error
  def check_for_error
    raise @error
  end
end
18
+
19
class FileAndLineToken
  #always true; nothing is read from fd
  def verify_offset(fd)
    true
  end
end
22
+
23
#verification for tokens whose @ident should appear verbatim in the file
module SimpleVerify
  #read @ident.length characters from fd and compare them with @ident
  def verify_offset(fd)
    expected=@ident
    fd.read(expected.length)==expected
  end
end

class WToken
  include SimpleVerify
end

class NewlineToken
  include SimpleVerify
end

class IgnoreToken
  include SimpleVerify
end

class MethNameToken
  include SimpleVerify
end
33
+
34
class SymbolToken
  #Reads @ident.length characters from fd. A symbol whose second character
  #is a quote is always accepted; any other must match the text read.
  def verify_offset(fd)
    readsym=fd.read(@ident.length)
    return true if @ident[1]==?"
    return true if @ident[1]==?'
    readsym==@ident
  end
end
41
+
42
class EoiToken
  #Delegates to the inherited check, then leaves fd positioned at the end
  #of the file. Returns the inherited check's result.
  def verify_offset(fd)
    verified=super(fd)
    fd.pos=fd.stat.size
    verified
  end
end
49
+
50
class NoWsToken
  #True when the character before the current position and the one at it
  #are both non-whitespace. fd is left at its original position.
  def verify_offset(fd)
    here=fd.pos
    fd.pos=here-1
    neighbours=fd.read(2)
    fd.pos=here
    /^[^\s\v\t\n\r\f]{2}$/===neighbours
  end
end
59
+
60
class HereBodyToken
  #only the subtokens of the object held in @ident are verified
  def verify_offset(fd)
    body=@ident
    body.verify_subtoken_offsets(fd)
  end
end
65
+
66
class HerePlaceholderToken
  #Checks that fd holds a here document header at its current position:
  #"<<", then "-" if @dash is set, then @ender, either bare (only when
  #@quote is a double quote) or wrapped in the @quote character.
  def verify_offset(fd)
    return false unless '<<'==fd.read(2)
    if @dash
      return false unless ?-==fd.getc
    end
    ch=fd.getc
    case ch
    when ?', ?`, ?"
      #quoted terminator: opening quote, terminator text, closing quote
      return false unless @quote==ch.chr
      return false unless fd.read(@ender.size)==@ender
      fd.getc.chr==@quote
    when ?a..?z, ?A..?Z, ?_, ?0..?9
      #bare terminator: only acceptable when @quote is a double quote
      return false unless @quote=='"'
      fd.pos-=1  #ch was the first character of the terminator
      fd.read(@ender.size)==@ender
    else
      false
    end
  end
end
84
+
85
class StringToken
  #for each value of @char, the alternative two-character openings
  #accepted in place of @char itself
  FANCY_QUOTE_BEGINNINGS= {'`'=>'%x', '['=>'%w', '{'=>'%W',
                           '"'=>/('|%[^a-pr-z0-9])/i, '/'=>'%r'}

  #Reads two characters from fd; they must start with @char or match the
  #FANCY_QUOTE_BEGINNINGS entry for @char. Then verifies the subtokens.
  def verify_offset(fd)
    str=fd.read(2)
    unless @char==str[0,1]
      FANCY_QUOTE_BEGINNINGS[@char]===str or return false
    end
    verify_subtoken_offsets(fd)
  end

  #Calls verify_offset on every odd-indexed element of @elems.
  #Their results are not examined; always returns true.
  def verify_subtoken_offsets(fd)
    last=@elems.length-1
    1.step(last,2) do |idx|
      @elems[idx].verify_offset(fd)
    end
    true
  end

  #check the odd-indexed elements of @elems first, then this token itself
  def check_for_error
    last=@elems.size-1
    1.step(last,2) do |idx|
      @elems[idx].check_for_error
    end
    super
  end
end
107
+
108
class RubyCode
  #Runs check_offset on every token in @ident. Each token is expected to
  #end where the next one starts; the final token ends at its own offset
  #plus the size of its string form. fd is left at that final position.
  def verify_offset(fd)
    thistok=nexttok=endpos=nil
    0.upto(@ident.size-1) do |idx|
      thistok=@ident[idx]
      nexttok=@ident[idx+1]
      endpos=if nexttok
               nexttok.offset
             else
               thistok.offset+thistok.to_s.size
             end
      check_offset(thistok,fd,endpos)
    end
    #the loop must have stopped on the last token, which must be a WToken
    assert nexttok.nil?
    assert thistok.object_id==@ident.last.object_id
    assert WToken===thistok
    fd.pos=endpos
  end

  #ask every token in @ident to report its error, if any
  def check_for_error
    @ident.each do |tok|
      tok.check_for_error
    end
  end
end
126
+
127
+
128
class NumberToken
  #only the first character is checked: a digit, ?, + or -
  def verify_offset(fd)
    first=fd.read(1)
    /^[0-9?+-]$/===first
  end
end
133
+
134
+
135
+ #class ZwToken
136
+ # def to_s
137
+ # $ShowImplicit ? explicit_form : super
138
+ # end
139
+ #end
140
+
141
class RuLexer
  #Two-argument front end to the inherited check_offset(tok,file,endpos).
  #The expected end position is the offset of the first queued token in
  #@moretokens, or the current file position if none is queued.
  def check_offset(tok,file=@file)
    endpos=if @moretokens.empty?
             file.pos
           else
             @moretokens[0].offset
           end
    super(tok,file,endpos)
  end
end
147
+
148
#Assert that tok really is found in file at tok.offset and, except for
#string, number and here-document body tokens, that verifying it leaves
#the file at endpos. The file position is put back afterwards.
def check_offset(tok,file,endpos)
  saved=file.pos

  assert Integer===tok.offset
  assert Integer===endpos
  assert endpos>=tok.offset

  file.pos=tok.offset
  assert tok.verify_offset(file)
  unless StringToken===tok or NumberToken===tok or HereBodyToken===tok
    assert(file.pos==endpos)
  end
  file.pos=saved
end
163
+
164
+
165
+
166
+
167
+
168
+
169
#Lex input with a fresh lexertype instance, checking the offset and error
#state of every token and printing each one through pprinter.
# name      - name passed to the lexer; also the file to open when input is nil
# lexertype - lexer class, instantiated as lexertype.new(name,input,1)
# pprinter  - object whose pprint(tok) prints one token
# input     - source handed to the lexer; if nil, the file called name is
#             opened here and closed again afterwards
# output    - if given, a file name that $stdout is redirected to for the
#             duration of the call
def tokentest(name,lexertype,pprinter,input=nil,output=nil)
  opened=old_stdout=nil
  begin
    #only open (and later close) a file ourselves if the caller gave no input
    input ||= (opened=File.open(name))
    if output
      redirected=File.open(output,'w')
      old_stdout,$stdout=$stdout,redirected
    end

    #lex what the caller supplied, rather than always reopening name
    lxr=lexertype.new(name,input,1)

    begin
      tok=lxr.get1token
      lxr.check_offset(tok)
      tok.check_for_error
      pprinter.pprint(tok)
    end until EoiToken===tok

    #hack for SimpleTokenPrinter....
    print "\n" if NewlineToken===lxr.last_operative_token and
                  SimpleTokenPrinter===pprinter

#    unless lxr.balanced_braces?
#      raise "unbalanced braces at eof"
#    end
  ensure
    #undo the redirection and release our file even if lexing raised
    if old_stdout
      $stdout.close
      $stdout=old_stdout
    end
    opened and (opened.closed? or opened.close)
  end
end
198
+
199
+ #$ShowImplicit=false
200
#command line driver: print the tokens of each -e argument, of each file
#named on the command line, or of standard input if neither is given
if __FILE__==$0

  sep,line,showzw='',1,0
#  lexertype= RumaLexer if defined? RumaLexer
  lexertype=RubyLexer
  saweval=false  #declared before the option loop so the block below can set it
  pprinter=SimpleTokenPrinter

  opts=GetoptLong.new(
    ["--eval","-e", GetoptLong::REQUIRED_ARGUMENT],
#    ["--ruby","-r", GetoptLong::NO_ARGUMENT],
    ["--keepws","-k", GetoptLong::NO_ARGUMENT],
    ["--maxws","-m", GetoptLong::NO_ARGUMENT],
    ["--implicit","-i", GetoptLong::NO_ARGUMENT],
    ["--implicit-all", GetoptLong::NO_ARGUMENT]
  )

  opts.each do|opt,arg|
    case opt
    when '--eval' then
      #build a printer just for this snippet; pprinter itself must remain
      #a class because it is instantiated again below
      tokentest('-e',lexertype,pprinter.new(sep,line,showzw),arg)
      saweval=true
#    when '--ruby' then lexertype=RubyLexer
    when '--keepws' then pprinter= KeepWsTokenPrinter
    when '--maxws' then pprinter= KeepWsTokenPrinter;sep=' '
    when '--implicit' then showzw=1
    when '--implicit-all' then showzw=2
    else raise "impossible"
    end
  end

  pprinter =pprinter.new(sep,line,showzw)

  if ARGV.empty?
    #nothing named on the command line: read stdin unless -e was given
    saweval or tokentest('-',lexertype,pprinter,$stdin)
  else
    ARGV.each{|fn| tokentest(fn,lexertype,pprinter) }
  end
#  ARGV.first[/[_.]rb$/i] and lexertype=RubyLexer #filename with _rb are special hack

end