rubylexer 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/symboltable.rb ADDED
@@ -0,0 +1,65 @@
1
+ =begin copyright
2
+ rubylexer - a ruby lexer written in ruby
3
+ Copyright (C) 2004,2005 Caleb Clausen
4
+
5
+ This library is free software; you can redistribute it and/or
6
+ modify it under the terms of the GNU Lesser General Public
7
+ License as published by the Free Software Foundation; either
8
+ version 2.1 of the License, or (at your option) any later version.
9
+
10
+ This library is distributed in the hope that it will be useful,
11
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13
+ Lesser General Public License for more details.
14
+
15
+ You should have received a copy of the GNU Lesser General Public
16
+ License along with this library; if not, write to the Free Software
17
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
+ =end
19
+
20
+
21
# Block-scoped symbol table.
#
# Each name maps to a stack of values; the top of the stack is the value
# visible in the innermost block that defined the name.  start_block opens a
# new scope, end_block discards every definition made in the current scope.
# Relies on an +assert+ method being available from the surrounding project.
class SymbolTable
  def initialize
    #note: below Stack means Array (used as a stack)
    @symbols={}        #Hash of String to Stack of Object(user-defined)
    @locals_lists=[{}] #Stack of Hash of String to Boolean
  end

  # Open a new (innermost) scope.
  def start_block
    assert @locals_lists.last
    @locals_lists.push({})
    assert @locals_lists.last
  end

  # Close the innermost scope, dropping every value defined in it.
  # Raises RuntimeError ("unbalanced end block") when only the outermost
  # scope is left; the check happens before anything is modified so the
  # table stays usable afterwards.
  def end_block
    assert @locals_lists.last
    @locals_lists.size>1 or raise "unbalanced end block"
    list=@locals_lists.pop
    list.each_key {|sym|
      stack=@symbols[sym]
      stack.pop
      #drop the key entirely instead of leaving a nil entry behind
      @symbols.delete(sym) if stack.empty?
    }
    assert @locals_lists.last
  end

  # Current value of +name+, or nil when it is not defined in any scope.
  def [](name)
    assert @locals_lists.last
    (stack=@symbols[name]) and stack.last
  end

  alias === []

  # Define +name+ in the innermost scope, or overwrite its value if it was
  # already defined in that scope.  Returns +val+.
  def []=(name, val)
    assert @locals_lists.last
    if @locals_lists.last and @locals_lists.last[name]
      #already defined in this block
      @symbols[name][-1]=val  #overwrite current value
    else
      stack=(@symbols[name] ||= [])
      stack.push val
      @locals_lists.last[name]=true
    end
    assert @locals_lists.last
    return val
  end
end
@@ -0,0 +1,39 @@
1
+ #!/usr/bin/env ruby
2
+
3
# Condenses unified-diff hunks that consist solely of a moved, added or
# removed ruby warning/error line into a single descriptive line.
module DeleteWarns
  FN='[^\n]+'
  DATETIME='\d+-\d\d?-\d\d? \d\d:\d\d:\d\d\.\d+ -?\d+'
  INDENTLINE='(?: [^\n]*\n)'  #an unchanged (context) diff line

  #a warning or error line as printed by ruby reading its program from stdin
  WARNERRREX='(?:Reading a token: )?-:(\d+): (warning|(?:syntax )error)(?:: ([^\n]+))?'

  #hunk where a message disappears at one line number and the same message
  #reappears at another; the leading (?#...) group is a regex comment, so the
  #file header lines are not actually matched
  RE=%r"(?#--- #{FN} #{DATETIME}
\+\+\+ #{FN} #{DATETIME}
)^@@ -\d+,\d+ \+\d+,\d+ @@
#{INDENTLINE}+\
-(?:Reading a token: )?-:(\d+): (warning|error): ([^\n]+)\n\
\+(?:Reading a token: )?-:(\d+): \2: \3
#{INDENTLINE}+"mo

  #hunk whose only change is an added message line
  RE2=%r"^@@ -\d+,\d+ \+\d+,\d+ @@
#{INDENTLINE}*\
\+#{WARNERRREX}\n\
#{INDENTLINE}*"mo

  #hunk whose only change is a removed message line
  RE3=%r"^@@ -\d+,\d+ \+\d+,\d+ @@
#{INDENTLINE}+\
-(?:Reading a token: )?-:(\d+): (warning|error): ([^\n]+)\n\
#{INDENTLINE}+"mo

  # Reads +input+ (anything whose #each accepts a record separator, e.g. an
  # IO) one per-file diff at a time and yields each chunk with the hunks
  # described above replaced by one-line summaries.
  def DeleteWarns.deletewarns(input)
    #double quotes are required here: in single quotes \n is a literal
    #backslash followed by n, so the input was never split per file
    input.each("\n--- ") {|match|
      moved=match.gsub(RE,"\\2 moved from \\1 to \\4: \\3\n")
      created=moved.gsub(RE2,"Created \\2(s) in new file, line \\1: \\3\n")
      yield created.gsub(RE3,"Removed \\2(s) from old file (?!), line \\1: \\3\n")
    }
  end
end
36
+
37
#when run as a script: filter standard input to standard output
if $0==__FILE__
  DeleteWarns.deletewarns($stdin) do |chunk|
    $stdout.print chunk
  end
end
@@ -0,0 +1,38 @@
1
+ #!/usr/bin/env ruby -dw
2
+ require 'rubylexer'
3
+ require 'getoptlong'
4
+
5
+
6
+
7
#a Token#inspect that omits the object id
class Token
  # Renders as "#<Class: @a=... @b=...>", instance variables sorted by name.
  # The pieces are joined explicitly: Array#to_s only concatenates on ruby
  # 1.8, on later versions it returns the array's inspect form.
  def inspect
    fields=instance_variables.sort.collect{|v|
      "#{v}=#{instance_variable_get(v).inspect}"
    }
    #no instance variables: same "#<Class:>" form the 1.8 code produced
    return "#<#{self.class}:>" if fields.empty?
    "#<#{self.class}: #{fields.join(' ')}>"
  end
end
15
+
16
file=nil
name=nil  #declared out here so the value set inside the option block survives

#allow -e
opts=GetoptLong.new(["--eval", "-e", GetoptLong::REQUIRED_ARGUMENT])
opts.each{|opt,arg|
  #raise needs a String or an Exception; a Symbol would itself be a TypeError
  opt=='--eval' or raise "impossible option: #{opt}"
  file=arg
  name='-e'
}

#determine input file and its name if not already known
unless file
  if name=ARGV.first
    file=File.open(name)
  else
    name='-'
    file=$stdin  #with no file argument, lex standard input
  end
end

#print every token, finishing with the eoi token
lexer=RubyLexer.new(name, file)
loop do
  tok=lexer.get1token
  p tok
  break if EoiToken===tok
end
@@ -0,0 +1,12 @@
1
# Use the interpreter named by $RUBY, defaulting to plain `ruby`.
test -n "$RUBY" || export RUBY=ruby
export RUBYLEXERVSRUBY="$RUBY testcode/rubylexervsruby.rb"

if $RUBY --version|grep '^ruby 1\.6'; then
  # fail with a non-zero status so callers can detect the problem
  echo 'error: need ruby 1.8'; exit 1
fi

# Run the comparison on the bundled sample and on every .rb file that
# locate knows about ($RUBYLEXERVSRUBY is deliberately left unquoted so it
# splits into command and script name).
for i in testdata/p.rb `(locate tk.rb;locate examples/examples_test.rb;locate .rb)|egrep -v '/testresults/|files/patch-'`; do
  $RUBYLEXERVSRUBY "$i"
done
@@ -0,0 +1,104 @@
1
+ #!/usr/bin/ruby -dw
2
+
3
+ require "testcode/tokentest"
4
+ require "testcode/deletewarns"
5
+
6
ENABLEMD5=false

#these remain globals only for ease in interpolation
$cmdpath= `which #$0`.chomp
#which prints nothing when $0 is not found; fall back to $0 itself so the
#later chdir is still relative to this script
$cmdpath=$0 if $cmdpath.empty?
$cmddir=File.dirname $cmdpath
$input=ARGV[0] or abort "usage: #$0 <ruby source file>"
$base='testresults/'+File.basename($input)
$_ttfile=$base+'.tt'        #lexer output, whitespace kept
$mttfile=$base+'.mtt'       #lexer output, extra whitespace between tokens
$p_ttfile=$_ttfile+'.prs'   #parser dumps of the two files above
$pmttfile=$mttfile+'.prs'
$p_ttdiff=$p_ttfile+'.diff' #diffs of those dumps against the original's
$pmttdiff=$pmttfile+'.diff'
$nopfile=$base+'.nop'       #input prefixed with BEGIN{exit};
$origfile=$nopfile+'.prs'   #parser dump of the original input
$ruby=ENV['RUBY'] || 'ruby'
22
+
23
# Writes "BEGIN{exit};" to +output+, then appends whatever the shell command
# "+cmd+ +input+" prints.  +cmd+ is passed to the shell as-is; the two paths
# are shell-escaped so names containing spaces or metacharacters work.
# Returns the result of Kernel#system.
def nop_ruby(cmd,input,output)
  require 'shellwords'
  File.open(output,'w'){|f| f.print "BEGIN{exit};\n" }
  system [cmd,Shellwords.escape(input),'>>',Shellwords.escape(output)].join(' ')
end
27
+
28
# Runs "$ruby -w -y" with +input+ on its stdin and writes to +output+ only the
# lines of its combined stdout/stderr that start with "Shifting" or look like
# a warning/error message (DeleteWarns::WARNERRREX).
# When ENABLEMD5 is set, nothing is done if +input+ matches its stored md5
# sum, and the sum is refreshed after an error-free run.
def ruby_parsedump(input,output)
  require 'shellwords'
  #todo: use ruby's md5 lib
  #recursive ruby call here is unavoidable because -y flag has to be set
  quoted=Shellwords.escape(input)

  #do nothing if input unchanged
  ENABLEMD5 and system "md5sum -c #{quoted}.md5 2>/dev/null" and return

  IO.popen("#$ruby -w -y < #{quoted} 2>&1"){ |pipe|
    File.open(output,"w") { |outfd|
      pipe.each{ |line|
        outfd.print(line) \
          if /^Shifting|^#{DeleteWarns::WARNERRREX}/o===line
        #elsif /(warning|error)/i===line
        #  raise("a warning or error, apparently, not caught by rex above: "+line)
      }
    }
  }
  status=$?  #the block form of popen leaves the child's exit status in $?
  ENABLEMD5 and status.success? and system "md5sum #{quoted} > #{quoted}.md5" #compute sum only if no errors
end
47
+
48
#work from the directory above the one this script lives in
Dir.chdir $cmddir + '/..'

#copy the (possibly gzipped) input, prefixed so that ruby exits before running it
nop_ruby "#{$input[/\.gz$/]&&'z'}cat", $input, $nopfile

print "executing: #$ruby testcode/tokentest.rb --keepws #$input\n"

#parser dump of the original source
ruby_parsedump $nopfile, $origfile

#re-emit the source from the lexer's tokens, plain and with extra whitespace
tokentest $nopfile, RubyLexer, KeepWsTokenPrinter.new, nil, $_ttfile
tokentest $nopfile, RubyLexer, KeepWsTokenPrinter.new(' '), nil, $mttfile

#parser dumps of the re-emitted sources
ruby_parsedump $_ttfile, $p_ttfile
ruby_parsedump $mttfile, $pmttfile

#diff each dump against the original's, condense warning-only hunks, then
#delete the dump (File.exist? rather than the removed File.exists?)
[[$p_ttfile,$p_ttdiff],[$pmttfile,$pmttdiff]].each{ |parsed,difffile|
  next unless File.exist?(parsed)
  IO.popen("diff -u1 -b #$origfile #{parsed}"){ |pipe|
    File.open(difffile,"w") { |diff|
      DeleteWarns.deletewarns(pipe){|s| diff.print s}
    }
  }
  File.unlink parsed
}
82
+
83
# Prints the first +count+ lines (10 unless told otherwise) of the file
# +fname+ to standard output; shorter files are printed completely.
def head(fname,count=10)
  File.open(fname){|fd|
    count.times{
      line=fd.gets or break  #end of file reached early
      print line
    }
  }
end
86
+
87
#show the start of whichever diff is not empty (the plain one wins when both
#are) and exit 0 only if both diffs are empty
case File.zero?($p_ttdiff).to_s +
     File.zero?($pmttdiff).to_s
when 'falsefalse' then
  head $p_ttdiff
  print "omitting #$pmttdiff\n"
when 'falsetrue'
  head $p_ttdiff
when 'truefalse'
  head $pmttdiff
when 'truetrue'
  #File.unlink $origfile
  exit(0)
else  #ruby's fallback branch is spelled else; "default" was just a method call
  raise "unexpected 2bool val"
end

exit 1
104
+
@@ -0,0 +1,51 @@
1
+ #!/bin/sh
2
+ #set -v
3
# Print a ruby prologue that exits at once, followed by the output of
# running command $1 on file $2.  (Plain POSIX function syntax: the
# "function" keyword is not available in every /bin/sh.)
nop_ruby() { (echo "BEGIN{exit};"; exec "$1" "$2"); }
4
# Write the third field of every "Shift..." line of ruby's -y parser trace
# for file $1 to file $2, then record $1's md5 sum in $1.md5.  Skipped
# entirely when $1 still matches its recorded sum.
ruby_parsedump() {
  md5sum -c "$1.md5" 2>/dev/null && return #do nothing if input unchanged
  ruby -w -y < "$1" 2>&1 | grep ^Shift|cut -d" " -f3 >"$2"
  md5sum "$1" > "$1.md5"
}
9
+
10
# Work from the directory above the one holding this script.
cmdpath=`which "$0"`
cmddir=`dirname "$cmdpath"`
cd "$cmddir/.."

base=testresults/`basename "$1"`
origfile=$base.nop.prs
_ttfile=$base.tt
mttfile=$base.mtt
p_ttfile=$_ttfile.prs
pmttfile=$mttfile.prs
nopfile=$base.nop

# Copy the input (decompressing *.gz) behind a BEGIN{exit}; prologue.
case "$1" in
  *.gz) nop_ruby zcat "$1" > "$nopfile" ;;
  *)    nop_ruby cat "$1" > "$nopfile" ;;
esac

echo executing: testcode/tokentest.rb --ruby --keepws "$1" '\|'
testcode/tokentest.rb --ruby --keepws "$nopfile" > "$_ttfile"
testcode/tokentest.rb --ruby --maxws "$nopfile" > "$mttfile"
ruby_parsedump "$nopfile" "$origfile"
ruby_parsedump "$_ttfile" "$p_ttfile"
ruby_parsedump "$mttfile" "$pmttfile"

# Diff each parser dump against the original's, then delete the dump.
[ -e "$p_ttfile" ] && (diff -ub "$origfile" "$p_ttfile" | \
  testcode/deletewarns.rb > "$p_ttfile.diff"
  rm "$p_ttfile"
)
[ -e "$pmttfile" ] && (diff -ub "$origfile" "$pmttfile" | \
  testcode/deletewarns.rb > "$pmttfile.diff"
  rm "$pmttfile"
)
head "$p_ttfile.diff"
test -s "$p_ttfile.diff" || head "$pmttfile.diff"
test -s "$p_ttfile.diff" && test -s "$pmttfile.diff" && echo omitting "$pmttfile.diff"

# Succeed only when both diffs are empty.  (test -z checks whether a
# *string* is empty, which a file name never is; ! -s checks the file.)
test ! -s "$p_ttfile.diff" && test ! -s "$pmttfile.diff" && exit 0
exit 1
@@ -0,0 +1,237 @@
1
+ #!/usr/bin/ruby -dw
2
+ require "rubylexer"
3
+ # require "rumalexer"
4
+ require "token"
5
+ require "tokenprinter"
6
+ require "getoptlong"
7
+ require "pp"
8
+
9
#defaults for every token: the offset check fails unless a token type
#overrides it, and there is no error to report
class Token
  def verify_offset(fd)
    false
  end

  def check_for_error
  end
end
14
+
15
#tokens that include this module raise their @error when checked
module ErrorToken
  def check_for_error
    raise @error
  end
end
18
+
19
#offset verification always succeeds for this token type
class FileAndLineToken
  def verify_offset(fd)
    true
  end
end
22
+
23
#offset check for tokens whose @ident should appear verbatim in the source
module SimpleVerify
  #reads @ident.length characters from fd and compares them with @ident
  def verify_offset(fd)
    found=fd.read(@ident.length)
    found==@ident
  end
end
28
+
29
#token types verified by comparing their text with the source directly
class WToken
  include SimpleVerify
end

class NewlineToken
  include SimpleVerify
end

class IgnoreToken
  include SimpleVerify
end

class MethNameToken
  include SimpleVerify
end
33
+
34
class SymbolToken
  #the text is always consumed from fd; symbols whose second character is a
  #quote are accepted without comparing, all others must match @ident exactly
  def verify_offset(fd)
    readsym=fd.read(@ident.length)
    return true if @ident[1]==?"
    return true if @ident[1]==?'
    readsym==@ident
  end
end
41
+
42
class EoiToken
  #runs the inherited check, then moves fd to the end of the file
  def verify_offset(fd)
    verdict=super(fd)
    fd.pos=fd.stat.size
    verdict
  end
end
49
+
50
class NoWsToken
  #true when neither the character before the current position nor the one
  #at it is whitespace; fd's position is left where it was
  def verify_offset(fd)
    here=fd.pos
    fd.pos=here-1
    pair=fd.read(2)
    fd.pos=here
    /^[^\s\v\t\n\r\f]{2}$/===pair
  end
end
59
+
60
class HereBodyToken
  #delegates to @ident, checking only the offsets of its subtokens
  def verify_offset(fd)
    body=@ident
    body.verify_subtoken_offsets(fd)
  end
end
65
+
66
class HerePlaceholderToken
  # Checks that fd is positioned at this here-document header: "<<", a "-"
  # when @dash is set, then @ender either bare (only when @quote is '"') or
  # wrapped in @quote characters.  Returns true or false.
  def verify_offset(fd)
    '<<'==fd.read(2) or return false
    @dash and (?-==fd.getc or return false)
    case ch=fd.getc
    when ?', ?`, ?"
      @quote==ch.chr or return false
      fd.read(@ender.size)==@ender or return false
      closer=fd.getc
      #input that ends before the closing quote is a mismatch, not a crash
      return(!closer.nil? && closer.chr==@quote)
    when ?a..?z, ?A..?Z, ?_, ?0..?9
      #an unquoted terminator behaves like a double-quoted one
      @quote=='"' or return false
      fd.pos-=1  #the character just read is the first of the terminator
      return fd.read(@ender.size)==@ender
    else
      return false
    end
  end
end
84
+
85
class StringToken
  #for a given @char, the two-character source prefix that is also accepted
  FANCY_QUOTE_BEGINNINGS= {'`'=>'%x', '['=>'%w', '{'=>'%W',
                           '"'=>/('|%[^a-pr-z0-9])/i, '/'=>'%r'}

  #the next two characters must start with @char or match the alternative
  #prefix listed for it; afterwards the subtokens are visited
  def verify_offset(fd)
    opener=fd.read(2)
    unless @char==opener[0,1]
      FANCY_QUOTE_BEGINNINGS[@char]===opener or return false
    end
    verify_subtoken_offsets(fd)
  end

  #asks every odd-indexed element of @elems to verify its offset; the
  #individual results are not inspected and true is always returned
  def verify_subtoken_offsets(fd)
    #verify offsets of subtokens
    (1...@elems.length).step(2) { |i| @elems[i].verify_offset(fd) }
    true
  end

  #checks the odd-indexed elements first, then defers to the inherited check
  def check_for_error
    (1...@elems.size).step(2){|idx|
      @elems[idx].check_for_error
    }
    super
  end
end
107
+
108
class RubyCode
  #checks every token of @ident against fd; a token is taken to end where the
  #next one starts (the last one at its offset plus its text length).
  #afterwards fd is positioned at the end of the last token
  def verify_offset(fd)
    thistok=nexttok=endpos=nil
    @ident.each_with_index{ |tok,tok_i|
      thistok=tok
      nexttok=@ident[tok_i+1]
      endpos=
        if nexttok
          nexttok.offset
        else
          thistok.offset+thistok.to_s.size
        end
      check_offset(thistok,fd,endpos)
    }
    #the loop must have finished on the final token, and that must be a WToken
    assert nexttok.nil?
    assert thistok.object_id==@ident.last.object_id
    assert WToken===thistok
    fd.pos=endpos
  end

  #propagates the error check to each contained token
  def check_for_error
    @ident.each do |tok|
      tok.check_for_error
    end
  end
end
126
+
127
+
128
class NumberToken
  #only the first character is looked at: a digit, ?, + or -
  def verify_offset(fd)
    first=fd.read(1)
    /^[0-9?+-]$/===first
  end
end
133
+
134
+
135
+ #class ZwToken
136
+ # def to_s
137
+ # $ShowImplicit ? explicit_form : super
138
+ # end
139
+ #end
140
+
141
class RuLexer
  #a token is expected to end where the first queued token (@moretokens)
  #begins, or at the file's current position when nothing is queued; the
  #actual check is done by the three-argument check_offset
  def check_offset(tok,file=@file)
    endpos=
      if @moretokens.empty?
        file.pos
      else
        @moretokens[0].offset
      end
    super(tok,file,endpos)
  end
end
147
+
148
#asserts that tok really is found at tok.offset in file and, except for
#string, number and here-document body tokens, that verifying it leaves the
#file at endpos.  the file's position is put back afterwards
def check_offset(tok,file,endpos)
  saved=file.pos

  assert Integer===tok.offset
  assert Integer===endpos
  assert endpos>=tok.offset

  file.pos=tok.offset
  assert tok.verify_offset(file)
  unless StringToken===tok or NumberToken===tok or HereBodyToken===tok
    assert(file.pos==endpos)
  end
  file.pos=saved
end
163
+
164
+
165
+
166
+
167
+
168
+
169
# Lexes a source with +lexertype+, checking every token's offset and error
# state and printing it through +pprinter+.
#
# name::      name handed to the lexer; also the file to open when +input+
#             is nil
# lexertype:: lexer class, instantiated as lexertype.new(name,input,1)
# pprinter::  object whose #pprint is called with each token
# input::     source handed to the lexer (previously ignored in favour of
#             reopening +name+, which broke -e and standard input)
# output::    when given, a file name that $stdout is redirected to for the
#             duration of the call
def tokentest(name,lexertype,pprinter,input=nil,output=nil)
  opened=old_stdout=nil
  begin
    #only open (and later close) a file if the caller supplied no input
    input ||= (opened=File.open(name))
    if output
      redirected=File.open(output,'w')
      old_stdout=$stdout
      $stdout=redirected
    end

    lxr=lexertype.new(name,input,1)

    loop do
      tok=lxr.get1token
      lxr.check_offset(tok)
      tok.check_for_error
      pprinter.pprint(tok)
      break if EoiToken===tok
    end

    #hack for SimpleTokenPrinter....
    print "\n" if NewlineToken===lxr.last_operative_token and
                  SimpleTokenPrinter===pprinter

    # unless lxr.balanced_braces?
    #   raise "unbalanced braces at eof"
    # end
  ensure
    #put $stdout back even when lexing raised
    if old_stdout
      $stdout.close
      $stdout=old_stdout
    end
    opened.close if opened
  end
end
198
+
199
+ #$ShowImplicit=false
200
if __FILE__==$0

  sep,line,showzw='',1,0
  # lexertype= RumaLexer if defined? RumaLexer
  lexertype=RubyLexer
  saweval=false  #must exist outside the option block to be visible after it
  printertype=SimpleTokenPrinter

  opts=GetoptLong.new \
    ["--eval","-e", GetoptLong::REQUIRED_ARGUMENT],
    # ["--ruby","-r", GetoptLong::NO_ARGUMENT],
    ["--keepws","-k", GetoptLong::NO_ARGUMENT],
    ["--maxws","-m", GetoptLong::NO_ARGUMENT],
    ["--implicit","-i", GetoptLong::NO_ARGUMENT],
    ["--implicit-all", GetoptLong::NO_ARGUMENT]

  opts.each do|opt,arg|
    case opt
    when '--eval' then
      #build a printer from the options seen so far, but keep the class in
      #printertype so another printer can still be created afterwards
      tokentest('-e',lexertype,printertype.new(sep,line,showzw),arg)
      saweval=true
    # when '--ruby' then lexertype=RubyLexer
    when '--keepws' then printertype= KeepWsTokenPrinter
    when '--maxws' then printertype= KeepWsTokenPrinter;sep=' '
    when '--implicit' then showzw=1
    when '--implicit-all' then showzw=2
    else raise "impossible option: #{opt}"  #raise cannot take a Symbol
    end
  end

  pprinter=printertype.new(sep,line,showzw)

  #lex each named file; with no file arguments lex standard input, unless
  #-e already supplied the code
  if !ARGV.empty?
    ARGV.each{|fn| tokentest(fn,lexertype,pprinter) }
  elsif !saweval
    tokentest('-',lexertype,pprinter,$stdin)
  end
  # ARGV.first[/[_.]rb$/i] and lexertype=RubyLexer #filename with _rb are special hack

end