RubyGems - rubylexer - Versions diffs - 0.6.2 - Mend

rubylexer 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (41) hide show

data/COPYING +510 -0
data/README +134 -0
data/Rantfile +37 -0
data/assert.rb +31 -0
data/charhandler.rb +84 -0
data/charset.rb +76 -0
data/context.rb +174 -0
data/howtouse.txt +136 -0
data/io.each_til_charset.rb +247 -0
data/require.rb +103 -0
data/rlold.rb +12 -0
data/rubycode.rb +44 -0
data/rubylexer.rb +1589 -0
data/rulexer.rb +532 -0
data/symboltable.rb +65 -0
data/testcode/deletewarns.rb +39 -0
data/testcode/dumptokens.rb +38 -0
data/testcode/locatetest +12 -0
data/testcode/rubylexervsruby.rb +104 -0
data/testcode/rubylexervsruby.sh +51 -0
data/testcode/tokentest.rb +237 -0
data/testcode/torment +51 -0
data/testdata/1.rb.broken +729 -0
data/testdata/23.rb +24 -0
data/testdata/g.rb +15 -0
data/testdata/newsyntax.rb +18 -0
data/testdata/noeolatend.rb +1 -0
data/testdata/p.rb +1227 -0
data/testdata/pleac.rb.broken +6282 -0
data/testdata/pre.rb +33 -0
data/testdata/pre.unix.rb +33 -0
data/testdata/regtest.rb +621 -0
data/testdata/tokentest.assert.rb.can +7 -0
data/testdata/untitled1.rb +1 -0
data/testdata/w.rb +22 -0
data/testdata/wsdlDriver.rb +499 -0
data/testing.txt +130 -0
data/testresults/placeholder +0 -0
data/token.rb +486 -0
data/tokenprinter.rb +152 -0
metadata +76 -0

data/symboltable.rb ADDED Viewed

@@ -0,0 +1,65 @@
+=begin copyright
+    rubylexer - a ruby lexer written in ruby
+    Copyright (C) 2004,2005  Caleb Clausen
+    This library is free software; you can redistribute it and/or
+    modify it under the terms of the GNU Lesser General Public
+    License as published by the Free Software Foundation; either
+    version 2.1 of the License, or (at your option) any later version.
+    This library is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    Lesser General Public License for more details.
+    You should have received a copy of the GNU Lesser General Public
+    License along with this library; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+=end
+class SymbolTable
+   def initialize
+      #note: below Stack means Array (used as a stack)
+      @symbols={} #Hash of String to Stack of Object(user-defined)
+      @locals_lists=[{}] #Stack of Hash of String to Boolean
+   end
+   def start_block
+      assert @locals_lists.last
+      @locals_lists.push({})
+      assert @locals_lists.last
+   end
+   def end_block
+      assert @locals_lists.last
+      list=@locals_lists.pop
+      list or raise "unbalanced end block"
+      list.each_key {|sym|
+         @symbols[sym].pop
+         @symbols[sym].empty? and @symbols[sym]=nil
+      }
+      assert @locals_lists.last
+   end
+   def [](name)
+      assert @locals_lists.last
+      (stack=@symbols[name]) and stack.last
+   end
+   alias === []
+   def []=(name, val)
+      assert @locals_lists.last
+      if @locals_lists.last and @locals_lists.last[name]
+         #already defined in this block
+         @symbols[name][-1]=val #overwrite current value
+      else
+         stack=(@symbols[name] ||= [])
+         stack.push val
+         @locals_lists.last[name]=true
+      end
+      assert @locals_lists.last
+      return val
+   end
+end

data/testcode/deletewarns.rb ADDED Viewed

@@ -0,0 +1,39 @@
+#!/usr/bin/env ruby
+module DeleteWarns
+FN='[^\n]+'
+DATETIME='\d+-\d\d?-\d\d? \d\d:\d\d:\d\d\.\d+ -?\d+'
+INDENTLINE='(?: [^\n]*\n)'
+WARNERRREX='(?:Reading a token: )?-:(\d+): (warning|(?:syntax )error)(?:: ([^\n]+))?'
+RE=%r"(?#--- #{FN}	#{DATETIME}
+\+\+\+ #{FN}	#{DATETIME}
+)^@@ -\d+,\d+ \+\d+,\d+ @@
+#{INDENTLINE}+\
+-(?:Reading a token: )?-:(\d+): (warning|error): ([^\n]+)\n\
+\+(?:Reading a token: )?-:(\d+): \2: \3
+#{INDENTLINE}+"mo
+RE2=%r"^@@ -\d+,\d+ \+\d+,\d+ @@
+#{INDENTLINE}*\
+\+#{WARNERRREX}\n\
+#{INDENTLINE}*"mo
+RE3=%r"^@@ -\d+,\d+ \+\d+,\d+ @@
+#{INDENTLINE}+\
+-(?:Reading a token: )?-:(\d+): (warning|error): ([^\n]+)\n\
+#{INDENTLINE}+"mo
+def DeleteWarns.deletewarns(input)
+input.each('\n--- ') {|match|
+   yield match.gsub(RE,"\\2 moved from \\1 to \\4: \\3\n")  \
+              .gsub(RE2,"Created \\2(s) in new file, line \\1: \\3\n") \
+              .gsub(RE3,"Removed \\2(s) from old file (?!), line \\1: \\3\n")
+}
+end
+end
+if __FILE__==$0
+  DeleteWarns.deletewarns($stdin){|s| $stdout.print s}
+end

data/testcode/dumptokens.rb ADDED Viewed

@@ -0,0 +1,38 @@
+#!/usr/bin/env ruby -dw
+require 'rubylexer'
+require 'getoptlong'
+#a Token#inspect that omits the object id
+class Token
+  def inspect
+    ["#<",self.class,": ",instance_variables.sort.collect{|v|
+      [v,"=",instance_variable_get(v).inspect," "]
+    }].to_s.sub(/ $/,'>')
+  end
+end
+file=nil
+#allow -e
+opts=GetoptLong.new(["--eval", "-e", GetoptLong::REQUIRED_ARGUMENT])
+opts.each{|opt,arg|
+  opt=='--eval' or raise :impossible
+  file=arg
+  name='-e'
+}
+#determine input file and its name if not already known
+file||=if name=ARGV.first
+    File.open(name)
+  else
+    name='-'
+    $stdout
+  end
+lexer=RubyLexer.new(name, file)
+until EoiToken===(tok=lexer.get1token)
+  p tok
+end
+p tok #print eoi token

data/testcode/locatetest ADDED Viewed

@@ -0,0 +1,12 @@
+test $RUBY || export RUBY=ruby
+export RUBYLEXERVSRUBY="$RUBY testcode/rubylexervsruby.rb"
+if $RUBY --version|grep '^ruby 1\.6'; then
+  echo 'error: need ruby 1.8'; exit
+fi
+for i in testdata/p.rb `(locate tk.rb;locate examples/examples_test.rb;locate .rb)|egrep -v '/testresults/|files/patch-'`; do
+  $RUBYLEXERVSRUBY $i
+done

data/testcode/rubylexervsruby.rb ADDED Viewed

@@ -0,0 +1,104 @@
+#!/usr/bin/ruby -dw
+require "testcode/tokentest"
+require "testcode/deletewarns"
+#when true, ruby_parsedump consults md5sum to skip inputs that did not change
+ENABLEMD5=false
+#these remain globals only for ease in interpolation
+$cmdpath=%x{which #{$0}}
+$cmddir=File.dirname($cmdpath)
+$input=ARGV.first
+$base="testresults/#{File.basename($input)}"
+#the two token dumps written by tokentest
+$_ttfile="#{$base}.tt"
+$mttfile="#{$base}.mtt"
+#parser traces of the two token dumps and their diffs against the original
+$p_ttfile="#{$_ttfile}.prs"
+$pmttfile="#{$mttfile}.prs"
+$p_ttdiff="#{$p_ttfile}.diff"
+$pmttdiff="#{$pmttfile}.diff"
+#copy of the input that exits immediately when run, and its parser trace
+$nopfile="#{$base}.nop"
+$origfile="#{$nopfile}.prs"
+$ruby=ENV['RUBY'] || 'ruby'
+def nop_ruby(cmd,input,output)
+   File.open(output,'w'){|f| f.print "BEGIN{exit};\n" }
+   system [cmd,input,'>>',output].join(' ')
+end
+def ruby_parsedump(input,output) #writes the filtered "ruby -w -y" trace of input (Shifting lines plus warnings/errors) to output
+  #todo: use ruby's md5 lib
+  #recursive ruby call here is unavoidable because -y flag has to be set
+  #do nothing if input unchanged
+  ENABLEMD5 and system "md5sum -c #{input}.md5 2>/dev/null" and return
+  status=0 #replaced below by the status of the ruby subprocess
+  IO.popen("#$ruby -w -y < #{input} 2>&1"){ |pipe| File.open(output,"w") { |outfd| #2>&1 merges stderr into the pipe so warnings and errors are read too
+    pipe.each{ |line|
+      outfd.print(line) \
+        if /^Shifting|^#{DeleteWarns::WARNERRREX}/o===line
+      #elsif /(warning|error)/i===line
+      #  raise("a warning or error, apparently, not caught by rex above: "+line)
+    }
+    pid,status=Process.waitpid2 pipe.pid #get err status of subprocess
+  } }
+  ENABLEMD5 and status==0 and system "md5sum #{input} > #{input}.md5" #compute sum only if no errors
+end
+Dir.chdir $cmddir + '/..'
+nop_ruby "#{$input[/\.gz$/]&&'z'}cat", $input, $nopfile
+print "executing: #$ruby testcode/tokentest.rb --keepws #$input\n"
+ruby_parsedump $nopfile, $origfile
+tokentest $nopfile, RubyLexer, KeepWsTokenPrinter.new, nil, $_ttfile
+tokentest $nopfile, RubyLexer, KeepWsTokenPrinter.new(' '), nil, $mttfile
+ruby_parsedump $_ttfile, $p_ttfile
+ruby_parsedump $mttfile, $pmttfile
+if File.exists?($p_ttfile)
+  IO.popen("diff -u1 -b #$origfile #$p_ttfile"){ |pipe|
+  File.open($p_ttdiff,"w") { |diff|
+    DeleteWarns.deletewarns(pipe){|s| diff.print s}
+  }
+  }
+  File.unlink $p_ttfile
+end
+if File.exists?($pmttfile)
+  IO.popen("diff -u1 -b #$origfile #$pmttfile"){ |pipe|
+  File.open($pmttdiff,"w") { |diff|
+    DeleteWarns.deletewarns(pipe){|s| diff.print s}
+  }
+  }
+  File.unlink $pmttfile
+end
+def head(fname)
+  File.open(fname){|fd| 10.times{ print((fd.gets or break)) } }
+end
+case File.zero?($p_ttdiff).to_s +
+     File.zero?($pmttdiff).to_s
+  when 'falsefalse' then
+    head $p_ttdiff
+    print "omitting #$pmttdiff\n"
+  when 'falsetrue'
+    head $p_ttdiff
+  when 'truefalse'
+    head $pmttdiff
+  when 'truetrue'
+    #File.unlink $origfile
+    exit(0)
+  default
+    raise "unexpected 2bool val"
+end
+exit 1

data/testcode/rubylexervsruby.sh ADDED Viewed

@@ -0,0 +1,51 @@
+#!/bin/sh
+#set -v
+# Prints "BEGIN{exit};" followed by the output of running command $1 on file $2.
+# Declared without the "function" keyword, which /bin/sh need not support.
+nop_ruby() { (echo "BEGIN{exit};"; exec "$1" "$2"); }
+function ruby_parsedump() {
+ md5sum -c $1.md5 2>/dev/null && return #do nothing if input unchanged
+ ruby -w -y < $1 2>&1 | grep ^Shift|cut -d" " -f3 >$2
+ md5sum $1 > $1.md5
+}
+cmdpath=`which $0`
+cd `dirname $cmdpath`/..
+cmddir=`dirname $cmdpath`
+base=testresults/`basename $1`
+origfile=$base.nop.prs
+_ttfile=$base.tt
+mttfile=$base.mtt
+p_ttfile=$_ttfile.prs
+pmttfile=$mttfile.prs
+nopfile=$base.nop
+#if awk "BEGIN{if(\"$1\" ~! /\.gz$/) exit(1)}"; then
+if ruby -e "'$1'[/\.gz\$/] or exit 1"; then
+   nop_ruby zcat $1 > $nopfile ;
+else
+   nop_ruby cat $1 > $nopfile ;
+fi
+ echo executing: testcode/tokentest.rb --ruby --keepws $1 '\|'
+ testcode/tokentest.rb --ruby --keepws $nopfile > $_ttfile
+ testcode/tokentest.rb --ruby --maxws  $nopfile > $mttfile
+ ruby_parsedump $nopfile $origfile
+ ruby_parsedump $_ttfile $p_ttfile
+ ruby_parsedump $mttfile $pmttfile
+ [ -e $p_ttfile ] && (diff -ub $origfile $p_ttfile | \
+                        testcode/deletewarns.rb > $p_ttfile.diff
+                      rm $p_ttfile
+                     )
+ [ -e $pmttfile ] && (diff -ub $origfile $pmttfile | \
+                        testcode/deletewarns.rb > $pmttfile.diff
+                      rm $pmttfile
+                     )
+ head $p_ttfile.diff
+ test -s $p_ttfile.diff || head $pmttfile.diff
+test -s $p_ttfile.diff && test -s $pmttfile.diff && echo omitting $pmttfile.diff
+#succeed only when both diffs exist and are empty; "test -z" looked at the
+#file NAME (a string that is never empty), so this exit could never happen
+test -e $p_ttfile.diff && test ! -s $p_ttfile.diff && \
+  test -e $pmttfile.diff && test ! -s $pmttfile.diff && exit 0
+exit 1

data/testcode/tokentest.rb ADDED Viewed

@@ -0,0 +1,237 @@
+#!/usr/bin/ruby -dw
+require "rubylexer"
+#  require "rumalexer"
+require "token"
+require "tokenprinter"
+require "getoptlong"
+require "pp"
+class Token
+  def verify_offset(fd); false end
+  def check_for_error; end
+end
+module ErrorToken
+  def check_for_error; raise @error end
+end
+class FileAndLineToken
+  def verify_offset(fd); true  end
+end
+module SimpleVerify
+  def verify_offset(fd)
+    fd.read(@ident.length)==@ident
+  end
+end
+class WToken;      include SimpleVerify; end
+class NewlineToken;     include SimpleVerify; end
+class IgnoreToken; include SimpleVerify; end
+class MethNameToken; include SimpleVerify; end
+class SymbolToken
+  def verify_offset(fd)
+    readsym=fd.read(@ident.length)
+    @ident[1]==?" or @ident[1]==?' or readsym==@ident
+  end
+end
+class EoiToken
+  def verify_offset(fd)
+    result=super(fd)
+    fd.pos=fd.stat.size
+    return result
+  end
+end
+class NoWsToken
+  def verify_offset(fd)
+    orig=fd.pos
+    fd.pos=orig-1
+    result= (/^[^\s\v\t\n\r\f]{2}$/===fd.read(2))
+    fd.pos=orig
+    return result
+  end
+end
+class HereBodyToken
+  def verify_offset(fd)
+    @ident.verify_subtoken_offsets(fd)
+  end
+end
+class HerePlaceholderToken
+  def verify_offset(fd) #checks that fd holds a here-document opener: <<, a - if @dash is set, then @ender, quoted or bare
+    '<<'==fd.read(2) or return false
+    @dash and (?-==fd.getc or return false) #the dash is only consumed when @dash is set
+    case ch=fd.getc
+      when ?', ?`, ?"
+        @quote==ch.chr or return false #the quote character in the file must be the recorded one
+        fd.read(@ender.size)==@ender or return false
+        return fd.getc.chr==@quote #and the same quote must close the terminator
+      when ?a..?z, ?A..?Z, ?_, ?0..?9
+        @quote=='"' or return false #an unquoted terminator is only accepted when @quote is a double quote
+        fd.pos-=1 #step back over the character consumed by getc; it belongs to the terminator
+        fd.read(@ender.size)==@ender or return false
+      else
+        return false
+    end
+  end
+end
+class StringToken
+  FANCY_QUOTE_BEGINNINGS= {'`'=>'%x', '['=>'%w', '{'=>'%W',
+                           '"'=>/('|%[^a-pr-z0-9])/i, '/'=>'%r'}
+  def verify_offset(fd)
+    str=fd.read(2)
+    @char==str[0,1] or FANCY_QUOTE_BEGINNINGS[@char]===str or return false
+    verify_subtoken_offsets(fd)
+  end
+  def verify_subtoken_offsets(fd)
+    #verify offsets of subtokens
+    1.step(@elems.length-1,2) { |i| @elems[i].verify_offset(fd) }
+    return true
+  end
+  def check_for_error
+    1.step(@elems.size-1,2){|idx|
+      @elems[idx].check_for_error
+    }
+    super
+  end
+end
+class RubyCode
+  def verify_offset(fd)
+    thistok=nexttok=endpos=nil
+    @ident.each_index{ |tok_i|
+      thistok,nexttok=@ident[tok_i,2]
+      endpos=nexttok ? nexttok.offset : thistok.offset+thistok.to_s.size
+      check_offset(thistok,fd,endpos)
+    }
+    assert nexttok.nil?
+    assert thistok.object_id==@ident.last.object_id
+    assert WToken===thistok
+    fd.pos=endpos
+  end
+  def check_for_error
+    @ident.each{|tok| tok.check_for_error }
+  end
+end
+class NumberToken
+  def verify_offset(fd)
+    /^[0-9?+-]$/===fd.read(1)
+  end
+end
+#class ZwToken
+#  def to_s
+#    $ShowImplicit ? explicit_form : super
+#  end
+#end
+class RuLexer
+  def check_offset(tok,file=@file)
+    endpos=(@moretokens.empty?)? file.pos : @moretokens[0].offset
+    super(tok,file,endpos)
+  end
+end
+def check_offset(tok,file,endpos)
+    oldpos=file.pos
+    assert Integer===tok.offset
+    assert Integer===endpos
+    assert endpos>=tok.offset
+    file.pos=tok.offset
+    assert tok.verify_offset(file)
+    case tok
+      when StringToken,NumberToken,HereBodyToken
+      else assert(file.pos==endpos)
+    end
+    file.pos=oldpos
+end
+def tokentest(name,lexertype,pprinter,input=File.open(name),output=nil)
+  input ||= File.open(name)
+  if output
+    old_stdout=$stdout
+    $stdout=File.open(output,'w')
+  end
+  File.open(name) {|fd|
+    lxr=lexertype.new(name,fd,1)
+    begin
+      tok=lxr.get1token
+      lxr.check_offset(tok)
+      tok.check_for_error
+      pprinter.pprint(tok)
+    end until EoiToken===tok
+    #hack for SimpleTokenPrinter....
+    print "\n" if NewlineToken===lxr.last_operative_token and
+                  SimpleTokenPrinter===pprinter
+#    unless lxr.balanced_braces?
+#      raise "unbalanced braces at eof"
+#    end
+  }
+  if output
+    $stdout.close
+    $stdout=old_stdout
+  end
+end
+#$ShowImplicit=false
+if __FILE__==$0
+  sep,line,showzw='',1,0
+#  lexertype= RumaLexer if defined? RumaLexer
+  lexertype=RubyLexer
+  insertnils=fd=name=nil
+  pprinter=SimpleTokenPrinter
+  opts=GetoptLong.new \
+    ["--eval","-e", GetoptLong::REQUIRED_ARGUMENT],
+#    ["--ruby","-r", GetoptLong::NO_ARGUMENT],
+    ["--keepws","-k", GetoptLong::NO_ARGUMENT],
+    ["--maxws","-m", GetoptLong::NO_ARGUMENT],
+    ["--implicit","-i", GetoptLong::NO_ARGUMENT],
+    ["--implicit-all", GetoptLong::NO_ARGUMENT]
+  opts.each do|opt,arg|
+    case opt
+    when '--eval'   then
+       pprinter =pprinter.new(sep,line,showzw)
+       tokentest('-e',lexertype,pprinter,arg)
+       saweval=true
+#    when '--ruby'   then lexertype=RubyLexer
+    when '--keepws' then pprinter= KeepWsTokenPrinter
+    when '--maxws'  then pprinter= KeepWsTokenPrinter;sep=' '
+    when '--implicit' then showzw=1
+    when '--implicit-all' then showzw=2
+    else raise :impossible
+    end
+  end
+  pprinter =pprinter.new(sep,line,showzw)
+    ARGV.empty?   ?    saweval || tokentest('-',lexertype,pprinter,$stdin) :
+        ARGV.each{|fn| tokentest(fn,lexertype,pprinter) }
+#  ARGV.first[/[_.]rb$/i] and lexertype=RubyLexer  #filename with _rb are special hack
+end