RubyGems - rubylexer - Versions diffs - 0.7.7 → 0.8.0 - Mend

rubylexer 0.7.7 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (47) hide show

checksums.yaml +4 -0
data/History.txt +64 -0
data/Makefile +2 -2
data/README.txt +13 -9
data/bin/rubylexer +113 -0
data/lib/assert.rb +1 -1
data/lib/rubylexer.rb +856 -305
data/lib/rubylexer/charhandler.rb +1 -1
data/lib/rubylexer/charset.rb +15 -7
data/lib/rubylexer/context.rb +10 -2
data/lib/rubylexer/lextable.rb +1 -0
data/lib/rubylexer/rubycode.rb +1 -1
data/lib/rubylexer/rulexer.rb +106 -32
data/lib/rubylexer/symboltable.rb +1 -1
data/lib/rubylexer/test/oneliners.rb +15 -5
data/lib/rubylexer/test/oneliners_1.9.rb +116 -92
data/lib/rubylexer/test/stanzas.rb +49 -27
data/lib/rubylexer/test/testcases.rb +2 -2
data/lib/rubylexer/token.rb +153 -23
data/lib/rubylexer/tokenprinter.rb +9 -6
data/lib/rubylexer/version.rb +1 -1
data/rubylexer.gemspec +12 -8
data/test/bad/ruby_lexer.rb +7 -0
data/test/code/deletewarns.rb +1 -1
data/test/code/dumptokens.rb +1 -81
data/test/code/heredoc_blast_test.rb +112 -0
data/test/code/locatetest.rb +1 -1
data/test/code/regression.rb +23 -23
data/test/code/rubylexervsruby.rb +59 -12
data/test/code/tokentest.rb +62 -52
data/test/data/23.rb +0 -1
data/test/data/g.rb +0 -1
data/test/data/heremonsters.rb +1 -1
data/test/data/heremonsters_dos.rb +1 -1
data/test/data/pre.rb +0 -1
data/test/data/pre.unix.rb +0 -1
data/test/data/putstext.rb +4 -0
data/test/data/regtest.rb +0 -1
data/test/data/stuffydog.rb +5 -0
data/test/data/stuffydog2.rb +5 -0
data/test/data/wsdlDriver.rb +0 -1
data/test/test.sh +1 -1
data/test/test_all.rb +3 -0
data/test/test_bad_rubylexer.rb +16 -0
data/test/test_rubylexer_bad.rb +12 -0
data/testing.txt +40 -20
metadata +51 -38

data/test/code/tokentest.rb CHANGED

@@ -1,7 +1,7 @@
 #!/usr/bin/ruby
-=begin legal crap
+=begin legalia
     rubylexer - a ruby lexer written in ruby
-    Copyright (C) 2004,2005,2008  Caleb Clausen
+    Copyright (C) 2004,2005,2008, 2011  Caleb Clausen
     This library is free software; you can redistribute it and/or
     modify it under the terms of the GNU Lesser General Public
@@ -24,7 +24,7 @@ require "pp"
 class RubyLexer
 class Token
-  def verify_offset(fd); false end
+  def verify_offset(fd,lexer); false end
   def check_for_error; end
 end
@@ -36,17 +36,17 @@ module ErrorToken
 end
 class FileAndLineToken
-  def verify_offset(fd); true  end
+  def verify_offset(fd,lexer); true  end
 end
 class ImplicitParamListStartToken
-  def verify_offset(fd); true  end
+  def verify_offset(fd,lexer); true  end
 end
 class ImplicitParamListEndToken
-  def verify_offset(fd); true  end
+  def verify_offset(fd,lexer); true  end
 end
 module SimpleVerify
-  def verify_offset(fd)
+  def verify_offset(fd,lexer)
     fd.read(@ident.length)==@ident
   end
 end
@@ -57,13 +57,13 @@ class MethNameToken; include SimpleVerify; end
 class NewlineToken
   include SimpleVerify
-  def verify_offset(fd)
+  def verify_offset(fd,lexer)
     super or fd.eof?
   end
 end
 class SymbolToken
-  def verify_offset(fd)
+  def verify_offset(fd,lexer)
     la=fd.read(2)
     case la
       when '%s'
@@ -98,15 +98,15 @@ end
 class EoiToken
   include SimpleVerify
-  def verify_offset(fd)
-    result=super(fd)
+  def verify_offset(fd,lexer)
+    result=super(fd,lexer)
     fd.eof?
     return result
   end
 end
 class NoWsToken
-  def verify_offset(fd)
+  def verify_offset(fd,lexer)
     orig=fd.pos
     fd.pos=orig-1
     result= (/^[^\s\v\t\n\r\f]{2}$/===fd.read(2))
@@ -116,13 +116,13 @@ class NoWsToken
 end
 class HereBodyToken
-  def verify_offset(fd)
-    @ident.verify_subtoken_offsets(fd)
+  def verify_offset(fd,lexer)
+    @ident.verify_subtoken_offsets(fd,lexer)
   end
 end
 class HerePlaceholderToken
-  def verify_offset(fd)
+  def verify_offset(fd,lexer)
     '<<'==fd.read(2) or return false
     @dash and ('-'==fd.read(1) or return false)
     case ch=fd.read(1)[0]
@@ -143,14 +143,14 @@ end
 class StringToken
   FANCY_QUOTE_BEGINNINGS= {'`'=>'%x', '['=>'%w', '{'=>'%W',
                            '"'=>/('|%[^a-pr-z0-9])/i, '/'=>'%r'}
-  def verify_offset(fd)
+  def verify_offset(fd,lexer)
     fd.read(open.size)==open  or return false
 #    str=fd.read(2)
 #    @char==str[0,1] or FANCY_QUOTE_BEGINNINGS[@char]===str or return false
-    verify_subtoken_offsets(fd)
+    verify_subtoken_offsets(fd,lexer)
   end
-  def verify_subtoken_offsets(fd)
+  def verify_subtoken_offsets(fd,lexer)
     #verify offsets of subtokens
     @elems.each{|elem|
       case elem
@@ -173,7 +173,7 @@ class StringToken
         #assert now_at<=goal+1 #not needed
         saw[goal..-1]='' unless goal==now_at
         saw==elem  or return false
-      else elem.verify_offset(fd) or raise LexerError
+      else elem.verify_offset(fd,lexer) or raise LexerError
       end
     }
     return true
@@ -188,12 +188,12 @@ class StringToken
 end
 class RubyCode
-  def verify_offset(fd)
+  def verify_offset(fd,lexer)
     thistok=nexttok=endpos=nil
     @ident.each_index{ |tok_i|
       thistok,nexttok=@ident[tok_i,2]
       endpos=nexttok ? nexttok.offset : thistok.offset+thistok.to_s.size
-      check_offset(thistok,fd,endpos)
+      lexer.check_offset(thistok,fd,endpos)
     }
     assert nexttok.nil?
     assert thistok.object_id==@ident.last.object_id
@@ -208,7 +208,7 @@ end
 class NumberToken
-  def verify_offset(fd)
+  def verify_offset(fd,lexer)
     /^[0-9?+-]$/===fd.read(1)
   end
 end
@@ -221,43 +221,48 @@ end
 #end
 end
-public
-def check_offset(tok,file=nil,endpos=nil)
-  #the errors detected here are now reduced to warnings....
-  file||=@original_file
-  String===file and file=file.to_sequence
-  allow_ooo= @moretokens&&@moretokens[0]&&@moretokens[0].allow_ooo_offset unless endpos
-  endpos||=((@moretokens.empty?)? input_position : @moretokens[0].offset)
-  oldpos=file.pos
-  assert Integer===tok.offset
-  assert Integer===endpos
-  if endpos<tok.offset and !allow_ooo
-    $stderr.puts "expected #{endpos} to be >= #{tok.offset} token #{tok.to_s.gsub("\n","\n  ")}:#{tok.class}"
-  end
+class RubyLexer
+  public
+  attr_reader :offset_failures, :offset_first_failure
+  def check_offset(tok,file=nil,endpos=nil)
+    #the errors detected here are now reduced to warnings....
+    file||=@original_file
+    String===file and file=file.to_sequence
+    allow_ooo= @moretokens&&@moretokens[0]&&@moretokens[0].allow_ooo_offset unless endpos
+    endpos||=((@moretokens.empty?)? input_position : @moretokens[0].offset)
+    oldpos=file.pos
+    assert Integer===tok.offset
+    assert Integer===endpos
+    if endpos<tok.offset and !allow_ooo
+      $stderr.puts "expected #{endpos} to be >= #{tok.offset} token #{tok.to_s.gsub("\n","\n  ")}:#{tok.class}"
+    end
-  file.pos=tok.offset
-  tok.verify_offset(file) or
-     $stderr.puts "couldn't check offset of token #{tok.class}: #{tok.to_s.gsub("\n","\n  ")} at #{tok.offset}"
-  case tok
-    when RubyLexer::StringToken,RubyLexer::NumberToken,
-         RubyLexer::HereBodyToken,RubyLexer::SymbolToken,
-         RubyLexer::HerePlaceholderToken,
-         RubyLexer::FileAndLineToken #do nothing
-    else
-      file.pos==endpos or allow_ooo or
-        $stderr.puts "positions don't line up, expected #{endpos}, got #{file.pos}, token: #{tok.to_s.gsub("\n","\n  ") }"
+    file.pos=tok.offset
+    unless tok.verify_offset(file,self)
+      @offset_failures ||= 0
+      @offset_failures += 1
+      @offset_first_failure ||= tok
+    end
+    case tok
+      when RubyLexer::StringToken,RubyLexer::NumberToken,
+           RubyLexer::HereBodyToken,RubyLexer::SymbolToken,
+           RubyLexer::HerePlaceholderToken,
+           RubyLexer::FileAndLineToken #do nothing
+      else
+        file.pos==endpos or allow_ooo or
+          $stderr.puts "positions don't line up, expected #{endpos}, got #{file.pos}, token: #{tok.to_s.gsub("\n","\n  ") }"
+    end
+    file.pos=oldpos
+    return
   end
-  file.pos=oldpos
-  return
 end
 def tokentest(name,lexertype,pprinter,input=File.open(name),output=$stdout)
   input ||= File.open(name)
   if output!=$stdout
@@ -277,6 +282,11 @@ def tokentest(name,lexertype,pprinter,input=File.open(name),output=$stdout)
       pprinter.pprint(tok,output)
     end until RubyLexer::EoiToken===tok
+    if lxr.offset_failures
+      first=lxr.offset_first_failure
+      $stderr.puts "failed to check offset in #{lxr.offset_failures} cases. first=#{first.class}: #{first.to_s.gsub("\n","\n  ")} at #{first.offset}"
+    end
     #hack for SimpleTokenPrinter....
     print "\n" if RubyLexer::NewlineToken===lxr.last_operative_token and
                   RubyLexer::SimpleTokenPrinter===pprinter

data/test/data/23.rb CHANGED

@@ -14,7 +14,6 @@ r[0]^=r[1]
 $=||=0;$_||=0_0
 i,v=['o',*local_variables].flatten.grep(/\A.\Z/).sort_by{|x|x[0].-(?j).abs.-@}.reverse,false
 y=methods.grep(U).sort_by{|x|x[0].-(?e).abs.-@}
-#breakpoint
 i=[i,y].join(I=Love(%q=(2.**(5).chr)=)).gsub(' '){w=$=^=1;$x[w.inspect[1][0]*F-1,1]}.gsub(/[eyd]/){(?e.+?y.-($&[0])).chr }.delete('z')
 i=i.scan(%r:#{I}|.*?(?=#{I})|.*:).inject([]){|r,p|unless(v=!v if(v=!v)...(v))..(v=!v):r<<[]end;r.last<<p;r};v^=!v
 i,@v,@u=i.reject{true if (v=!v)..v}.join.capitalize,local_variables.inject(?X){|p,v|p^v[0]}.chr,O.constants.grep(U).pop

data/test/data/g.rb CHANGED

@@ -1,6 +1,5 @@
 j=9;def i(n) [n ?"d" : "e" , n] end
-#breakpoint
 p(i ?")
 p(j ?"d" : "e")

data/test/data/heremonsters.rb CHANGED

@@ -70,7 +70,7 @@ baz
 simple2
 p <<oof+"gfert"
-#{gleeble #breakpoint
+#{gleeble
 }
 oof

data/test/data/heremonsters_dos.rb CHANGED

@@ -70,7 +70,7 @@ baz
 simple2
 p <<oof+"gfert"
-#{gleeble #breakpoint
+#{gleeble
 }
 oof

data/test/data/pre.rb CHANGED

@@ -16,7 +16,6 @@ delete( %%% <<'>>
 ):'
 ) ) : ( (
 [ [ # ] ]
-  #breakpoint
   sub( %r
 ^ #{   %q
 (

data/test/data/pre.unix.rb CHANGED

@@ -16,7 +16,6 @@ delete( %%% <<'>>
 ):'
 ) ) : ( (
 [ [ # ] ]
-  #breakpoint
   sub( %r
 ^ #{   %q
 (

data/test/data/putstext.rb ADDED

@@ -0,0 +1,4 @@
+# encoding: utf-8
+#àè
+puts "text"
+# '

data/test/data/regtest.rb CHANGED

@@ -382,7 +382,6 @@ if ($Slow||=nil)
 end
      assert_eee Reg[lhqqbe+0], [ :begin, :"\\", :rand, :end ]
- #breakpoint
      assert_eee +[be], [:begin, :"\\", :"\\", :end]
      assert_eee +[be], [:begin, :"\\", :begin, :end]
      assert_eee +[be], [:begin, :"\\", :end, :end]

data/test/data/stuffydog.rb ADDED

@@ -0,0 +1,5 @@
+p <<stuff+'foobar'.tr('j-l','d-f')\
++"more stuff"
+12345678
+the quick brown fox jumped over the lazy dog
+stuff

data/test/data/stuffydog2.rb ADDED

@@ -0,0 +1,5 @@
+p <<stuff+'foobar'.tr('j-l','d-f')
++"more stuff"
+12345678
+the quick brown fox jumped over the lazy dog
+stuff

data/test/data/wsdlDriver.rb CHANGED

@@ -229,7 +229,6 @@ class WSDLDriver
 	Mapping.fault2exception(e)
       end
-#breakpoint
       ret = res_body.response ?
 	Mapping.soap2obj(res_body.response, @mapping_registry) : nil

data/test/test.sh CHANGED

@@ -1,6 +1,6 @@
 #!/bin/sh
 #    rubylexer - a ruby lexer written in ruby
-#    Copyright (C) 2004,2005  Caleb Clausen
+#    Copyright (C) 2004,2005, 2011  Caleb Clausen
 #
 #    This library is free software; you can redistribute it and/or
 #    modify it under the terms of the GNU Lesser General Public

data/test/test_all.rb CHANGED

@@ -1,2 +1,5 @@
+$:<<File.expand_path(File.dirname(File.dirname(__FILE__)))
 require 'test/code/regression'
 require 'test/code/test_1.9'
+Dir['test/test_*.rb'].each{|test| require test }

data/test/test_bad_rubylexer.rb ADDED

@@ -0,0 +1,16 @@
+require 'rbconfig'
+conf=RbConfig::CONFIG
+ruby=conf['bindir']+"/"+conf['RUBY_INSTALL_NAME']
+ruby='ruby' unless File.exist? ruby
+fail unless system(ruby,  "-e", <<END)
+   begin;
+     require '#{File.expand_path(File.join( File.dirname(__FILE__),'bad/ruby_lexer' ))}';
+     require 'rubygems'
+     require 'rubylexer';
+     rl=RubyLexer.new('eval','eval');
+       fail if $the_wrong_rubylexer==1;
+   end;
+END

data/test/test_rubylexer_bad.rb ADDED

@@ -0,0 +1,12 @@
+alias orig_warn warn
+def warn x; end;
+begin;
+    require 'rubylexer';
+    require 'test/bad/ruby_lexer';
+    rl=RubyLexer.new('eval','eval');
+rescue Exception;
+else fail if $the_wrong_rubylexer
+end
+alias warn orig_warn

data/testing.txt CHANGED

@@ -1,21 +1,22 @@
 Running the tests:
-The simplest thing to do is run "ruby -Ilib test/code/regression.rb". This
-tests against a list of known ruby expressions. It will take several minutes
-to run. Currently, there are 4 (minor) failures.
+The simplest thing to do is run "make test". This tests the lexer with a
+list of known interesting ruby expressions. It will take several minutes
+to run. Currently, there are 8-11 (minor) failures, depending on ruby
+version. The fact that there are a few failures is more a testament to the
+thoroughness of the test suite than an indictment of the lexer. Both lexer
+and test suite are very thorough, but a few more (obscure and unlikely)
+expressions are supported by the latter than the former.
+Most of the tests in the suite use rubylexervsruby, described below.
 If you're ambitious, try this command: "ruby -Ilib test/code/locatetest.rb".
 This will use locate to find as much ruby code on your system and test
 each specimen to see if it can be tokenized correctly (by feeding it to
-testcode/rubylexervsruby.rb, the operation of which is outlined below
+test/code/rubylexervsruby.rb, the operation of which is outlined below
 under 'testing strategy').
-Interpreting the output of rubylexervsruby.rb (and locatetest):
-In rubylexervsruby, I've tried to follow the philosophy that the test program
-doesn't print anything unless there's an error. Perhaps I haven't followed
-this far enough; every run of rubylexervsruby produces a little output, and
-sometimes a run will produce output that doesn't actually indicate a problem,
-or only a low-priority problem. (Since locatetest runs rubylexervsruby over
-and over, it produces lots of (mostly harmless) output. Sorry.)
+Interpreting output of rubylexervsruby (and locatetest and 'make test'):
 The following types of output should be ignored:
@@ -31,17 +32,21 @@ indicate that a warning was added or deleted. Ultimately, these should
 go away, but right now it's a low-priority issue.
 If you ever see ruby stack dump in rubylexervsruby output, that's certainly
-an error.
+a test failure.
 Something that looks like a unidiff chunk body (not header) may indicate
-an error as well. To understand more about how the unidiff output is
+a test failure as well. To understand more about how the unidiff output is
 created, see the section on testing strategy below.
+locatetest produces lots of (mostly harmless) output. Sorry.
 htree/template.rb should be ok now.
-currently, lots of warnings are printed about token offsets being off by 1,
-particularly the AssignmentRhsListToken. This is a problem, but for now I'm
-ignoring it.
+currently, lots of warnings are printed about token offsets being off.
+(like: "failed to check offset in N cases...") This is a problem, but for
+now I'm ignoring it. (Most lexer applications don't need token offsets to
+be correct, and it's only a minority of cases, near here documents, where
+this problem occurs.)
 Diff chunks like this indicate a minor problem with the placement of (empty)
 string fragments. Ignore it for now:
@@ -58,10 +63,26 @@ string fragments. Ignore it for now:
  Shifting token tSTRING_DBEG ()
+Diff chunks like this indicate a minor problem with the placement of newlines.
+Ignore it for now:
+  @@ -8,3 +8,2 @@
+   Shifting token tSTRING_END ()
+  -Shifting token '\n' ()
+   Shifting token "end-of-input" ()
+  @@ -8,3 +8,2 @@
+   Shifting token tSTRING_END ()
+  -Shifting token '\n' ()
+   Shifting token "end-of-input" ()
+There are a few other problems in the test suite as well. Current test status
+is less clean than I'd like, tho the conformance level of rubylexer is still
+very high.
 if you find any output that doesn't look like one of the above exceptions,
-and the input file was valid ruby, please send it to me so that i can add it
-to my arsenal of tests.
+(for cases that aren't in the existing snippet set) and the input file was
+valid ruby, please send it to me so that i can add it to my arsenal of
+tests.
 there are a number of 'ruby' files that i know of out there that actually
 contain syntax errors:
@@ -117,5 +138,4 @@ it is possible, however, that rubylexer is emitting as a single token things tha
 thinks should be 2 tokens. and in fact, this is the case with strings: ruby divides a
 string into string open, string body, and string close tokens with option interpolations,
 whereas rubylexer has just a single string token (with subtokens, if interpolations are
-present.) this difference in handling accounts in part for rubylexer's inability
-to correctly lex certain very complicated strings.
+present.)