RubyGems - parser - Versions diffs - 2.0.0.pre2 → 2.0.0.pre3 - Mend

parser 2.0.0.pre2 → 2.0.0.pre3

Files changed (38) hide show

checksums.yaml +4 -4
data/.yardopts +2 -2
data/CHANGELOG.md +55 -0
data/Gemfile +0 -2
data/README.md +58 -4
data/lib/gauntlet_parser.rb +121 -0
data/lib/parser.rb +31 -24
data/lib/parser/ast/node.rb +6 -4
data/lib/parser/ast/processor.rb +3 -0
data/lib/parser/base.rb +18 -17
data/lib/parser/builders/default.rb +61 -9
data/lib/parser/compatibility/ruby1_8.rb +7 -0
data/lib/parser/diagnostic.rb +18 -5
data/lib/parser/diagnostic/engine.rb +12 -11
data/lib/parser/lexer.rl +288 -133
data/lib/parser/lexer/explanation.rb +1 -1
data/lib/parser/lexer/literal.rb +49 -17
data/lib/parser/rewriter.rb +2 -0
data/lib/parser/ruby18.y +1 -17
data/lib/parser/ruby19.y +7 -18
data/lib/parser/ruby20.y +9 -28
data/lib/parser/ruby21.y +11 -34
data/lib/parser/runner.rb +6 -1
data/lib/parser/source/buffer.rb +44 -21
data/lib/parser/source/comment.rb +35 -0
data/lib/parser/source/comment/associator.rb +3 -0
data/lib/parser/source/map.rb +2 -4
data/lib/parser/source/range.rb +7 -0
data/lib/parser/source/rewriter.rb +3 -0
data/lib/parser/source/rewriter/action.rb +3 -0
data/lib/parser/syntax_error.rb +7 -2
data/lib/parser/version.rb +1 -1
data/parser.gemspec +2 -0
data/test/parse_helper.rb +5 -3
data/test/test_encoding.rb +29 -0
data/test/test_lexer.rb +780 -514
data/test/test_parser.rb +185 -11
metadata +17 -2

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: f4144b03e5cb0074bd4e6a62442b3954365d7ec8
-  data.tar.gz: 4276d7b16e60596143e0cc22b19ef0f048446af5
+  metadata.gz: 7d801efbc17526dd9207b668ac390eb9c02fab75
+  data.tar.gz: 198728fb3fb74976aaaa19cd2de1612b42ad2db8
 SHA512:
-  metadata.gz: 644d1bb91060042670f4602788785bec67c8cc594ffa44d78922e74ba8201d954789e9b8953632b8f2c13b11efe1a2588cdde7bed8b3718c40189dad766104c0
-  data.tar.gz: 75a5830a2a534f63663e83caf4333f5d462a2a591bc72adec2a09d61fc90a2f2d3ea106c26d37df225949beb6e6139c2b2033e76d1448ec0f365160bc28d6ba7
+  metadata.gz: 4a6e46bdb13a7348167df9f5a7c5e4f6ce31685e8f70a71f186becf91fe8f19ac3eeb70830ab6ecbf277667e6f269ab0dbeec8bc51f13ae340b6dc47e23b2b22
+  data.tar.gz: 068b0c06f919970b48529366a915f625902be5af227348af442b3fb89aab7455ae2293c2e327109a06cda4ee46b161659b44070a73fd6f636bab0a7f375f64b0

data/.yardopts CHANGED

@@ -3,10 +3,10 @@
 -M kramdown
 -o ./yardoc
 -r ./README.md
---private
---protected
 --asset ./doc/css/common.css:css/common.css
 --verbose
+--api public
+--exclude lib/parser/lexer.rb
 --exclude lib/parser/ruby18.rb
 --exclude lib/parser/ruby19.rb
 --exclude lib/parser/ruby20.rb

data/CHANGELOG.md CHANGED

@@ -1,6 +1,61 @@
 Changelog
 =========
+v2.0.0.pre3 (2013-07-26)
+------------------------
+API modifications:
+ * lexer.rl: add simple explicit output encoding for strings. (Peter Zotov)
+Features implemented:
+ * Source::Buffer: support for -(dos|unix|mac) and utf8-mac encodings. (Peter Zotov)
+ * Source::Range#resize. (Peter Zotov)
+ * Significantly improve speed for large (>100k) and very large (>1M) files. (Peter Zotov)
+Bugs fixed:
+ * ruby21.y: fix typos. (Peter Zotov)
+ * builders/default: respect regexp encoding. (Peter Zotov)
+ * lexer.rl: literal EOF (\0, \x04, \x1a) inside literals and comments. (Peter Zotov)
+ * lexer.rl: "meth (lambda do end)" (1.8), "f x: -> do meth do end end": expr_cmdarg. (Peter Zotov)
+ * lexer.rl: "\<\<E\nE\r\r\n": extraneous CRs are ignored after heredoc delimiter. (Peter Zotov)
+ * lexer.rl: "%\nfoo\n": \n can be used as %-literal delimiter. (Peter Zotov)
+ * source/buffer, lexer.rl: convert CRLF to LF prior to lexing. (Peter Zotov)
+ * lexer.rl: "\<\<w; "\nfoo\nw\n"": interleaved heredoc and non-heredoc literals. (Peter Zotov)
+ * builders/default: 1.8 did not descend into &&/|| in conditional context. (Peter Zotov)
+ * lexer.rl: "1+a:a": respect context sensitivity in 1.8 label fallback. (Peter Zotov)
+ * lexer.rl: ruby 1.8 is context-sensitive wrt/ locals as well. (Peter Zotov)
+ * lexer.rl: "eof??a": expr_arg doesn't need space before character literal. (Peter Zotov)
+ * lexer.rl: interleaved heredoc and interpolated double-quoted string. (Peter Zotov)
+ * lexer.rl: "#{f:a}": interpolation starts expr_value, not expr_beg. (Peter Zotov)
+ * lexer.rl: "\cM" is "\r", not an error. (Peter Zotov)
+ * ruby{20,21}.y: constant op-assignment inside a def is not an error. (Peter Zotov)
+ * lexer.rl: "when Date:" fix label fallback for 1.8 mode. (Peter Zotov)
+ * ruby{19,20,21}.y: "->(scope){}; scope :foo": lambda identifier leakage. (Peter Zotov)
+ * lexer.rl: "eh ?\r\n": don't eat tEH if followed by CRLF. (Peter Zotov)
+ * lexer.rl: "f \<\<-TABLE\ndo |a,b|\nTABLE\nend": leave FSM after lexing heredoc. (Peter Zotov)
+ * lexer.rl: "foo %\n bar": don't lex % at expr_arg as tSTRING_BEG. (Peter Zotov)
+ * lexer.rl, lexer/literal: use lexer encoding for literal buffer. (Peter Zotov)
+ * lexer.rl: "\u{9}": one-digit braced unicode escapes. (Peter Zotov)
+ * Source::Buffer: don't chew \r from source lines. (Peter Zotov)
+ * builders/default: don't die in eh_keyword_map if else branch is empty. (Peter Zotov)
+ * lexer.rl: "0777_333": octal literals with internal underscores. (Peter Zotov)
+ * lexer.rl: "let [] {}": goto tLBRACE_ARG after any closing braces. (Peter Zotov)
+ * lexer.rl: "while not (1) do end": emit proper kDO* when in cond/cmdarg state. (Peter Zotov)
+ * lexer.rl: "rescue=>": correctly handle rescue+assoc at expr_beg. (Peter Zotov)
+ * lexer.rl: "puts 123do end": only trailing `_' and `e' in number are errors. (Peter Zotov)
+ * lexer.rl: "begin; rescue rescue1; end": accept barewords at expr_mid. (Peter Zotov)
+ * lexer.rl: "f.x!if 1": correct modifier handling in expr_arg. (Peter Zotov)
+ * lexer.rl: "=begin\n#=end\n=end": only recognize =end at bol. (Peter Zotov)
+ * builders/default: don't check for duplicate arguments in 1.8 mode. (Peter Zotov)
+ * Don't attempt to parse magic encoding comment in 1.8 mode. (Peter Zotov)
+ * lexer.rl: "\777": octal literals overflow. (Peter Zotov)
+ * lexer.rl: "foo;\n__END__", "\na:b": whitespace in expr_value. (Peter Zotov)
+ * lexer.rl: "\xE2\x80\x99": concatenation of byte escape sequences. (Peter Zotov)
+ * lexer.rl: "E10", "E4U": don't conflate floats and identifiers. (Peter Zotov)
+ * lexer.rl: "foo.bar= {1=>2}": return fid, = as separate tokens in expr_dot. (Peter Zotov)
+ * lexer.rl: "def defined?": properly return defined? in expr_fname. (Peter Zotov)
+ * lexer.rl: "Rainbows! do end", "foo.S?": allow bareword fid in expr_beg/dot. (Peter Zotov)
 v2.0.0.pre2 (2013-07-11)
 ------------------------

data/Gemfile CHANGED

@@ -2,5 +2,3 @@ source 'https://rubygems.org'
 # Specify your gem's dependencies in parser.gemspec
 gemspec
-gem 'rubocop', :platform => [:ruby_19, :ruby_20]

data/README.md CHANGED

@@ -11,6 +11,8 @@ par or better than Ripper, Melbourne, JRubyParser or ruby\_parser.
 You can also use [unparser](https://github.com/mbj/unparser) to produce
 equivalent source code from Parser's ASTs.
+Sponsored by [Evil Martians](http://evilmartians.com).
 ## Installation
 Most recent version of Parser is 2.0; however, per
@@ -151,6 +153,19 @@ Both `(begin)` and `(kwbegin)` nodes represent compound statements, that is, sev
 and so on.
+```
+$ ruby-parse -e '(foo; bar)'
+(begin
+  (send nil :foo)
+  (send nil :bar))
+$ ruby-parse -e 'def x; foo; bar end'
+(def :x
+  (args)
+  (begin
+    (send nil :foo)
+    (send nil :bar)))
+```
 Note that, despite its name, the `kwbegin` node has only a tangential relation to the `begin` keyword. Normally, Parser AST is semantic, that is, if two constructs look different but behave identically, they get parsed to the same node. However, there exists a peculiar construct called post-loop in Ruby:
 ```
@@ -163,20 +178,59 @@ This specific syntactic construct, that is, keyword `begin..end` block followed
   [postloop]: http://rosettacode.org/wiki/Loops/Do-while#Ruby
+```
+$ ruby-parse -e 'begin foo end while cond'
+(while-post
+  (send nil :cond)
+  (kwbegin
+    (send nil :foo)))
+$ ruby-parse -e 'foo while cond'
+(while
+  (send nil :cond)
+  (send nil :foo))
+$ ruby-parse -e '(foo) while cond'
+(while
+  (send nil :cond)
+  (begin
+    (send nil :foo)))
+```
 (Parser also needs the `(kwbegin)` node type internally, and it is highly problematic to map it back to `(begin)`.)
 ## Known issues
+Adding support for the following Ruby MRI features in Parser would needlessly complicate it, and as they are all very specific and rarely occurring corner cases, this is not done.
+Parser has been extensively tested; in particular, it parses almost the entire [Rubygems][rg] corpus. For every issue, a breakdown of affected gems is offered.
+ [rg]: http://rubygems.org
 ### Void value expressions
-So-called "void value expressions" are not handled by Parser. For a description
+Ruby MRI prohibits so-called "void value expressions". For a description
 of what a void value expression is, see [this
 gist](https://gist.github.com/JoshCheek/5625007) and [this Parser
 issue](https://github.com/whitequark/parser/issues/72).
-It is not clear which rules this piece of static analysis follows, or which
-problem does it solve. It is not implemented because there is no clear
-specification allowing us to verify the behavior.
+It is unknown whether any gems are affected by this issue.
+### Invalid characters inside comments
+Ruby MRI permits arbitrary non-7-bit characters to appear in comments regardless of source encoding.
+As of 2013-07-25, there are about 180 affected gems.
+### \u escape in 1.8 mode
+Ruby MRI 1.8 permits a bare `\u` escape sequence in a string and treats it like `u`. Ruby MRI 1.9 and later treat `\u` as the prefix of a Unicode escape sequence and do not allow it to appear bare. Parser follows the 1.9+ behavior.
+As of 2013-07-25, affected gems are: activerdf, activerdf_net7, fastreader, gkellog-reddy.
+### Invalid Unicode escape sequences
+Ruby MRI 1.9+ permits invalid UTF-8 sequences to be specified in Unicode escape sequences, such as `\u{d800}`.
+As of 2013-07-25, affected gems are: aws_cloud_search.
 ## Contributors

data/lib/gauntlet_parser.rb ADDED

@@ -0,0 +1,121 @@
+require 'gauntlet'
+require 'parser/all'
+require 'shellwords'
+class ParserGauntlet < Gauntlet
+  RUBY20 = 'ruby'
+  RUBY19 = 'ruby1.9.1'
+  RUBY18 = '/opt/rubies/ruby-1.8.7-p370/bin/ruby'
+  def try(parser, ruby, file, show_ok: false)
+    try_ruby = lambda do |e|
+      Process.spawn(%{#{ruby} -c #{Shellwords.escape file}},
+                    :err => '/dev/null', :out => '/dev/null')
+      _, status = Process.wait2
+      if status.success?
+        # Bug in Parser.
+        puts "Parser bug."
+        @result[file] = { parser.to_s => "#{e.class}: #{e.to_s}" }
+      else
+        # No, this file is not Ruby.
+        yield if block_given?
+      end
+    end
+    begin
+      parser.parse_file(file)
+    rescue Parser::SyntaxError => e
+      if e.diagnostic.location.resize(2).is?('<%')
+        puts "ERb."
+        return
+      end
+      try_ruby.call(e)
+    rescue ArgumentError, RegexpError,
+           Encoding::UndefinedConversionError => e
+      puts "#{file}: #{e.class}: #{e.to_s}"
+      try_ruby.call(e)
+    rescue Interrupt
+      raise
+    rescue Exception => e
+      puts "Parser bug: #{file} #{e.class}: #{e.to_s}"
+      @result[file] = { parser.to_s => "#{e.class}: #{e.to_s}" }
+    else
+      puts "Ok." if show_ok
+    end
+  end
+  def parse(name)
+    puts "GEM: #{name}"
+    @result = {}
+    if ENV.include?('FAST')
+      total_size = Dir["**/*.rb"].map(&File.method(:size)).reduce(:+)
+      if total_size > 300_000
+        puts "Skip."
+        return
+      end
+    end
+    Dir["**/*.rb"].each do |file|
+      next if File.directory? file
+      try(Parser::Ruby20, RUBY20, file) do
+        puts "Trying 1.9:"
+        try(Parser::Ruby19, RUBY19, file, show_ok: true) do
+          puts "Trying 1.8:"
+          try(Parser::Ruby18, RUBY18, file, show_ok: true) do
+            puts "Invalid syntax."
+          end
+        end
+      end
+    end
+    @result
+  end
+  def run(name)
+    data[name] = parse(name)
+    self.dirty = true
+  end
+  def should_skip?(name)
+    data[name] == {}
+  end
+  def load_yaml(*)
+    data = super
+    @was_errors = data.count { |_name, errs| errs != {} }
+    data
+  end
+  def shutdown
+    super
+    errors  = data.count { |_name, errs| errs != {} }
+    total   = data.count
+    percent = "%.5f" % [100 - errors.to_f / total * 100]
+    puts "!!! was: #{@was_errors} now: #{errors} total: #{total} frac: #{percent}%"
+  end
+end
+filter = ARGV.shift
+filter = Regexp.new filter if filter
+gauntlet = ParserGauntlet.new
+if ENV.include? 'UPDATE'
+  gauntlet.source_index
+  gauntlet.update_gem_tarballs
+end
+gauntlet.run_the_gauntlet filter

data/lib/parser.rb CHANGED

@@ -7,35 +7,39 @@ if RUBY_VERSION < '1.9'
   require 'parser/compatibility/ruby1_8'
 end
-# Library namespace
+##
+# @api public
+#
 module Parser
   require 'parser/version'
   require 'parser/ast/node'
   require 'parser/ast/processor'
-  require 'parser/source/buffer'
-  require 'parser/source/range'
-  require 'parser/source/comment'
-  require 'parser/source/comment/associator'
-  require 'parser/source/rewriter'
-  require 'parser/source/rewriter/action'
-  require 'parser/source/map'
-  require 'parser/source/map/operator'
-  require 'parser/source/map/collection'
-  require 'parser/source/map/constant'
-  require 'parser/source/map/variable'
-  require 'parser/source/map/keyword'
-  require 'parser/source/map/definition'
-  require 'parser/source/map/send'
-  require 'parser/source/map/block'
-  require 'parser/source/map/condition'
-  require 'parser/source/map/ternary'
-  require 'parser/source/map/for'
-  require 'parser/source/map/rescue_body'
+  module Source
+    require 'parser/source/buffer'
+    require 'parser/source/range'
+    require 'parser/source/comment'
+    require 'parser/source/comment/associator'
+    require 'parser/source/rewriter'
+    require 'parser/source/rewriter/action'
+    require 'parser/source/map'
+    require 'parser/source/map/operator'
+    require 'parser/source/map/collection'
+    require 'parser/source/map/constant'
+    require 'parser/source/map/variable'
+    require 'parser/source/map/keyword'
+    require 'parser/source/map/definition'
+    require 'parser/source/map/send'
+    require 'parser/source/map/block'
+    require 'parser/source/map/condition'
+    require 'parser/source/map/ternary'
+    require 'parser/source/map/for'
+    require 'parser/source/map/rescue_body'
+  end
   require 'parser/syntax_error'
   require 'parser/diagnostic'
@@ -68,7 +72,7 @@ module Parser
     :regexp_options          => 'unknown regexp options: %{options}',
     :cvar_name               => "`%{name}' is not allowed as a class variable name",
     :ivar_name               => "`%{name}' is not allowed as an instance variable name",
-    :trailing_underscore     => "trailing `_' in number",
+    :trailing_in_number      => "trailing `%{character}' in number",
     :empty_numeric           => 'numeric literal without digits',
     :invalid_octal           => 'invalid octal digit',
     :no_dot_digit_literal    => 'no .<digit> floating literal anymore; put 0 before dot',
@@ -107,6 +111,9 @@ module Parser
     :useless_else            => 'else without rescue is useless',
   }.freeze
+  ##
+  # Verify that the current Ruby implementation supports Encoding.
+  # @raise [RuntimeError]
   def self.check_for_encoding_support
     unless defined?(Encoding)
       raise RuntimeError, 'Parsing 1.9 and later versions of Ruby is not supported on 1.8 due to the lack of Encoding support'

data/lib/parser/ast/node.rb CHANGED

@@ -3,10 +3,13 @@ module Parser
     ##
     # {Parser::AST::Node} contains information about a single AST node and its
-    # child nodes, it extends the basic `AST::Node` class provided by the "ast"
-    # Gem.
+    # child nodes. It extends the basic [AST::Node](http://rdoc.info/gems/ast/AST/Node)
+    # class provided by gem [ast](http://rdoc.info/gems/ast).
+    #
+    # @api public
     #
     # @!attribute [r] location
+    #  Source map for this Node.
     #  @return [Parser::Source::Map]
     #
     class Node < ::AST::Node
@@ -15,11 +18,10 @@ module Parser
       alias loc location
       ##
-      # Assigns various properties to the current AST node. Currently only the
+      # Assigns various properties to this AST node. Currently only the
       # location can be set.
       #
       # @param [Hash] properties
-      #
       # @option properties [Parser::Source::Map] :location Location information
       #  of the node.
       #

data/lib/parser/ast/processor.rb CHANGED

@@ -1,6 +1,9 @@
 module Parser
   module AST
+    ##
+    # @api public
+    #
     class Processor < ::AST::Processor
       def process_regular_node(node)
         node.updated(nil, process_all(node))

data/lib/parser/base.rb CHANGED

@@ -1,6 +1,10 @@
 module Parser
   ##
+  # Base class for version-specific parsers.
+  #
+  # @api public
+  #
   # @!attribute [r] diagnostics
   #  @return [Parser::Diagnostic::Engine]
   #
@@ -9,7 +13,9 @@ module Parser
   #
   class Base < Racc::Parser
     ##
-    # Parses a string of Ruby code and returns the AST.
+    # Parses a string of Ruby code and returns the AST. If the source
+    # cannot be parsed, {SyntaxError} is raised and a diagnostic is
+    # printed to `stderr`.
     #
     # @example
     #  Parser::Base.parse('puts "hello"')
@@ -25,7 +31,6 @@ module Parser
       parser.diagnostics.all_errors_are_fatal = true
       parser.diagnostics.ignore_warnings      = true
-      # Temporary, for manual testing convenience
       parser.diagnostics.consumer = lambda do |diagnostic|
         $stderr.puts(diagnostic.render)
       end
@@ -33,13 +38,20 @@ module Parser
       string = string.dup.force_encoding(parser.default_encoding)
       source_buffer = Source::Buffer.new(file, line)
-      source_buffer.source = string
+      if name == 'Parser::Ruby18'
+        source_buffer.raw_source = string
+      else
+        source_buffer.source     = string
+      end
       parser.parse(source_buffer)
     end
     ##
-    # Parses Ruby source code by reading it from a file.
+    # Parses Ruby source code by reading it from a file. If the source
+    # cannot be parsed, {SyntaxError} is raised and a diagnostic is
+    # printed to `stderr`.
     #
     # @param [String] filename Path to the file to parse.
     # @see #parse
@@ -49,19 +61,8 @@ module Parser
     end
     attr_reader :diagnostics
     attr_reader :builder
-    ##
-    # @api internal
-    #
     attr_reader :static_env
-    ##
-    # The source file currently being parsed.
-    #
-    # @api internal
-    #
     attr_reader :source_buffer
     ##
@@ -155,14 +156,14 @@ module Parser
     end
     ##
-    # @api internal
+    # @api private
     # @return [TrueClass|FalseClass]
     #
     def in_def?
       @def_level > 0
     end
-    protected
+    private
     def next_token
       @lexer.advance