RubyGems - parser - Versions diffs - 2.3.0.pre.6 → 2.3.0.0 - Mend

parser 2.3.0.pre.6 → 2.3.0.0

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the respective public registry.

Files changed (19)

checksums.yaml +4 -4
data/.travis.yml +3 -2
data/LICENSE.txt +1 -1
data/README.md +0 -1
data/lib/parser.rb +1 -0
data/lib/parser/builders/default.rb +24 -0
data/lib/parser/current.rb +28 -13
data/lib/parser/lexer.rl +86 -61
data/lib/parser/lexer/dedenter.rb +48 -0
data/lib/parser/lexer/literal.rb +24 -2
data/lib/parser/meta.rb +1 -1
data/lib/parser/ruby23.y +6 -3
data/lib/parser/source/buffer.rb +29 -10
data/lib/parser/version.rb +1 -1
data/test/test_current.rb +2 -0
data/test/test_lexer.rb +49 -0
data/test/test_parser.rb +175 -0
data/test/test_source_buffer.rb +25 -0
metadata +6 -5

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 76b617323cf8143b8935312b8ce39e4c2d847b63
-  data.tar.gz: 13f93cde47689ad2352602d2aac0555b674f860d
+  metadata.gz: 8fcc0b8421a588c61ea9d3969ec4f0e1f13c7d8e
+  data.tar.gz: 241d5a150edb29c9f95c7ecd4361d6e2fd4bc4d3
 SHA512:
-  metadata.gz: ddac7c4fdf0503ff15acbb45eedb8bdd9713ed38ceefd5f306044b68e8ce6c016919a689e543ad7b9648c0dcfee43870164194ed8f02fcfd90b9e383219ad9bf
-  data.tar.gz: 7831e67d2cb5ad9e65e1230a86c5117804e17e088e78f01c28632917c1cf0b3cce3abfb46ab889007acf7232f315989467d756a3a89d4b335d71022b53899c2a
+  metadata.gz: a4f18eb04354a0230053a4b11db78aef983aebcad4a5edd65688a55b42e3410e9ff73d7f2a1ccb95b249c4e9ed210fdd194168aaa2bb9977a3b4d13eb0521ad2
+  data.tar.gz: 3125b274c9f3040d30494112d95c3788800be632508068421b583d22e04851e45b8a886cdc73bc4359648494847d3e76920682fde0d868f26cc7d660335ca840

data/.travis.yml CHANGED Viewed

@@ -4,8 +4,9 @@ rvm:
  - 1.9.2
  - 1.9.3
  - 2.0.0
- - 2.1
- - 2.2
+ - 2.1.8
+ - 2.2.4
+ - 2.3.0
  - ruby-head
  - jruby-18mode
  - jruby-19mode

data/LICENSE.txt CHANGED Viewed

@@ -1,4 +1,4 @@
-Copyright (c) 2013 Peter Zotov  <whitequark@whitequark.org>
+Copyright (c) 2013-2016 whitequark  <whitequark@whitequark.org>
 Parts of the source are derived from ruby_parser:
 Copyright (c) Ryan Davis, seattle.rb

data/README.md CHANGED Viewed

@@ -2,7 +2,6 @@
 [![Gem Version](https://badge.fury.io/rb/parser.png)](https://badge.fury.io/rb/parser)
 [![Build Status](https://travis-ci.org/whitequark/parser.png?branch=master)](https://travis-ci.org/whitequark/parser)
-[![Code Climate](https://codeclimate.com/github/whitequark/parser.png)](https://codeclimate.com/github/whitequark/parser)
 [![Coverage Status](https://coveralls.io/repos/whitequark/parser/badge.png?branch=master)](https://coveralls.io/r/whitequark/parser)
 _Parser_ is a production-ready Ruby parser written in pure Ruby. It recognizes as

data/lib/parser.rb CHANGED Viewed

@@ -60,6 +60,7 @@ module Parser
   require 'parser/lexer'
   require 'parser/lexer/literal'
   require 'parser/lexer/stack_state'
+  require 'parser/lexer/dedenter'
   module Builders
     require 'parser/builders/default'

data/lib/parser/builders/default.rb CHANGED Viewed

@@ -186,6 +186,30 @@ module Parser
         string_map(begin_t, parts, end_t))
     end
+    # Indented (interpolated, noninterpolated, executable) strings
+    def dedent_string(node, dedent_level)
+      if !dedent_level.nil?
+        dedenter = Lexer::Dedenter.new(dedent_level)
+        if node.type == :str
+          str = node.children.first
+          dedenter.dedent(str)
+        elsif node.type == :dstr || node.type == :xstr
+          node.children.each do |str_node|
+            if str_node.type == :str
+              str = str_node.children.first
+              dedenter.dedent(str)
+            else
+              dedenter.interrupt
+            end
+          end
+        end
+      end
+      node
+    end
     # Regular expressions
     def regexp_options(regopt_t)

data/lib/parser/current.rb CHANGED Viewed

@@ -10,48 +10,63 @@ module Parser
   case RUBY_VERSION
   when /^1\.8\./
-    if RUBY_VERSION != '1.8.7'
-      warn_syntax_deviation 'parser/ruby18', '1.8.7'
+    current_version = '1.8.7'
+    if RUBY_VERSION != current_version
+      warn_syntax_deviation 'parser/ruby18', current_version
     end
     require 'parser/ruby18'
     CurrentRuby = Ruby18
   when /^1\.9\./
-    if RUBY_VERSION != '1.9.3'
-      warn_syntax_deviation 'parser/ruby19', '1.9.3'
+    current_version = '1.9.3'
+    if RUBY_VERSION != current_version
+      warn_syntax_deviation 'parser/ruby19', current_version
     end
     require 'parser/ruby19'
     CurrentRuby = Ruby19
   when /^2\.0\./
-    if RUBY_VERSION != '2.0.0'
-      warn_syntax_deviation 'parser/ruby20', '2.0.0'
+    current_version = '2.0.0'
+    if RUBY_VERSION != current_version
+      warn_syntax_deviation 'parser/ruby20', current_version
     end
     require 'parser/ruby20'
     CurrentRuby = Ruby20
   when /^2\.1\./
-    if RUBY_VERSION != '2.1.8'
-      warn_syntax_deviation 'parser/ruby21', '2.1.8'
+    current_version = '2.1.8'
+    if RUBY_VERSION != current_version
+      warn_syntax_deviation 'parser/ruby21', current_version
     end
     require 'parser/ruby21'
     CurrentRuby = Ruby21
   when /^2\.2\./
-    if RUBY_VERSION != '2.2.4'
-      warn_syntax_deviation 'parser/ruby22', '2.2.4'
+    current_version = '2.2.4'
+    if RUBY_VERSION != current_version
+      warn_syntax_deviation 'parser/ruby22', current_version
     end
     require 'parser/ruby22'
     CurrentRuby = Ruby22
   when /^2\.3\./
-    if RUBY_VERSION != '2.3.0'
-      warn_syntax_deviation 'parser/ruby23', '2.3.0'
+    current_version = '2.3.0'
+    if RUBY_VERSION != current_version
+      warn_syntax_deviation 'parser/ruby23', current_version
+    end
+    require 'parser/ruby23'
+    CurrentRuby = Ruby23
+  when /^2\.4\./
+    current_version = 'HEAD'
+    if RUBY_VERSION != current_version
+      warn_syntax_deviation 'parser/ruby23', current_version
     end
     require 'parser/ruby23'
@@ -59,7 +74,7 @@ module Parser
   else # :nocov:
     # Keep this in sync with released Ruby.
-    warn_syntax_deviation 'parser/ruby22', '2.2.x'
+    warn_syntax_deviation 'parser/ruby23', '2.3.x'
     require 'parser/ruby22'
     CurrentRuby = Ruby22
   end

data/lib/parser/lexer.rl CHANGED Viewed

@@ -134,7 +134,7 @@ class Parser::Lexer
     @source        = nil # source string
     @source_pts    = nil # @source as a codepoint array
     @encoding      = nil # target encoding for output strings
-    @need_encode = nil
+    @need_encode   = nil
     @p             = 0   # stream position (saved manually in #advance)
     @ts            = nil # token start
@@ -161,15 +161,21 @@ class Parser::Lexer
     @escape_s      = nil # starting position of current sequence
     @escape        = nil # last escaped sequence, as string
-    # See below the section on parsing heredocs.
-    @heredoc_e     = nil
-    @herebody_s    = nil
+    @herebody_s    = nil # starting position of current heredoc line
     # Ruby 1.9 ->() lambdas emit a distinct token if do/{ is
     # encountered after a matching closing parenthesis.
     @paren_nest    = 0
     @lambda_stack  = []
+    # After encountering the closing line of <<~SQUIGGLY_HEREDOC,
+    # we store the indentation level and give it out to the parser
+    # on request. It is not possible to infer indentation level just
+    # from the AST because escape sequences such as `\ ` or `\t` are
+    # expanded inside the lexer, but count as non-whitespace for
+    # indentation purposes.
+    @dedent_level  = nil
     # If the lexer is in `command state' (aka expr_value)
     # at the entry to #advance, it will transition to expr_cmdarg
     # instead of expr_arg at certain points.
@@ -184,26 +190,21 @@ class Parser::Lexer
     if @source_buffer
       @source = @source_buffer.source
+      @need_encode = false
-      if defined?(Encoding)
+      if @has_encode
         @encoding   = @source.encoding
-        # This is a workaround for 1.9.2, which (without force_encoding)
-        # would convert the result to UTF-8 (source encoding of lexer.rl).
-        @source    += "\0".dup.force_encoding(@encoding)
-      else
-        @source    += "\0"
       end
-      if defined?(Encoding) && @source.encoding == Encoding::UTF_8
+      if @has_encode && @source.encoding == Encoding::UTF_8
         @source_pts = @source.unpack('U*')
-        @need_encode = @has_encode && @encoding != Encoding::UTF_8
       else
         @source_pts = @source.unpack('C*')
       end
-      if (@source_pts.size > 1_000_000 && @has_encode) ||
-         @force_utf32
+      if @has_encode &&
+        (@source_pts.size > 1_000_000 || @force_utf32) &&
+        @encoding != Encoding::UTF_32LE
         # A heuristic: if the buffer is larger than 1M, then
         # store it in UTF-32 and convert the tokens as they're
         # going out. If it's smaller, the conversion overhead
@@ -216,7 +217,7 @@ class Parser::Lexer
         #
         # Patches accepted.
         @source = @source.encode(Encoding::UTF_32LE)
-        @need_encode = @has_encode && @encoding != Encoding::UTF_32LE
+        @need_encode = true
       end
       if @source_pts[0] == 0xfeff
@@ -275,6 +276,13 @@ class Parser::Lexer
     @cond = @cond_stack.pop
   end
+  def dedent_level
+    # We erase @dedent_level as a precaution to avoid accidentally
+    # using a stale value.
+    dedent_level, @dedent_level = @dedent_level, nil
+    dedent_level
+  end
   # Return next token: [type, value].
   def advance
     if @token_queue.any?
@@ -293,7 +301,7 @@ class Parser::Lexer
     _lex_from_state_actions = klass.send :_lex_from_state_actions
     _lex_eof_trans          = klass.send :_lex_eof_trans
-    pe = @source.length + 1
+    pe = @source_pts.size + 2
     p, eof = @p, pe
     @command_state = (@cs == klass.lex_en_expr_value ||
@@ -309,7 +317,7 @@ class Parser::Lexer
     elsif @cs == klass.lex_error
       [ false, [ '$error', range(p - 1, p) ] ]
     else
-      eof = @source.length
+      eof = @source_pts.size + 1
       [ false, [ '$eof',   range(eof, eof) ] ]
     end
   end
@@ -434,6 +442,8 @@ class Parser::Lexer
   def pop_literal
     old_literal = @literal_stack.pop
+    @dedent_level = old_literal.dedent_level
     if old_literal.type == :tREGEXP_BEG
       # Fetch modifiers.
       self.class.lex_en_regexp_modifiers
@@ -739,10 +749,10 @@ class Parser::Lexer
       # %q[\u123] %q[\u{12]
     | 'u' ( c_any{0,4}  -
-            xdigit{4}   -           # \u1234 is valid
-            ( '{' xdigit{1,3}       # \u{1 \u{12 \u{123 are valid
-            | '{' xdigit [ \t}] any # \u{1. \u{1} are valid
-            | '{' xdigit{2} [ \t}]  # \u{12. \u{12} are valid
+            xdigit{4}   -            # \u1234 is valid
+            ( '{' xdigit{1,3}        # \u{1 \u{12 \u{123 are valid
+            | '{' xdigit [ \t}] any? # \u{1. \u{1} are valid
+            | '{' xdigit{2} [ \t}]   # \u{12. \u{12} are valid
             )
           )
       % {
@@ -818,10 +828,10 @@ class Parser::Lexer
   #     the result is: "  i am a heredoc\n"
   #
   # To parse them, lexer refers to two kinds (remember, nested heredocs)
-  # of positions in the input stream, namely @heredoc_e
+  # of positions in the input stream, namely heredoc_e
   # (HEREDOC declaration End) and @herebody_s (HEREdoc BODY line Start).
   #
-  # @heredoc_e is simply contained inside the corresponding Literal, and
+  # heredoc_e is simply contained inside the corresponding Literal, and
   # when the heredoc is closed, the lexing is restarted from that position.
   #
   # @herebody_s is quite more complex. First, @herebody_s changes after each
@@ -844,7 +854,7 @@ class Parser::Lexer
   };
   action extend_string {
-    string = @source[@ts...@te]
+    string = tok
     string = string.encode(@encoding) if @need_encode
     # tLABEL_END is only possible in non-cond context on >= 2.2
@@ -950,6 +960,9 @@ class Parser::Lexer
         p = current_literal.heredoc_e - 1
         fnext *pop_literal; fbreak;
       else
+        # Calculate indentation level for <<~HEREDOCs.
+        current_literal.infer_indent_level(line)
         # Ditto.
         @herebody_s = @te
       end
@@ -1288,7 +1301,7 @@ class Parser::Lexer
   #
   expr_fname := |*
       keyword
-      => { emit(KEYWORDS_BEGIN[tok]);
+      => { emit_table(KEYWORDS_BEGIN);
            fnext expr_endfn; fbreak; };
       constant
@@ -1642,21 +1655,21 @@ class Parser::Lexer
       # /=/ (disambiguation with /=)
       '/' c_any
       => {
-        type = delimiter = tok[0].chr
+        type = delimiter = @source[@ts].chr
         fhold; fgoto *push_literal(type, delimiter, @ts);
       };
       # %<string>
       '%' ( any - [A-Za-z] )
       => {
-        type, delimiter = tok[0].chr, tok[-1].chr
+        type, delimiter = @source[@ts].chr, tok[-1].chr
         fgoto *push_literal(type, delimiter, @ts);
       };
       # %w(we are the people)
       '%' [A-Za-z]+ c_any
       => {
-        type, delimiter = tok[0..-2], tok[-1].chr
+        type, delimiter = tok[0..-2], @source[@te - 1].chr
         fgoto *push_literal(type, delimiter, @ts);
       };
@@ -1666,27 +1679,36 @@ class Parser::Lexer
       };
       # Heredoc start.
-      # <<EOF | <<-END | <<"FOOBAR" | <<-`SMTH`
-      '<<' '-'?
+      # <<END  | <<'END'  | <<"END"  | <<`END`  |
+      # <<-END | <<-'END' | <<-"END" | <<-`END` |
+      # <<~END | <<~'END' | <<~"END" | <<~`END`
+      '<<' [~\-]?
         ( '"' ( c_line - '"' )* '"'
         | "'" ( c_line - "'" )* "'"
         | "`" ( c_line - "`" )* "`"
-        | bareword ) % { @heredoc_e     = p }
+        | bareword ) % { heredoc_e     = p }
         c_line* c_nl % { new_herebody_s = p }
       => {
-        tok(@ts, @heredoc_e) =~ /^<<(-?)(["'`]?)(.*)\2$/
+        tok(@ts, heredoc_e) =~ /^<<(-?)(~?)(["'`]?)(.*)\3$/
-        indent    = !$1.empty?
-        type      =  '<<' + ($2.empty? ? '"' : $2)
-        delimiter =  $3
+        indent      = !$1.empty? || !$2.empty?
+        dedent_body = !$2.empty?
+        type        =  '<<' + ($3.empty? ? '"' : $3)
+        delimiter   =  $4
-        fnext *push_literal(type, delimiter, @ts, @heredoc_e, indent);
+        if dedent_body && version?(18, 19, 20, 21, 22)
+          emit(:tLSHFT, '<<', @ts, @ts + 2)
+          p = @ts + 1
+          fnext expr_beg; fbreak;
+        else
+          fnext *push_literal(type, delimiter, @ts, heredoc_e, indent, dedent_body);
-        if @herebody_s.nil?
-          @herebody_s = new_herebody_s
-        end
+          if @herebody_s.nil?
+            @herebody_s = new_herebody_s
+          end
-        p = @herebody_s - 1
+          p = @herebody_s - 1
+        end
       };
       #
@@ -1696,7 +1718,7 @@ class Parser::Lexer
       # :"bar", :'baz'
       ':' ['"] # '
       => {
-        type, delimiter = tok, tok[-1].chr
+        type, delimiter = tok, @source[@te - 1].chr
         fgoto *push_literal(type, delimiter, @ts);
       };
@@ -1718,7 +1740,9 @@ class Parser::Lexer
       # AMBIGUOUS TERNARY OPERATOR
       #
-      '?' ( e_bs escape
+      # Character constant, like ?a, ?\n, ?\u1000, and so on
+      # Don't accept \u escape with multiple codepoints, like \u{1 2 3}
+      '?' ( e_bs ( escape - ( '\u{' (xdigit+ [ \t]+)+ xdigit+ '}' ))
           | (c_any - c_space_nl - e_bs) % { @escape = nil }
           )
       => {
@@ -1739,7 +1763,7 @@ class Parser::Lexer
       '?' c_space_nl
       => {
         escape = { " "  => '\s', "\r" => '\r', "\n" => '\n', "\t" => '\t',
-                   "\v" => '\v', "\f" => '\f' }[tok[1]]
+                   "\v" => '\v', "\f" => '\f' }[@source[@ts + 1]]
         diagnostic :warning, :invalid_escape_use, { :escape => escape }, range
         p = @ts - 1
@@ -1769,16 +1793,19 @@ class Parser::Lexer
           @lambda_stack.pop
           emit(:tLAMBEG)
         else
-          emit_table(PUNCTUATION_BEGIN)
+          emit(:tLBRACE)
         end
         fbreak;
       };
       # a([1, 2])
-      e_lbrack    |
+      e_lbrack
+      => { emit(:tLBRACK)
+           fbreak; };
       # a()
       e_lparen
-      => { emit_table(PUNCTUATION_BEGIN)
+      => { emit(:tLPAREN)
            fbreak; };
       # a(+b)
@@ -1789,7 +1816,7 @@ class Parser::Lexer
       # rescue Exception => e: Block rescue.
       # Special because it should transition to expr_mid.
       'rescue' %{ tm = p } '=>'?
-      => { emit_table(KEYWORDS_BEGIN, @ts, tm)
+      => { emit(:kRESCUE, tok(@ts, tm), @ts, tm)
            p = tm - 1
            fnext expr_mid; fbreak; };
@@ -1809,7 +1836,7 @@ class Parser::Lexer
         if version?(18)
           ident = tok(@ts, @te - 2)
-          emit((tok[0] =~ /[A-Z]/) ? :tCONSTANT : :tIDENTIFIER,
+          emit((@source[@ts] =~ /[A-Z]/) ? :tCONSTANT : :tIDENTIFIER,
                ident, @ts, @te - 2)
           fhold; # continue as a symbol
@@ -1898,7 +1925,7 @@ class Parser::Lexer
       # "bar", 'baz'
       ['"] # '
       => {
-        fgoto *push_literal(tok, tok, @ts, nil, false, false);
+        fgoto *push_literal(tok, tok, @ts);
       };
       w_space_comment;
@@ -1919,7 +1946,7 @@ class Parser::Lexer
       '->'
       => {
-        emit_table(PUNCTUATION, @ts, @ts + 2)
+        emit(:tLAMBDA, tok(@ts, @ts + 2), @ts, @ts + 2)
         @lambda_stack.push @paren_nest
         fnext expr_endfn; fbreak;
@@ -1937,7 +1964,7 @@ class Parser::Lexer
           end
         else
           if tok == '{'
-            emit_table(PUNCTUATION)
+            emit(:tLCURLY)
           else # 'do'
             emit_do
           end
@@ -1989,13 +2016,11 @@ class Parser::Lexer
         if version?(18)
           emit(:tIDENTIFIER)
-          if !@static_env.nil? && @static_env.declared?(tok)
-            fnext expr_end;
-          else
+          unless !@static_env.nil? && @static_env.declared?(tok)
             fnext *arg_or_cmdarg;
           end
         else
-          emit_table(KEYWORDS)
+          emit(:k__ENCODING__)
         end
         fbreak;
       };
@@ -2093,8 +2118,8 @@ class Parser::Lexer
       # `echo foo`, "bar", 'baz'
       '`' | ['"] # '
       => {
-        type, delimiter = tok, tok[-1].chr
-        fgoto *push_literal(type, delimiter, @ts, nil, false, true);
+        type, delimiter = tok, @source[@te - 1].chr
+        fgoto *push_literal(type, delimiter, @ts, nil, false, false, true);
       };
       #
@@ -2166,11 +2191,11 @@ class Parser::Lexer
            fnext expr_beg; fbreak; };
       '?'
-      => { emit_table(PUNCTUATION)
+      => { emit(:tEH)
            fnext expr_value; fbreak; };
       e_lbrack
-      => { emit_table(PUNCTUATION)
+      => { emit(:tLBRACK2)
            fnext expr_beg; fbreak; };
       punctuation_end
@@ -2187,7 +2212,7 @@ class Parser::Lexer
       => { fgoto leading_dot; };
       ';'
-      => { emit_table(PUNCTUATION)
+      => { emit(:tSEMI)
            fnext expr_value; fbreak; };
       '\\' c_line {

data/lib/parser/lexer/dedenter.rb ADDED Viewed

@@ -0,0 +1,48 @@
+module Parser
+  class Lexer::Dedenter
+    def initialize(dedent_level)
+      @dedent_level = dedent_level
+      @at_line_begin = true
+      @indent_level  = 0
+    end
+    def dedent(string)
+      space_begin = space_end = offset = 0
+      string.chars.each_with_index do |char, index|
+        if @at_line_begin
+          if char == ?\n || @indent_level >= @dedent_level
+            string.slice!(space_begin...space_end)
+            offset += space_end - space_begin - 1
+            @at_line_begin = false
+            redo if char == ?\n
+          end
+          case char
+          when ?\s
+            @indent_level += 1
+            space_end += 1
+          when ?\t
+            @indent_level += 8 - @indent_level % 8
+            space_end += 1
+          end
+        elsif char == ?\n
+          @at_line_begin = true
+          @indent_level  = 0
+          space_begin = space_end = index - offset + 1
+        end
+      end
+      if @at_line_begin
+        string.slice!(space_begin..space_end)
+      end
+      nil
+    end
+    def interrupt
+      @at_line_begin = false
+    end
+  end
+end

data/lib/parser/lexer/literal.rb CHANGED Viewed

@@ -34,11 +34,11 @@ module Parser
       '<<`' => [ :tXSTRING_BEG,  true  ],
     }
-    attr_reader   :heredoc_e, :str_s
+    attr_reader   :heredoc_e, :str_s, :dedent_level
     attr_accessor :saved_herebody_s
     def initialize(lexer, str_type, delimiter, str_s, heredoc_e = nil,
-                   indent = false, label_allowed = false)
+                   indent = false, dedent_body = false, label_allowed = false)
       @lexer       = lexer
       @nesting     = 1
@@ -65,6 +65,9 @@ module Parser
       @indent        = indent
       @label_allowed = label_allowed
+      @dedent_body   = dedent_body
+      @dedent_level  = nil
       @interp_braces = 0
       @space_emitted = true
@@ -150,6 +153,25 @@ module Parser
       end
     end
+    def infer_indent_level(line)
+      return if !@dedent_body
+      indent_level = 0
+      line.each_char do |char|
+        case char
+        when ?\s
+          indent_level += 1
+        when ?\t
+          indent_level += (8 - indent_level % 8)
+        else
+          if @dedent_level.nil? || @dedent_level > indent_level
+            @dedent_level = indent_level
+          end
+          break
+        end
+      end
+    end
     def start_interp_brace
       @interp_braces += 1
     end

data/lib/parser/meta.rb CHANGED Viewed

@@ -6,7 +6,7 @@ module Parser
     # will be able to produce every possible node.
     NODE_TYPES =
       %w(
-        true false nil int float str dstr str
+        true false nil int float str dstr
         sym dsym xstr regopt regexp array splat
         array pair kwsplat hash irange erange self
         lvar ivar cvar gvar const defined? lvasgn

data/lib/parser/ruby23.y CHANGED Viewed

@@ -1676,11 +1676,13 @@ opt_block_args_tail:
          string1: tSTRING_BEG string_contents tSTRING_END
                     {
-                      result = @builder.string_compose(val[0], val[1], val[2])
+                      string = @builder.string_compose(val[0], val[1], val[2])
+                      result = @builder.dedent_string(string, @lexer.dedent_level)
                     }
                 | tSTRING
                     {
-                      result = @builder.string(val[0])
+                      string = @builder.string(val[0])
+                      result = @builder.dedent_string(string, @lexer.dedent_level)
                     }
                 | tCHARACTER
                     {
@@ -1689,7 +1691,8 @@ opt_block_args_tail:
          xstring: tXSTRING_BEG xstring_contents tSTRING_END
                     {
-                      result = @builder.xstring_compose(val[0], val[1], val[2])
+                      string = @builder.xstring_compose(val[0], val[1], val[2])
+                      result = @builder.dedent_string(string, @lexer.dedent_level)
                     }
           regexp: tREGEXP_BEG regexp_contents tSTRING_END tREGEXP_OPT

data/lib/parser/source/buffer.rb CHANGED Viewed

@@ -190,6 +190,25 @@ module Parser
         [ @first_line + line_no, position - line_begin ]
       end
+      ##
+      # Return an `Array` of source code lines.
+      #
+      # @return [Array<String>]
+      #
+      def source_lines
+        @lines ||= begin
+          lines = @source.lines.to_a
+          lines << '' if @source.end_with?("\n")
+          lines.each do |line|
+            line.chomp!(NEW_LINE)
+            line.freeze
+          end
+          lines.freeze
+        end
+      end
       ##
       # Extract line `lineno` from source, taking `first_line` into account.
       #
@@ -198,16 +217,7 @@ module Parser
       # @raise  [IndexError] if `lineno` is out of bounds
       #
       def source_line(lineno)
-        unless @lines
-          @lines = @source.lines.to_a
-          @lines.each { |line| line.chomp!(NEW_LINE) }
-          # If a file ends with a newline, the EOF token will appear
-          # to be one line further than the end of file.
-          @lines << ""
-        end
-        @lines.fetch(lineno - @first_line).dup
+        source_lines.fetch(lineno - @first_line).dup
       end
       ##
@@ -230,6 +240,15 @@ module Parser
         end
       end
+      ##
+      # Number of last line in the buffer
+      #
+      # @return [Integer]
+      #
+      def last_line
+        line_begins.size + @first_line - 1
+      end
       private
       def line_begins

data/lib/parser/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module Parser
-  VERSION = '2.3.0.pre.6'
+  VERSION = '2.3.0.0'
 end

data/test/test_current.rb CHANGED Viewed

@@ -16,6 +16,8 @@ class TestCurrent < Minitest::Test
       assert_equal Parser::Ruby22, Parser::CurrentRuby
     when /^2\.3\.\d+/
       assert_equal Parser::Ruby23, Parser::CurrentRuby
+    when /^2\.4\.\d+/
+      assert_equal Parser::Ruby23, Parser::CurrentRuby
     else
       flunk "Update test_current for #{RUBY_VERSION}"
     end

data/test/test_lexer.rb CHANGED Viewed

@@ -1107,6 +1107,55 @@ class TestLexer < Minitest::Test
     assert_scanned '?\M-\C-a', :tCHARACTER, "\M-\C-a"
   end
+  def test_question_eh_escape_u_1_digit
+    setup_lexer 19
+    refute_scanned '?\\u1'
+  end
+  def test_question_eh_escape_u_2_digits
+    setup_lexer 19
+    refute_scanned '?\\u12'
+  end
+  def test_question_eh_escape_u_3_digits
+    setup_lexer 19
+    refute_scanned '?\\u123'
+  end
+  def test_question_eh_escape_u_4_digits
+    if RUBY_VERSION >= '1.9'
+      setup_lexer 19
+      assert_scanned '?\\u0001', :tCHARACTER, "\u0001"
+    end
+  end
+  def test_question_eh_single_unicode_point
+    if RUBY_VERSION >= '1.9'
+      setup_lexer 19
+      assert_scanned '?\\u{123}', :tCHARACTER, "\u0123"
+      setup_lexer 19
+      assert_scanned '?\\u{a}',  :tCHARACTER, "\n"
+    end
+  end
+  def test_question_eh_multiple_unicode_points
+    setup_lexer 19
+    refute_scanned '?\\u{1 2 3}'
+    setup_lexer 19
+    refute_scanned '?\\u{a b}'
+  end
+  def test_question_eh_escape_u_unclosed_bracket
+    setup_lexer 19
+    refute_scanned '?\\u{123'
+  end
   def test_integer_hex
     assert_scanned "0x2a", :tINTEGER, 42
   end

data/test/test_parser.rb CHANGED Viewed

@@ -262,6 +262,159 @@ class TestParser < Minitest::Test
         |                 ~~~~ heredoc_end})
   end
+  def test_dedenting_heredoc
+    assert_parses(
+      s(:begin,
+        s(:send,
+          s(:send, nil, :p), :<<,
+          s(:send,
+            s(:const, nil, :E), :~)),
+        s(:const, nil, :E)),
+      %Q{p <<~E\nE},
+      %q{},
+      %w(1.8 1.9 2.0 2.1 2.2 ios mac))
+    assert_parses(
+      s(:send, nil, :p,
+        s(:dstr)),
+      %Q{p <<~E\nE},
+      %q{},
+      ALL_VERSIONS - %w(1.8 1.9 2.0 2.1 2.2 ios mac))
+    assert_parses(
+      s(:send, nil, :p,
+        s(:dstr)),
+      %Q{p <<~E\n  E},
+      %q{},
+      ALL_VERSIONS - %w(1.8 1.9 2.0 2.1 2.2 ios mac))
+    assert_parses(
+      s(:send, nil, :p,
+        s(:str, "x\n")),
+      %Q{p <<~E\n  x\nE},
+      %q{},
+      ALL_VERSIONS - %w(1.8 1.9 2.0 2.1 2.2 ios mac))
+    assert_parses(
+      s(:send, nil, :p,
+        s(:dstr,
+          s(:str, "x\n"),
+          s(:str, "  y\n"))),
+      %Q{p <<~E\n  x\n    y\nE},
+      %q{},
+      ALL_VERSIONS - %w(1.8 1.9 2.0 2.1 2.2 ios mac))
+    assert_parses(
+      s(:send, nil, :p,
+        s(:dstr,
+          s(:str, "x\n"),
+          s(:str, "y\n"))),
+      %Q{p <<~E\n\tx\n    y\nE},
+      %q{},
+      ALL_VERSIONS - %w(1.8 1.9 2.0 2.1 2.2 ios mac))
+    assert_parses(
+      s(:send, nil, :p,
+        s(:dstr,
+          s(:str, "x\n"),
+          s(:str, "y\n"))),
+      %Q{p <<~E\n\tx\n        y\nE},
+      %q{},
+      ALL_VERSIONS - %w(1.8 1.9 2.0 2.1 2.2 ios mac))
+    assert_parses(
+      s(:send, nil, :p,
+        s(:dstr,
+          s(:str, "x\n"),
+          s(:str, "y\n"))),
+      %Q{p <<~E\n    \tx\n        y\nE},
+      %q{},
+      ALL_VERSIONS - %w(1.8 1.9 2.0 2.1 2.2 ios mac))
+    assert_parses(
+      s(:send, nil, :p,
+        s(:dstr,
+          s(:str, "\tx\n"),
+          s(:str, "y\n"))),
+      %Q{p <<~E\n        \tx\n\ty\nE},
+      %q{},
+      ALL_VERSIONS - %w(1.8 1.9 2.0 2.1 2.2 ios mac))
+    assert_parses(
+      s(:send, nil, :p,
+        s(:dstr,
+          s(:str, "  x\n"),
+          s(:str, "\n"),
+          s(:str, "y\n"))),
+      %Q{p <<~E\n  x\n\ny\nE},
+      %q{},
+      ALL_VERSIONS - %w(1.8 1.9 2.0 2.1 2.2 ios mac))
+    assert_parses(
+      s(:send, nil, :p,
+        s(:dstr,
+          s(:str, "x\n"),
+          s(:str, "  \n"),
+          s(:str, "y\n"))),
+      %Q{p <<~E\n  x\n    \n  y\nE},
+      %q{},
+      ALL_VERSIONS - %w(1.8 1.9 2.0 2.1 2.2 ios mac))
+    assert_parses(
+      s(:send, nil, :p,
+        s(:dstr,
+          s(:str, "  x\n"),
+          s(:str, "  y\n"))),
+      %Q{p <<~E\n    x\n  \\  y\nE},
+      %q{},
+      ALL_VERSIONS - %w(1.8 1.9 2.0 2.1 2.2 ios mac))
+    assert_parses(
+      s(:send, nil, :p,
+        s(:dstr,
+          s(:str, "  x\n"),
+          s(:str, "\ty\n"))),
+      %Q{p <<~E\n    x\n  \\\ty\nE},
+      %q{},
+      ALL_VERSIONS - %w(1.8 1.9 2.0 2.1 2.2 ios mac))
+    assert_parses(
+      s(:send, nil, :p,
+        s(:dstr,
+          s(:str, "  x\n"),
+          s(:str, ""),
+          s(:begin,
+            s(:lvar, :foo)),
+          s(:str, "\n"))),
+      %Q{p <<~"E"\n    x\n  \#{foo}\nE},
+      %q{},
+      ALL_VERSIONS - %w(1.8 1.9 2.0 2.1 2.2 ios mac))
+    assert_parses(
+      s(:send, nil, :p,
+        s(:xstr,
+          s(:str, "  x\n"),
+          s(:str, ""),
+          s(:begin,
+            s(:lvar, :foo)),
+          s(:str, "\n"))),
+      %Q{p <<~`E`\n    x\n  \#{foo}\nE},
+      %q{},
+      ALL_VERSIONS - %w(1.8 1.9 2.0 2.1 2.2 ios mac))
+    assert_parses(
+      s(:send, nil, :p,
+        s(:dstr,
+          s(:str, "  x\n"),
+          s(:str, ""),
+          s(:begin,
+            s(:str, "  y")),
+          s(:str, "\n"))),
+      %Q{p <<~"E"\n    x\n  \#{"  y"}\nE},
+      %q{},
+      ALL_VERSIONS - %w(1.8 1.9 2.0 2.1 2.2 ios mac))
+  end
   # Symbols
   def test_symbol_plain
@@ -5124,6 +5277,28 @@ class TestParser < Minitest::Test
       ALL_VERSIONS - %w(1.8 1.9 mac ios 2.0)) # no 1.9 backport
   end
+  # We implement broken behavior, and Ruby is not fixed as of 2016-01-14.
+  def test_ruby_bug_11989
+    assert_parses(
+      s(:send, nil, :p,
+        s(:str, "x\n y\n")),
+      %Q{p <<~"E"\n  x\\n   y\nE},
+      %q{},
+      ALL_VERSIONS - %w(1.8 1.9 2.0 2.1 2.2 ios mac))
+  end
+  # We implement correct behavior, but Ruby is not fixed as of 2016-01-14.
+  def test_ruby_bug_11990
+    assert_parses(
+      s(:send, nil, :p,
+        s(:dstr,
+          s(:str, "x\n"),
+          s(:str, "  y"))),
+      %Q{p <<~E "  y"\n  x\nE},
+      %q{},
+      ALL_VERSIONS - %w(1.8 1.9 2.0 2.1 2.2 ios mac))
+  end
   def test_parser_bug_198
     assert_parses(
       s(:array,

data/test/test_source_buffer.rb CHANGED Viewed

@@ -116,4 +116,29 @@ class TestSourceBuffer < Minitest::Test
       @buffer.line_range(9)
     end
   end
+  def test_last_line
+    @buffer.source = "1\nfoo\nbar"
+    assert_equal 3, @buffer.last_line
+    @buffer = Parser::Source::Buffer.new('(string)', 5)
+    @buffer.source = ""
+    assert_equal 5, @buffer.last_line
+    @buffer = Parser::Source::Buffer.new('(string)', 5)
+    @buffer.source = "abc\n"
+    assert_equal 6, @buffer.last_line
+  end
+  def test_source_lines
+    @buffer.source = "1\nfoo\nbar\n"
+    assert_equal ['1', 'foo', 'bar', ''], @buffer.source_lines
+    assert @buffer.source_lines.frozen?
+    assert @buffer.source_lines.all?(&:frozen?)
+    @buffer = Parser::Source::Buffer.new('(string)', 5)
+    @buffer.source = "foo\nbar"
+    assert_equal ['foo', 'bar'], @buffer.source_lines
+  end
 end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: parser
 version: !ruby/object:Gem::Version
-  version: 2.3.0.pre.6
+  version: 2.3.0.0
 platform: ruby
 authors:
 - whitequark
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2015-12-20 00:00:00.000000000 Z
+date: 2016-01-14 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: ast
@@ -260,6 +260,7 @@ files:
 - lib/parser/diagnostic/engine.rb
 - lib/parser/lexer.rb
 - lib/parser/lexer.rl
+- lib/parser/lexer/dedenter.rb
 - lib/parser/lexer/explanation.rb
 - lib/parser/lexer/literal.rb
 - lib/parser/lexer/stack_state.rb
@@ -348,12 +349,12 @@ required_ruby_version: !ruby/object:Gem::Requirement
       version: '0'
 required_rubygems_version: !ruby/object:Gem::Requirement
   requirements:
-  - - ">"
+  - - ">="
     - !ruby/object:Gem::Version
-      version: 1.3.1
+      version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 2.4.1
+rubygems_version: 2.5.1
 signing_key:
 specification_version: 4
 summary: A Ruby parser written in pure Ruby.