RubyGems - ruby_parser - Versions diffs - 3.14.0 → 3.16.0 - Mend

ruby_parser 3.14.0 → 3.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (38)

data/lib/ruby_lexer.rb CHANGED Viewed

@@ -25,11 +25,10 @@ class RubyLexer
   HAS_ENC = "".respond_to? :encoding
-  IDENT_CHAR = if HAS_ENC then
-                 /[\w\u0080-\u{10ffff}]/u
-               else
-                 /[\w\x80-\xFF]/n
-               end
+  BTOKENS = {
+    ".."  => :tBDOT2,
+    "..." => :tBDOT3,
+  }
   TOKENS = {
     "!"   => :tBANG,
@@ -137,6 +136,10 @@ class RubyLexer
     ss.eos?
   end
+  def expr_beg?
+    lex_state =~ EXPR_BEG
+  end
   def expr_dot?
     lex_state =~ EXPR_DOT
   end
@@ -162,7 +165,7 @@ class RubyLexer
     indent         = func =~ STR_FUNC_INDENT ? "[ \t]*" : nil
     expand         = func =~ STR_FUNC_EXPAND
     eol            = last_line && last_line.end_with?("\r\n") ? "\r\n" : "\n"
-    eos_re         = /#{indent}#{Regexp.escape eos}(#{eol}|\z)/
+    eos_re         = /#{indent}#{Regexp.escape eos}(\r*\n|\z)/
     err_msg        = "can't match #{eos_re.inspect} anywhere in "
     rb_compile_error err_msg if end_of_stream?
@@ -177,10 +180,15 @@ class RubyLexer
     if expand then
       case
-      when scan(/#[$@]/) then
-        ss.pos -= 1 # FIX omg stupid
+      when scan(/#(?=\$(-.|[a-zA-Z_0-9~\*\$\?!@\/\\;,\.=:<>\"\&\`\'+]))/) then
+        # TODO: !ISASCII
+        # ?! see parser_peek_variable_name
+        return :tSTRING_DVAR, matched
+      when scan(/#(?=\@\@?[a-zA-Z_])/) then
+        # TODO: !ISASCII
         return :tSTRING_DVAR, matched
       when scan(/#[{]/) then
+        self.command_start = true
         return :tSTRING_DBEG, matched
       when scan(/#/) then
         string_buffer << "#"
@@ -320,6 +328,11 @@ class RubyLexer
     lpar_beg && lpar_beg == paren_nest
   end
+  def is_local_id id
+    # maybe just make this false for now
+    self.parser.env[id.to_sym] == :lvar # HACK: this isn't remotely right
+  end
   def lvar_defined? id
     # TODO: (dyna_in_block? && dvar_defined?(id)) || local_id?(id)
     self.parser.env[id.to_sym] == :lvar
@@ -338,9 +351,9 @@ class RubyLexer
     if scan(/[a-z0-9]{1,2}/i) then # Long-hand (e.g. %Q{}).
       rb_compile_error "unknown type of %string" if ss.matched_size == 2
-      c, beg, short_hand = matched, ss.getch, false
+      c, beg, short_hand = matched, getch, false
     else                               # Short-hand (e.g. %{, %., %!, etc)
-      c, beg, short_hand = "Q", ss.getch, true
+      c, beg, short_hand = "Q", getch, true
     end
     if end_of_stream? or c == RubyLexer::EOF or beg == RubyLexer::EOF then
@@ -457,7 +470,7 @@ class RubyLexer
     if text =~ check then
       content.gsub(ESC) { unescape $1 }
     else
-      content.gsub(/\\\\/, "\\").gsub(/\\'/, "'")
+      content.gsub(/\\\\/, "\\").gsub(/\\\'/, "'")
     end
   end
@@ -495,16 +508,19 @@ class RubyLexer
   end
   def process_brace_close text
-    # matching compare/parse23.y:8561
-    cond.lexpop
-    cmdarg.lexpop
     case matched
     when "}" then
       self.brace_nest -= 1
-      self.lex_state   = ruby24minus? ? EXPR_ENDARG : EXPR_END
       return :tSTRING_DEND, matched if brace_nest < 0
+    end
+    # matching compare/parse26.y:8099
+    cond.pop
+    cmdarg.pop
+    case matched
+    when "}" then
+      self.lex_state   = ruby24minus? ? EXPR_ENDARG : EXPR_END
       return :tRCURLY, matched
     when "]" then
       self.paren_nest -= 1
@@ -573,6 +589,12 @@ class RubyLexer
     end
   end
+  def process_dots text
+    tokens = ruby27plus? && expr_beg? ? BTOKENS : TOKENS
+    result EXPR_BEG, tokens[text], text
+  end
   def process_float text
     rb_compile_error "Invalid numeric format" if text =~ /__/
@@ -605,7 +627,7 @@ class RubyLexer
   end
   def process_label text
-    symbol = possibly_escape_string text, /^"/
+    symbol = possibly_escape_string text, /^\"/
     result EXPR_LAB, :tLABEL, [symbol, self.lineno]
   end
@@ -619,7 +641,7 @@ class RubyLexer
       text = text[0..-2]
     end
-    result EXPR_END, :tSTRING, text[1..-2].gsub(/\\\\/, "\\").gsub(/\\'/, "'")
+    result EXPR_END, :tSTRING, text[1..-2].gsub(/\\\\/, "\\").gsub(/\\\'/, "\'")
   end
   def process_lchevron text
@@ -791,12 +813,22 @@ class RubyLexer
     c = if scan(/\\/) then
           self.read_escape
         else
-          ss.getch
+          getch
         end
     result EXPR_END, :tSTRING, c
   end
+  def process_simple_string text
+    replacement = text[1..-2].gsub(ESC) {
+      unescape($1).b.force_encoding Encoding::UTF_8
+    }
+    replacement = replacement.b unless replacement.valid_encoding?
+    result EXPR_END, :tSTRING, replacement
+  end
   def process_slash text
     if is_beg? then
       string STR_REGEXP
@@ -870,16 +902,16 @@ class RubyLexer
     if [:tSTRING_END, :tREGEXP_END, :tLABEL_END].include? token_type then
       self.lex_strterm = nil
-      self.lex_state   = (token_type == :tLABEL_END) ? EXPR_PAR : EXPR_END|EXPR_ENDARG
+      self.lex_state   = (token_type == :tLABEL_END) ? EXPR_PAR : EXPR_LIT
     end
     return token
   end
   def process_symbol text
-    symbol = possibly_escape_string text, /^:"/
+    symbol = possibly_escape_string text, /^:\"/ # stupid emacs
-    result EXPR_END|EXPR_ENDARG, :tSYMBOL, symbol
+    result EXPR_LIT, :tSYMBOL, symbol
   end
   def process_token text
@@ -928,6 +960,8 @@ class RubyLexer
               EXPR_END
             end
+    tok_id = :tIDENTIFIER if tok_id == :tCONSTANT && is_local_id(token)
     if last_state !~ EXPR_DOT|EXPR_FNAME and
         (tok_id == :tIDENTIFIER) and # not EXPR_FNAME, not attrasgn
         lvar_defined?(token) then
@@ -951,18 +985,16 @@ class RubyLexer
     self.command_start = true if lex_state =~ EXPR_BEG
     case
-    when keyword.id0 == :kDO then
+    when keyword.id0 == :kDO then # parse26.y line 7591
       case
       when lambda_beginning? then
         self.lpar_beg = nil # lambda_beginning? == FALSE in the body of "-> do ... end"
-        self.paren_nest -= 1
+        self.paren_nest -= 1 # TODO: question this?
         result lex_state, :kDO_LAMBDA, value
       when cond.is_in_state then
         result lex_state, :kDO_COND, value
       when cmdarg.is_in_state && state != EXPR_CMDARG then
         result lex_state, :kDO_BLOCK, value
-      when state =~ EXPR_BEG|EXPR_ENDARG then
-        result lex_state, :kDO_BLOCK, value
       else
         result lex_state, :kDO, value
       end
@@ -979,9 +1011,9 @@ class RubyLexer
     ss.unscan # put back "_"
     if beginning_of_line? && scan(/\__END__(\r?\n|\Z)/) then
-      return [RubyLexer::EOF, RubyLexer::EOF]
-    elsif scan(/\_\w*/) then
-      return process_token matched
+      [RubyLexer::EOF, RubyLexer::EOF]
+    elsif scan(/#{IDENT_CHAR}+/) then
+      process_token matched
     end
   end
@@ -1018,7 +1050,7 @@ class RubyLexer
     when scan(/x([0-9a-fA-F]{1,2})/) then # hex constant
       # TODO: force encode everything to UTF-8?
       ss[1].to_i(16).chr.force_encoding Encoding::UTF_8
-    when check(/M-\\[\\MCc]/) then
+    when check(/M-\\./) then
       scan(/M-\\/) # eat it
       c = self.read_escape
       c[0] = (c[0].ord | 0x80).chr
@@ -1032,6 +1064,11 @@ class RubyLexer
       c = self.read_escape
       c[0] = (c[0].ord & 0x9f).chr
       c
+    when check(/(C-|c)\\(?!u|\\)/) then
+      scan(/(C-|c)\\/) # eat it
+      c = read_escape
+      c[0] = (c[0].ord & 0x9f).chr
+      c
     when scan(/C-\?|c\?/) then
       127.chr
     when scan(/(C-|c)(.)/) then
@@ -1040,17 +1077,25 @@ class RubyLexer
       c
     when scan(/^[89]/i) then # bad octal or hex... MRI ignores them :(
       matched
-    when scan(/u([0-9a-fA-F]{4}|\{[0-9a-fA-F]{2,6}\})/) then
-      [ss[1].delete("{}").to_i(16)].pack("U")
-    when scan(/u([0-9a-fA-F]{1,3})/) then
+    when scan(/u(\h{4})/) then
+      [ss[1].to_i(16)].pack("U")
+    when scan(/u(\h{1,3})/) then
       rb_compile_error "Invalid escape character syntax"
+    when scan(/u\{(\h+(?:\s+\h+)*)\}/) then
+      ss[1].split.map { |s| s.to_i(16) }.pack("U*")
     when scan(/[McCx0-9]/) || end_of_stream? then
       rb_compile_error("Invalid escape character syntax")
     else
-      ss.getch
+      getch
     end.dup
   end
+  def getch
+    c = ss.getch
+    c = ss.getch if c == "\r" && ss.peek(1) == "\n"
+    c
+  end
   def regx_options # TODO: rewrite / remove
     good, bad = [], []
@@ -1106,6 +1151,10 @@ class RubyLexer
     parser.class.version <= 24
   end
+  def ruby27plus?
+    parser.class.version >= 27
+  end
   def scan re
     ss.scan re
   end
@@ -1177,8 +1226,6 @@ class RubyLexer
       handled = true
       case
-      when paren_re && scan(paren_re) then
-        self.string_nest += 1
       when scan(term_re) then
         if self.string_nest == 0 then
           ss.pos -= 1
@@ -1186,6 +1233,8 @@ class RubyLexer
         else
           self.string_nest -= 1
         end
+      when paren_re && scan(paren_re) then
+        self.string_nest += 1
       when expand && scan(/#(?=[\$\@\{])/) then # TODO: this seems wrong
         ss.pos -= 1
         break
@@ -1232,9 +1281,9 @@ class RubyLexer
             end
         x = Regexp.escape paren if paren && paren != "\000"
         re = if qwords then
-               /[^#{t}#{x}\#\0\\\s]+|./ # |. to pick up whatever
+               /[^#{t}#{x}\#\\\s]+|./ # |. to pick up whatever
              else
-               /[^#{t}#{x}\#\0\\]+|./
+               /[^#{t}#{x}\#\\]+|./
              end
         scan re
@@ -1274,10 +1323,12 @@ class RubyLexer
           s
         when /^[McCx0-9]/ then
           rb_compile_error("Invalid escape character syntax")
-        when /u([0-9a-fA-F]{4}|\{[0-9a-fA-F]{2,6}\})/ then
+        when /u(\h{4})/ then
           [$1.delete("{}").to_i(16)].pack("U")
-        when /u([0-9a-fA-F]{1,3})/ then
+        when /u(\h{1,3})/ then
           rb_compile_error("Invalid escape character syntax")
+        when /u\{(\h+(?:\s+\h+)*)\}/ then
+          $1.split.map { |s| s.to_i(16) }.pack("U*")
         else
           s
         end
@@ -1355,11 +1406,11 @@ class RubyLexer
       # extra fake lex_state names to make things a bit cleaner
       EXPR_LAB = EXPR_ARG|EXPR_LABELED
-      EXPR_NUM = EXPR_END|EXPR_ENDARG
+      EXPR_LIT = EXPR_END|EXPR_ENDARG
       EXPR_PAR = EXPR_BEG|EXPR_LABEL
       EXPR_PAD = EXPR_BEG|EXPR_LABELED
-      EXPR_LIT = EXPR_NUM # TODO: migrate to EXPR_LIT
+      EXPR_NUM = EXPR_LIT
       expr_names.merge!(EXPR_NONE    => "EXPR_NONE",
                         EXPR_BEG     => "EXPR_BEG",

data/lib/ruby_lexer.rex CHANGED Viewed

@@ -6,9 +6,9 @@ class RubyLexer
 macro
-  IDENT         /^#{IDENT_CHAR}+/o
+  IDENT_CHAR    /[a-zA-Z0-9_[:^ascii:]]/
-  ESC           /\\((?>[0-7]{1,3}|x[0-9a-fA-F]{1,2}|M-[^\\]|(C-|c)[^\\]|u[0-9a-fA-F]{1,4}|u\{[0-9a-fA-F]+\}|[^0-7xMCc]))/
+  ESC           /\\((?>[0-7]{1,3}|x\h{1,2}|M-[^\\]|(C-|c)[^\\]|u\h{1,4}|u\{\h+(?:\s+\h+)*\}|[^0-7xMCc]))/
   SIMPLE_STRING /((#{ESC}|\#(#{ESC}|[^\{\#\@\$\"\\])|[^\"\\\#])*)/o
   SSTRING       /((\\.|[^\'])*)/
@@ -48,7 +48,7 @@ rule
 |               /\![=~]?/               { result :arg_state, TOKENS[text], text }
 : /\./
-|               /\.\.\.?/               { result EXPR_BEG, TOKENS[text], text }
+|               /\.\.\.?/               process_dots
 |               /\.\d/                  { rb_compile_error "no .<digit> floating literal anymore put 0 before dot" }
 |               /\./                    { self.lex_state = EXPR_BEG; result EXPR_DOT, :tDOT, "." }
@@ -62,7 +62,7 @@ rule
 |               /\=(?=begin\b)/         { result arg_state, TOKENS[text], text }
 ruby22_label?   /\"#{SIMPLE_STRING}\":/o process_label
-                /\"(#{SIMPLE_STRING})\"/o { result EXPR_END, :tSTRING, text[1..-2].gsub(ESC) { unescape $1 } }
+                /\"(#{SIMPLE_STRING})\"/o process_simple_string
                 /\"/                    { string STR_DQUOTE; result nil, :tSTRING_BEG, text }
                 /\@\@?\d/               { rb_compile_error "`#{text}` is not allowed as a variable name" }
@@ -164,13 +164,12 @@ was_label?        /\'#{SSTRING}\':?/o   process_label_or_string
 | in_fname?     /\$([1-9]\d*)/                   process_gvar
 |               /\$([1-9]\d*)/                   process_nthref
 |               /\$0/                            process_gvar
-|               /\$[^[:ascii:]]+/                process_gvar
+|               /\$#{IDENT_CHAR}+/               process_gvar
 |               /\$\W|\$\z/                      process_gvar_oddity
-|               /\$\w+/                          process_gvar
                 /\_/                    process_underscore
-                /#{IDENT}/o             process_token
+                /#{IDENT_CHAR}+/o       process_token
                 /\004|\032|\000|\Z/     { [RubyLexer::EOF, RubyLexer::EOF] }

data/lib/ruby_lexer.rex.rb CHANGED Viewed

@@ -1,7 +1,7 @@
 # encoding: UTF-8
 #--
 # This file is automatically generated. Do not modify it.
-# Generated by: oedipus_lex version 2.5.1.
+# Generated by: oedipus_lex version 2.5.2.
 # Source: lib/ruby_lexer.rex
 #++
@@ -16,8 +16,8 @@ class RubyLexer
   require 'strscan'
   # :stopdoc:
-  IDENT         = /^#{IDENT_CHAR}+/o
-  ESC           = /\\((?>[0-7]{1,3}|x[0-9a-fA-F]{1,2}|M-[^\\]|(C-|c)[^\\]|u[0-9a-fA-F]{1,4}|u\{[0-9a-fA-F]+\}|[^0-7xMCc]))/
+  IDENT_CHAR    = /[a-zA-Z0-9_[:^ascii:]]/
+  ESC           = /\\((?>[0-7]{1,3}|x\h{1,2}|M-[^\\]|(C-|c)[^\\]|u\h{1,4}|u\{\h+(?:\s+\h+)*\}|[^0-7xMCc]))/
   SIMPLE_STRING = /((#{ESC}|\#(#{ESC}|[^\{\#\@\$\"\\])|[^\"\\\#])*)/o
   SSTRING       = /((\\.|[^\'])*)/
   INT_DEC       = /[+]?(?:(?:[1-9][\d_]*|0)(?!\.\d)(ri|r|i)?\b|0d[0-9_]+)(ri|r|i)?/i
@@ -138,7 +138,7 @@ class RubyLexer
           when ss.match?(/\./) then
             case
             when text = ss.scan(/\.\.\.?/) then
-              action { result EXPR_BEG, TOKENS[text], text }
+              process_dots text
             when ss.skip(/\.\d/) then
               action { rb_compile_error "no .<digit> floating literal anymore put 0 before dot" }
             when ss.skip(/\./) then
@@ -160,7 +160,7 @@ class RubyLexer
           when ruby22_label? && (text = ss.scan(/\"#{SIMPLE_STRING}\":/o)) then
             process_label text
           when text = ss.scan(/\"(#{SIMPLE_STRING})\"/o) then
-            action { result EXPR_END, :tSTRING, text[1..-2].gsub(ESC) { unescape $1 } }
+            process_simple_string text
           when text = ss.scan(/\"/) then
             action { string STR_DQUOTE; result nil, :tSTRING_BEG, text }
           when text = ss.scan(/\@\@?\d/) then
@@ -328,16 +328,14 @@ class RubyLexer
               process_nthref text
             when text = ss.scan(/\$0/) then
               process_gvar text
-            when text = ss.scan(/\$[^[:ascii:]]+/) then
+            when text = ss.scan(/\$#{IDENT_CHAR}+/) then
               process_gvar text
             when text = ss.scan(/\$\W|\$\z/) then
               process_gvar_oddity text
-            when text = ss.scan(/\$\w+/) then
-              process_gvar text
             end # group /\$/
           when text = ss.scan(/\_/) then
             process_underscore text
-          when text = ss.scan(/#{IDENT}/o) then
+          when text = ss.scan(/#{IDENT_CHAR}+/o) then
             process_token text
           when ss.skip(/\004|\032|\000|\Z/) then
             action { [RubyLexer::EOF, RubyLexer::EOF] }

data/lib/ruby_parser.rb CHANGED Viewed

@@ -78,10 +78,14 @@ require "ruby23_parser"
 require "ruby24_parser"
 require "ruby25_parser"
 require "ruby26_parser"
+require "ruby27_parser"
+require "ruby30_parser"
 class RubyParser # HACK
   VERSIONS.clear # also a HACK caused by racc namespace issues
+  class V30 < ::Ruby30Parser; end
+  class V27 < ::Ruby27Parser; end
   class V26 < ::Ruby26Parser; end
   class V25 < ::Ruby25Parser; end
   class V24 < ::Ruby24Parser; end