RubyGems - ruby_parser - Versions diffs - 3.17.0 → 3.18.0 - Mend

ruby_parser 3.17.0 → 3.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (43) hide show

checksums.yaml +4 -4
checksums.yaml.gz.sig +0 -0
data/History.rdoc +76 -0
data/Manifest.txt +3 -0
data/README.rdoc +1 -0
data/Rakefile +68 -18
data/bin/ruby_parse_extract_error +1 -1
data/compare/normalize.rb +6 -1
data/gauntlet.md +106 -0
data/lib/rp_extensions.rb +15 -36
data/lib/rp_stringscanner.rb +20 -51
data/lib/ruby20_parser.rb +3445 -3394
data/lib/ruby20_parser.y +326 -248
data/lib/ruby21_parser.rb +3543 -3511
data/lib/ruby21_parser.y +321 -245
data/lib/ruby22_parser.rb +3553 -3512
data/lib/ruby22_parser.y +325 -247
data/lib/ruby23_parser.rb +3566 -3530
data/lib/ruby23_parser.y +325 -247
data/lib/ruby24_parser.rb +3595 -3548
data/lib/ruby24_parser.y +325 -247
data/lib/ruby25_parser.rb +3595 -3547
data/lib/ruby25_parser.y +325 -247
data/lib/ruby26_parser.rb +3605 -3560
data/lib/ruby26_parser.y +324 -246
data/lib/ruby27_parser.rb +4657 -3539
data/lib/ruby27_parser.y +878 -253
data/lib/ruby30_parser.rb +5230 -3882
data/lib/ruby30_parser.y +1069 -321
data/lib/ruby3_parser.yy +3467 -0
data/lib/ruby_lexer.rb +261 -609
data/lib/ruby_lexer.rex +27 -20
data/lib/ruby_lexer.rex.rb +59 -23
data/lib/ruby_lexer_strings.rb +638 -0
data/lib/ruby_parser.yy +910 -263
data/lib/ruby_parser_extras.rb +289 -114
data/test/test_ruby_lexer.rb +181 -129
data/test/test_ruby_parser.rb +1213 -108
data/tools/munge.rb +34 -6
data/tools/ripper.rb +15 -10
data.tar.gz.sig +0 -0
metadata +11 -12
metadata.gz.sig +0 -0

data/lib/ruby_lexer_strings.rb ADDED Viewed

@@ -0,0 +1,638 @@
+# frozen_string_literal: true
+class RubyLexer
+  def eat_whitespace
+    r = scan(/\s+/)
+    self.lineno += r.count("\n") if r
+    r += eat_whitespace if eos? && in_heredoc? # forces heredoc pop
+    r
+  end
+  def heredoc here                              # ../compare/parse30.y:7678 -- lexes the next token of a heredoc body; here is the :heredoc lex_strterm array
+    _, term, func, _indent_max, _lineno, range = here # layout set in heredoc_identifier: kind, term, func, indent, lineno, range, byte_pos
+    start_line = lineno
+    eos = term # HACK
+    indent = func =~ STR_FUNC_INDENT # truthy => the terminator may be preceded by spaces or tabs (see indent_re)
+    self.string_buffer = []
+    last_line = self.ss_string[range] if range # text that followed the heredoc identifier on its own line
+    eol = last_line && last_line.end_with?("\r\n") ? "\r\n" : "\n" # HACK: reuse the line ending seen on the identifier line
+    expand = func =~ STR_FUNC_EXPAND
+    # TODO? p->heredoc_line_indent == -1
+    indent_re = indent ? "[ \t]*" : nil
+    eos_re    = /#{indent_re}#{Regexp.escape eos}(?=\r?\n|\z)/ # terminator must be followed by a line break or the end of input
+    err_msg   = "can't match #{eos_re.inspect} anywhere in "
+    maybe_pop_stack
+    rb_compile_error err_msg if end_of_stream?
+    if beginning_of_line? && scan(eos_re) then # terminator line reached: close the heredoc
+      scan(/\r?\n|\z/)
+      self.lineno += 1 if matched =~ /\n/
+      heredoc_restore # pushes a scanner positioned just after the identifier (see heredoc_restore)
+      self.lex_strterm = nil
+      self.lex_state = EXPR_END
+      return :tSTRING_END, [term, func, range]
+    end
+    if expand then
+      case # same patterns as scan_variable_name, except that a lone hash sign is buffered here
+      when scan(/#(?=\$(-.|[a-zA-Z_0-9~\*\$\?!@\/\\;,\.=:<>\"\&\`\'+]))/) then
+        # TODO: !ISASCII
+        # ?! see parser_peek_variable_name
+        return :tSTRING_DVAR, matched
+      when scan(/#(?=\@\@?[a-zA-Z_])/) then
+        # TODO: !ISASCII
+        return :tSTRING_DVAR, matched
+      when scan(/#[{]/) then
+        self.command_start = true
+        return :tSTRING_DBEG, matched
+      when scan(/#/) then
+        string_buffer << "#"
+      end
+      begin
+        # NOTE: this visibly diverges from the C code but uses tokadd_string
+        #       to stay clean.
+        str = tokadd_string func, eol, nil
+        rb_compile_error err_msg if str == RubyLexer::EOF
+        if str != eol then # tokadd_string stopped before the line ending: emit what has been buffered so far
+          str = string_buffer.join
+          string_buffer.clear
+          return result nil, :tSTRING_CONTENT, str, start_line
+        else
+          string_buffer << scan(/\r?\n/)
+          self.lineno += 1 # TODO: try to remove most scan(/\n/) and friends
+        end
+      end until check eos_re # begin/end-until: the body runs once before the first check
+    else
+      until check(eos_re) do # non-expanding heredoc: copy whole lines until the terminator is next
+        string_buffer << scan(/.*(\r?\n|\z)/)
+        self.lineno += 1
+        rb_compile_error err_msg if end_of_stream?
+      end
+    end
+    string_content = begin
+                       s = string_buffer.join
+                       s.b.force_encoding Encoding::UTF_8 # NOTE(review): String#b returns a copy, so this line leaves s untouched -- confirm intent
+                       s
+                     end
+    string_buffer.clear
+    result nil, :tSTRING_CONTENT, string_content, start_line
+  end
+  def heredoc_identifier                        # ../compare/parse30.y:7354 -- scans a heredoc identifier and installs the :heredoc lex_strterm; returns nil when nothing matches
+    token  = :tSTRING_BEG
+    func   = STR_FUNC_BORING
+    term   = nil
+    indent = nil # never reassigned below; stored as-is in lex_strterm
+    quote  = nil
+    char_pos = nil
+    byte_pos = nil
+    heredoc_indent_mods = "-"
+    heredoc_indent_mods += '\~' if ruby23plus? # the tilde modifier is accepted only when ruby23plus? holds
+    case
+    when scan(/([#{heredoc_indent_mods}]?)([\'\"\`])(.*?)\2/) then # quoted form: optional modifier, quote, terminator, same quote again
+      mods, quote, term = match[1], match[2], match[3]
+      char_pos = ss.charpos # offset right after the identifier, in characters (assuming ss is a StringScanner)
+      byte_pos = ss.pos # same spot in bytes; heredoc_restore assigns it to the new scanner
+      func |= STR_FUNC_INDENT unless mods.empty?
+      func |= STR_FUNC_DEDENT if mods == "~"
+      func |= case quote
+              when "\'" then
+                STR_SQUOTE
+              when '"' then
+                STR_DQUOTE
+              when "`" then
+                token = :tXSTRING_BEG
+                STR_XQUOTE
+              else
+                debug 1 # not expected: the regexp above captures only one of the three quote characters
+              end
+    when scan(/[#{heredoc_indent_mods}]?([\'\"\`])(?!\1*\Z)/) then # a quote was opened but the branch above found no closing one
+      rb_compile_error "unterminated here document identifier"
+    when scan(/([#{heredoc_indent_mods}]?)(#{IDENT_CHAR}+)/) then # bare identifier
+      mods, term = match[1], match[2]
+      quote = '"' # bare identifiers get the same flag as double-quoted ones (STR_DQUOTE below)
+      char_pos = ss.charpos
+      byte_pos = ss.pos
+      func |= STR_FUNC_INDENT unless mods.empty?
+      func |= STR_FUNC_DEDENT if mods == "~"
+      func |= STR_DQUOTE
+    else
+      return # nothing that looks like a heredoc identifier
+    end
+    old_lineno = self.lineno
+    rest_of_line = scan(/.*(?:\r?\n|\z)/) # remainder of the identifier line, line break included
+    self.lineno += rest_of_line.count "\n"
+    char_pos_end = ss.charpos - 1
+    range = nil
+    range = char_pos..char_pos_end unless rest_of_line.empty? # character span of rest_of_line; stays nil when nothing followed the identifier
+    self.lex_strterm = [:heredoc, term, func, indent, old_lineno, range, byte_pos]
+    result nil, token, quote, old_lineno
+  end
+  def heredoc_restore                           # ../compare/parse30.y:7438
+    _, _term, _func, _indent, lineno, range, bytepos = lex_strterm
+    new_ss = ss.class.new self.ss_string[0..range.max]
+    new_ss.pos = bytepos
+    lineno_push lineno
+    ss_push new_ss
+    nil
+  end
+  def newtok
+    string_buffer.clear
+  end
+  def nextc
+    # TODO:
+    # if (UNLIKELY((p->lex.pcur == p->lex.pend) || p->eofp || RTEST(p->lex.nextline))) {
+    #     if (nextline(p)) return -1;
+    # }
+    maybe_pop_stack
+    c = ss.getch
+    if c == "\n" then
+      ss.unscan
+      c = nil
+    end
+    c
+  end
+  def parse_string quote                         # ../compare/parse30.y:7273 -- lexes the next token of a non-heredoc string-like literal
+    _, func, term, paren = quote # same layout as the array installed by the string method
+    qwords = func =~ STR_FUNC_QWORDS
+    regexp = func =~ STR_FUNC_REGEXP
+    expand = func =~ STR_FUNC_EXPAND
+    list   = func =~ STR_FUNC_LIST
+    termx  = func =~ STR_FUNC_TERM # flag set below once a word-list terminator was scanned; the following call then emits the closing token
+    space = false
+    term_re = regexp_cache[term]
+    if termx then
+      # self.nextc if qwords # delayed term
+      self.lex_strterm = nil
+      return result EXPR_END, regexp ? :tREGEXP_END : :tSTRING_END, term
+    end
+    space = true if qwords and eat_whitespace # remember that whitespace was skipped inside a word list
+    if list then
+      debug 4
+      # quote[1] -= STR_FUNC_LIST
+      # space = true
+    end
+    # TODO: move to quote.nest!
+    if string_nest == 0 && scan(term_re) then # terminator seen outside any nested delimiter pair
+      if qwords then
+        quote[1] |= STR_FUNC_TERM # mutates the strterm array so the next call takes the termx branch
+        return :tSPACE, matched
+      end
+      return string_term func
+    end
+    return result nil, :tSPACE, " " if space
+    newtok
+    if expand && check(/#/) then
+      t = self.scan_variable_name
+      return t if t
+      tokadd "#" # scan_variable_name swallowed a plain hash sign; keep it as content
+    end
+    # TODO: add string_nest, enc, base_enc ?
+    lineno = self.lineno # line on which this chunk of content starts
+    if tokadd_string(func, term, paren) == RubyLexer::EOF then
+      if qwords then
+        rb_compile_error "unterminated list meets end of file"
+      end
+      if regexp then
+        rb_compile_error "unterminated regexp meets end of file"
+      else
+        rb_compile_error "unterminated string meets end of file"
+      end
+    end
+    result nil, :tSTRING_CONTENT, string_buffer.join, lineno
+  end
+  # called from process_percent; lexes the opener of a percent literal and installs its strterm via the string method
+  def process_percent_quote                      # ../compare/parse30.y:8645
+    c = getch # type %<type><term>...<term>
+    long_hand = !!(c =~ /[QqWwIixrs]/) # true when an explicit type letter follows the percent sign
+    if end_of_stream? || c !~ /\p{Alnum}/ then # no type letter: c is the delimiter itself and the type becomes Q
+      term = c # TODO? PERCENT_END[c] || c
+      debug 2 if c && c !~ /\p{ASCII}/
+      c = "Q"
+    else
+      term = getch
+      debug 3 if term =~ /\p{Alnum}|\P{ASCII}/
+    end
+    if end_of_stream? or c == RubyLexer::EOF or term == RubyLexer::EOF then
+      rb_compile_error "unterminated quoted string meets end of file"
+    end
+    # a NUL paren marks a delimiter without a PERCENT_END entry (opener == closer); tokadd_string then builds no paren_re, i.e. no nesting
+    paren = term
+    term = PERCENT_END[term] # presumably maps an opening bracket to its closer; nil for any other delimiter
+    term, paren = paren, "\0" if term.nil? # TODO: "\0" -> nil
+    text = long_hand ? "%#{c}#{paren}" : "%#{term}" # NOTE(review): for unpaired delimiters paren is already NUL at this point -- confirm that is intended
+    current_line = self.lineno
+    token_type, string_type =
+      case c
+      when "Q" then
+        [:tSTRING_BEG,   STR_DQUOTE]
+      when "q" then
+        [:tSTRING_BEG,   STR_SQUOTE]
+      when "W" then
+        eat_whitespace # skip whitespace right after the opening delimiter
+        [:tWORDS_BEG,    STR_DQUOTE | STR_FUNC_QWORDS]
+      when "w" then
+        eat_whitespace
+        [:tQWORDS_BEG,   STR_SQUOTE | STR_FUNC_QWORDS]
+      when "I" then
+        eat_whitespace
+        [:tSYMBOLS_BEG,  STR_DQUOTE | STR_FUNC_QWORDS]
+      when "i" then
+        eat_whitespace
+        [:tQSYMBOLS_BEG, STR_SQUOTE | STR_FUNC_QWORDS]
+      when "x" then
+        [:tXSTRING_BEG,  STR_XQUOTE]
+      when "r" then
+        [:tREGEXP_BEG,   STR_REGEXP]
+      when "s" then
+        self.lex_state = EXPR_FNAME
+        [:tSYMBEG,       STR_SSYM]
+      else
+        rb_compile_error "unknown type of %string. Expected [QqWwIixrs], found '#{c}'."
+      end
+    string string_type, term, paren # installs lex_strterm as [:strterm, type, term, paren]
+    result nil, token_type, text, current_line
+  end
+  def process_string_or_heredoc                  # ../compare/parse30.y:9075
+    if lex_strterm[0] == :heredoc then
+      self.heredoc lex_strterm
+    else
+      self.parse_string lex_strterm
+    end
+  end
+  def read_escape flags = nil                    # ../compare/parse30.y:6712 -- decodes the escape at the scan position (the leading backslash is not scanned here); flags is unused in this body
+    case
+    when scan(/\\/) then                  # Backslash
+      '\\'
+    when scan(/n/) then                   # newline
+      "\n"
+    when scan(/t/) then                   # horizontal tab
+      "\t"
+    when scan(/r/) then                   # carriage-return
+      "\r"
+    when scan(/f/) then                   # form-feed
+      "\f"
+    when scan(/v/) then                   # vertical tab
+      "\13"
+    when scan(/a/) then                   # alarm(bell)
+      "\007"
+    when scan(/e/) then                   # escape
+      "\033"
+    when scan(/[0-7]{1,3}/) then          # octal constant
+      (matched.to_i(8) & 0xFF).chr.force_encoding Encoding::UTF_8 # value is masked to a single byte
+    when scan(/x([0-9a-fA-F]{1,2})/) then # hex constant
+      # TODO: force encode everything to UTF-8?
+      match[1].to_i(16).chr.force_encoding Encoding::UTF_8
+    when scan(/b/) then                   # backspace
+      "\010"
+    when scan(/s/) then                   # space
+      " "
+    when check(/M-\\u/) then              # meta + unicode escape: nothing is consumed, only the debug hook runs
+      debug 5
+    when scan(/M-\\(?=.)/) then           # meta + nested escape
+      c = read_escape
+      c[0] = (c[0].ord | 0x80).chr # set the high bit
+      c
+    when scan(/M-(\p{ASCII})/) then       # meta + plain ASCII character
+      # TODO: ISCNTRL(c) -> goto eof
+      c = match[1]
+      c[0] = (c[0].ord | 0x80).chr
+      c
+    when check(/(C-|c)\\u/) then          # control + unicode escape: nothing is consumed, only the debug hook runs
+      debug 6
+    when scan(/(C-|c)\\?\?/) then         # control + question mark (optionally backslashed): DEL
+      127.chr
+    when scan(/(C-|c)\\/) then            # control + nested escape
+      c = read_escape
+      c[0] = (c[0].ord & 0x9f).chr # keep bits 0-4 and bit 7 only
+      c
+    when scan(/(?:C-|c)(.)/) then         # control + plain character
+      c = match[1]
+      c[0] = (c[0].ord & 0x9f).chr
+      c
+    when scan(/^[89]/i) then # bad octal or hex... MRI ignores them :(
+      matched
+    when scan(/u(\h{4})/) then            # exactly four hex digits
+      [match[1].to_i(16)].pack("U")
+    when scan(/u(\h{1,3})/) then          # fewer than four hex digits
+      debug 7
+      rb_compile_error "Invalid escape character syntax"
+    when scan(/u\{(\h+(?: +\h+)*)\}/) then # braced list of space-separated codepoints
+      match[1].split.map { |s| s.to_i(16) }.pack("U*")
+    when scan(/[McCx0-9]/) || end_of_stream? then # an escape that failed every pattern above, or nothing left to read
+      rb_compile_error("Invalid escape character syntax")
+    else
+      getch # any other character stands for itself
+    end.dup # hand back a copy: literals in this file are frozen and the meta/control branches assign to c[0]
+  end
+  def regx_options                               # ../compare/parse30.y:6914
+    newtok
+    options = scan(/\p{Alpha}+/) || ""
+    rb_compile_error("unknown regexp options: %s" % [options]) if
+      options =~ /[^ixmonesu]/
+    options
+  end
+  def scan_variable_name                        # ../compare/parse30.y:7208
+    case
+    when scan(/#(?=\$(-.|[a-zA-Z_0-9~\*\$\?!@\/\\;,\.=:<>\"\&\`\'+]))/) then
+      # TODO: !ISASCII
+      return :tSTRING_DVAR, matched
+    when scan(/#(?=\@\@?[a-zA-Z_])/) then
+      # TODO: !ISASCII
+      return :tSTRING_DVAR, matched
+    when scan(/#[{]/) then
+      self.command_start = true
+      return :tSTRING_DBEG, matched
+    when scan(/#/) then
+      # do nothing but swallow
+    end
+    # if scan(/\P{ASCII}|_|\p{Alpha}/) then # TODO: fold into above DVAR cases
+    #   # if (!ISASCII(c) || c == '_' || ISALPHA(c))
+    #   #     return tSTRING_DVAR;
+    # end
+    nil
+  end
+  def string type, beg, nnd = nil
+    # label = (IS_LABEL_POSSIBLE() ? str_label : 0);
+    # p->lex.strterm = NEW_STRTERM(str_dquote | label, '"', 0);
+    # p->lex.ptok = p->lex.pcur-1;
+    type |= STR_FUNC_LABEL if is_label_possible?
+    self.lex_strterm = [:strterm, type, beg, nnd || "\0"]
+  end
+  def string_term func                          # ../compare/parse30.y:7254
+    self.lex_strterm = nil
+    return result EXPR_END, :tREGEXP_END, self.regx_options if
+      func =~ STR_FUNC_REGEXP
+    if func =~ STR_FUNC_LABEL && is_label_suffix? then
+      self.getch
+      self.lex_state = EXPR_BEG|EXPR_LABEL
+      return :tLABEL_END, string_buffer.join
+    end
+    self.lex_state = EXPR_END
+    return :tSTRING_END, [self.matched, func]
+  end
+  def tokadd c                                  # ../compare/parse30.y:6548
+    string_buffer << c
+  end
+  def tokadd_escape                              # ../compare/parse30.y:6840
+    case
+    when scan(/\\\n/) then
+      # just ignore
+    when scan(/\\([0-7]{1,3}|x[0-9a-fA-F]{1,2})/) then
+      tokadd matched
+    when scan(/\\([MC]-|c)(?=\\)/) then
+      tokadd matched
+      self.tokadd_escape
+    when scan(/\\([MC]-|c)(.)/) then
+      tokadd matched
+      self.tokadd_escape if check(/\\/) # recurse if continued!
+    when scan(/\\[McCx]/) then # all unprocessed branches from above have failed
+      rb_compile_error "Invalid escape character syntax"
+    when scan(/\\(.)/m) then
+      chr, = match[1]
+      tokadd "\\"
+      tokadd chr
+    else
+      rb_compile_error "Invalid escape character syntax: %p" % [self.rest.lines.first]
+    end
+  end
+  def tokadd_string func, term, paren           # ../compare/parse30.y:7020 -- buffers literal content until a terminator, an interpolation start, or (word lists) whitespace
+    qwords = func =~ STR_FUNC_QWORDS
+    escape = func =~ STR_FUNC_ESCAPE
+    expand = func =~ STR_FUNC_EXPAND
+    regexp = func =~ STR_FUNC_REGEXP
+    paren_re = regexp_cache[paren] if paren != "\0" # a NUL paren means there is no nesting delimiter
+    term_re  = if term == "\n" # a newline terminator also matches CRLF
+                 /\r?\n/
+               else
+                 regexp_cache[term]
+               end
+    until end_of_stream? do
+      case
+      when paren_re && scan(paren_re) then # nested opener
+        self.string_nest += 1
+      when scan(term_re) then
+        if self.string_nest == 0 then
+          self.pos -= 1 # TODO: ss.unscan 665 errors #$ HACK: why do we depend on this so hard?
+          break # leave eos loop, go parse term in caller (heredoc or parse_string)
+        else
+          self.lineno += matched.count("\n")
+          self.string_nest -= 1 # closer of a nested pair: falls through and is buffered as content
+        end
+      when expand && check(/#[\$\@\{]/) then
+        # do nothing since we used `check`
+        break # leave eos loop
+      when check(/\\/) then
+        case
+        when scan(/\\\n/) then # backslash immediately followed by a newline
+          self.lineno += 1
+          case
+          when qwords then
+            tokadd "\n"
+            next
+          when expand then
+            next if func !~ STR_FUNC_INDENT # outside indented heredocs the line continuation is simply dropped
+            if term == "\n" then
+              unscan     # rollback
+              scan(/\\/) # and split
+              scan(/\n/) # this is `matched`
+              break
+            end
+            tokadd "\\"
+            debug 9
+          else
+            unscan     # rollback
+            scan(/\\/) # this is `matched`
+          end
+        when check(/\\\\/) then
+          tokadd '\\' if escape
+          nextc # ignore 1st \\
+          nextc # for tokadd ss.matched, below
+        when scan(/\\u/) then
+          unless expand then
+            tokadd "\\"
+            next
+          end
+          tokadd_utf8 term, func, regexp
+          next
+        else
+          scan(/\\/) # eat it, we know it's there
+          return RubyLexer::EOF if end_of_stream?
+          if scan(/\P{ASCII}/) then
+            tokadd "\\" unless expand
+            tokadd self.matched
+            next
+          end
+          case
+          when regexp then
+            if term !~ SIMPLE_RE_META && scan(term_re) then
+              tokadd matched
+              next
+            end
+            self.pos -= 1 # TODO: ss.unscan 15 errors
+            # HACK? decide whether to eat the \\ above
+            if _esc = tokadd_escape && end_of_stream? then # NOTE(review): && binds tighter than =, so _esc receives the whole conjunction
+              debug 10
+            end
+            next # C's continue = Ruby's next
+          when expand then
+            tokadd "\\" if escape
+            tokadd read_escape
+            next
+          when qwords && scan(/\s/) then
+            # ignore backslashed spaces in %w
+          when !check(term_re) && !(paren_re && check(paren_re)) then
+            tokadd "\\"
+            next
+          else
+            getch # slurp it too for matched below
+          end
+        end # inner case for /\\/
+      when scan(/\P{ASCII}/) then
+        # not currently checking encoding stuff -- drops to tokadd below
+      when qwords && check(/\s/) then
+        break # leave eos loop
+      else
+        t  = Regexp.escape term == "\n" ? "\r\n" : term # the whole ternary is the argument to Regexp.escape
+        x  = Regexp.escape paren if paren && paren != "\000"
+        q  = "\\s" if qwords
+        re = /[^#{t}#{x}\#\\#{q}]+/ # run of characters excluding terminator, paren, hash sign, backslash and (word lists) whitespace
+        scan re or getch
+        self.lineno += matched.count "\n" if matched
+      end # big case
+      tokadd self.matched # branches that fall through (no next or break) buffer their last match here
+    end # until end_of_stream?
+    if self.matched then # result seen by the caller: the last match, EOF at end of input, or nil
+      self.matched
+    elsif end_of_stream? then
+      RubyLexer::EOF
+    end
+  end # tokadd_string
+  def tokadd_utf8 term, func, regexp_literal    # ../compare/parse30.y:6646
+    tokadd "\\u" if regexp_literal
+    case
+    when scan(/\h{4}/) then
+      codepoint = [matched.to_i(16)].pack("U")
+      tokadd regexp_literal ? matched : codepoint
+    when scan(/\{\s*(\h{1,6}(?:\s+\h{1,6})*)\s*\}/) then
+      codepoints = match[1].split.map { |s| s.to_i 16 }.pack("U")
+      if regexp_literal then
+        tokadd "{"
+        tokadd match[1].split.join(" ")
+        tokadd "}"
+      else
+        tokadd codepoints
+      end
+    else
+      rb_compile_error "unterminated Unicode escape"
+    end
+  end
+end