prism 0.29.0 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +115 -1
- data/CONTRIBUTING.md +0 -4
- data/Makefile +1 -1
- data/README.md +4 -0
- data/config.yml +920 -148
- data/docs/build_system.md +8 -11
- data/docs/fuzzing.md +1 -1
- data/docs/parsing_rules.md +4 -1
- data/docs/relocation.md +34 -0
- data/docs/ripper_translation.md +22 -0
- data/docs/serialization.md +3 -0
- data/ext/prism/api_node.c +2863 -2079
- data/ext/prism/extconf.rb +14 -37
- data/ext/prism/extension.c +241 -391
- data/ext/prism/extension.h +2 -2
- data/include/prism/ast.h +2156 -453
- data/include/prism/defines.h +58 -7
- data/include/prism/diagnostic.h +24 -6
- data/include/prism/node.h +0 -21
- data/include/prism/options.h +94 -3
- data/include/prism/parser.h +82 -40
- data/include/prism/regexp.h +18 -8
- data/include/prism/static_literals.h +3 -2
- data/include/prism/util/pm_char.h +1 -2
- data/include/prism/util/pm_constant_pool.h +0 -8
- data/include/prism/util/pm_integer.h +22 -15
- data/include/prism/util/pm_newline_list.h +11 -0
- data/include/prism/util/pm_string.h +28 -12
- data/include/prism/version.h +3 -3
- data/include/prism.h +47 -11
- data/lib/prism/compiler.rb +3 -0
- data/lib/prism/desugar_compiler.rb +111 -74
- data/lib/prism/dispatcher.rb +16 -1
- data/lib/prism/dot_visitor.rb +55 -34
- data/lib/prism/dsl.rb +660 -468
- data/lib/prism/ffi.rb +113 -8
- data/lib/prism/inspect_visitor.rb +296 -64
- data/lib/prism/lex_compat.rb +1 -1
- data/lib/prism/mutation_compiler.rb +11 -6
- data/lib/prism/node.rb +4262 -5023
- data/lib/prism/node_ext.rb +91 -14
- data/lib/prism/parse_result/comments.rb +0 -7
- data/lib/prism/parse_result/errors.rb +65 -0
- data/lib/prism/parse_result/newlines.rb +101 -11
- data/lib/prism/parse_result.rb +183 -6
- data/lib/prism/reflection.rb +12 -10
- data/lib/prism/relocation.rb +504 -0
- data/lib/prism/serialize.rb +496 -609
- data/lib/prism/string_query.rb +30 -0
- data/lib/prism/translation/parser/compiler.rb +185 -155
- data/lib/prism/translation/parser/lexer.rb +26 -4
- data/lib/prism/translation/parser.rb +9 -4
- data/lib/prism/translation/ripper.rb +23 -25
- data/lib/prism/translation/ruby_parser.rb +86 -17
- data/lib/prism/visitor.rb +3 -0
- data/lib/prism.rb +6 -8
- data/prism.gemspec +9 -5
- data/rbi/prism/dsl.rbi +521 -0
- data/rbi/prism/node.rbi +1115 -1120
- data/rbi/prism/parse_result.rbi +29 -0
- data/rbi/prism/string_query.rbi +12 -0
- data/rbi/prism/visitor.rbi +3 -0
- data/rbi/prism.rbi +36 -30
- data/sig/prism/dsl.rbs +190 -303
- data/sig/prism/mutation_compiler.rbs +1 -0
- data/sig/prism/node.rbs +678 -632
- data/sig/prism/parse_result.rbs +22 -0
- data/sig/prism/relocation.rbs +185 -0
- data/sig/prism/string_query.rbs +11 -0
- data/sig/prism/visitor.rbs +1 -0
- data/sig/prism.rbs +103 -64
- data/src/diagnostic.c +64 -28
- data/src/node.c +502 -1739
- data/src/options.c +76 -27
- data/src/prettyprint.c +188 -112
- data/src/prism.c +3376 -2293
- data/src/regexp.c +208 -71
- data/src/serialize.c +182 -50
- data/src/static_literals.c +64 -85
- data/src/token_type.c +4 -4
- data/src/util/pm_char.c +1 -1
- data/src/util/pm_constant_pool.c +0 -8
- data/src/util/pm_integer.c +53 -25
- data/src/util/pm_newline_list.c +29 -0
- data/src/util/pm_string.c +131 -80
- data/src/util/pm_strpbrk.c +32 -6
- metadata +11 -7
- data/include/prism/util/pm_string_list.h +0 -44
- data/lib/prism/debug.rb +0 -249
- data/lib/prism/translation/parser/rubocop.rb +0 -73
- data/src/util/pm_string_list.c +0 -28
    
        data/lib/prism/node_ext.rb
    CHANGED
    
    | @@ -5,10 +5,13 @@ | |
| 5 5 | 
             
            module Prism
         | 
| 6 6 | 
             
              class Node
         | 
| 7 7 | 
             
                def deprecated(*replacements) # :nodoc:
         | 
| 8 | 
            +
                  location = caller_locations(1, 1)
         | 
| 9 | 
            +
                  location = location[0].label if location
         | 
| 8 10 | 
             
                  suggest = replacements.map { |replacement| "#{self.class}##{replacement}" }
         | 
| 11 | 
            +
             | 
| 9 12 | 
             
                  warn(<<~MSG, category: :deprecated)
         | 
| 10 | 
            -
                    [deprecation]: #{self.class}##{ | 
| 11 | 
            -
                     | 
| 13 | 
            +
                    [deprecation]: #{self.class}##{location} is deprecated and will be \
         | 
| 14 | 
            +
                    removed in the next major version. Use #{suggest.join("/")} instead.
         | 
| 12 15 | 
             
                    #{(caller(1, 3) || []).join("\n")}
         | 
| 13 16 | 
             
                  MSG
         | 
| 14 17 | 
             
                end
         | 
| @@ -18,7 +21,10 @@ module Prism | |
| 18 21 | 
             
                # Returns a numeric value that represents the flags that were used to create
         | 
| 19 22 | 
             
                # the regular expression.
         | 
| 20 23 | 
             
                def options
         | 
| 21 | 
            -
                  o =  | 
| 24 | 
            +
                  o = 0
         | 
| 25 | 
            +
                  o |= Regexp::IGNORECASE if flags.anybits?(RegularExpressionFlags::IGNORE_CASE)
         | 
| 26 | 
            +
                  o |= Regexp::EXTENDED if flags.anybits?(RegularExpressionFlags::EXTENDED)
         | 
| 27 | 
            +
                  o |= Regexp::MULTILINE if flags.anybits?(RegularExpressionFlags::MULTI_LINE)
         | 
| 22 28 | 
             
                  o |= Regexp::FIXEDENCODING if flags.anybits?(RegularExpressionFlags::EUC_JP | RegularExpressionFlags::WINDOWS_31J | RegularExpressionFlags::UTF_8)
         | 
| 23 29 | 
             
                  o |= Regexp::NOENCODING if flags.anybits?(RegularExpressionFlags::ASCII_8BIT)
         | 
| 24 30 | 
             
                  o
         | 
| @@ -66,11 +72,12 @@ module Prism | |
| 66 72 | 
             
                def to_interpolated
         | 
| 67 73 | 
             
                  InterpolatedStringNode.new(
         | 
| 68 74 | 
             
                    source,
         | 
| 75 | 
            +
                    -1,
         | 
| 76 | 
            +
                    location,
         | 
| 69 77 | 
             
                    frozen? ? InterpolatedStringNodeFlags::FROZEN : 0,
         | 
| 70 78 | 
             
                    opening_loc,
         | 
| 71 | 
            -
                    [copy( | 
| 72 | 
            -
                    closing_loc | 
| 73 | 
            -
                    location
         | 
| 79 | 
            +
                    [copy(location: content_loc, opening_loc: nil, closing_loc: nil)],
         | 
| 80 | 
            +
                    closing_loc
         | 
| 74 81 | 
             
                  )
         | 
| 75 82 | 
             
                end
         | 
| 76 83 | 
             
              end
         | 
| @@ -83,10 +90,12 @@ module Prism | |
| 83 90 | 
             
                def to_interpolated
         | 
| 84 91 | 
             
                  InterpolatedXStringNode.new(
         | 
| 85 92 | 
             
                    source,
         | 
| 93 | 
            +
                    -1,
         | 
| 94 | 
            +
                    location,
         | 
| 95 | 
            +
                    flags,
         | 
| 86 96 | 
             
                    opening_loc,
         | 
| 87 | 
            -
                    [StringNode.new(source, 0, nil, content_loc, nil, unescaped | 
| 88 | 
            -
                    closing_loc | 
| 89 | 
            -
                    location
         | 
| 97 | 
            +
                    [StringNode.new(source, node_id, content_loc, 0, nil, content_loc, nil, unescaped)],
         | 
| 98 | 
            +
                    closing_loc
         | 
| 90 99 | 
             
                  )
         | 
| 91 100 | 
             
                end
         | 
| 92 101 | 
             
              end
         | 
| @@ -103,7 +112,19 @@ module Prism | |
| 103 112 | 
             
              class RationalNode < Node
         | 
| 104 113 | 
             
                # Returns the value of the node as a Ruby Rational.
         | 
| 105 114 | 
             
                def value
         | 
| 106 | 
            -
                  Rational( | 
| 115 | 
            +
                  Rational(numerator, denominator)
         | 
| 116 | 
            +
                end
         | 
| 117 | 
            +
             | 
| 118 | 
            +
                # Returns the value of the node as an IntegerNode or a FloatNode. This
         | 
| 119 | 
            +
                # method is deprecated in favor of #value or #numerator/#denominator.
         | 
| 120 | 
            +
                def numeric
         | 
| 121 | 
            +
                  deprecated("value", "numerator", "denominator")
         | 
| 122 | 
            +
             | 
| 123 | 
            +
                  if denominator == 1
         | 
| 124 | 
            +
                    IntegerNode.new(source, -1, location.chop, flags, numerator)
         | 
| 125 | 
            +
                  else
         | 
| 126 | 
            +
                    FloatNode.new(source, -1, location.chop, 0, numerator.to_f / denominator)
         | 
| 127 | 
            +
                  end
         | 
| 107 128 | 
             
                end
         | 
| 108 129 | 
             
              end
         | 
| 109 130 |  | 
| @@ -180,7 +201,12 @@ module Prism | |
| 180 201 | 
             
                # continue to supply that API.
         | 
| 181 202 | 
             
                def child
         | 
| 182 203 | 
             
                  deprecated("name", "name_loc")
         | 
| 183 | 
            -
             | 
| 204 | 
            +
             | 
| 205 | 
            +
                  if name
         | 
| 206 | 
            +
                    ConstantReadNode.new(source, -1, name_loc, 0, name)
         | 
| 207 | 
            +
                  else
         | 
| 208 | 
            +
                    MissingNode.new(source, -1, location, 0)
         | 
| 209 | 
            +
                  end
         | 
| 184 210 | 
             
                end
         | 
| 185 211 | 
             
              end
         | 
| 186 212 |  | 
| @@ -216,7 +242,12 @@ module Prism | |
| 216 242 | 
             
                # continue to supply that API.
         | 
| 217 243 | 
             
                def child
         | 
| 218 244 | 
             
                  deprecated("name", "name_loc")
         | 
| 219 | 
            -
             | 
| 245 | 
            +
             | 
| 246 | 
            +
                  if name
         | 
| 247 | 
            +
                    ConstantReadNode.new(source, -1, name_loc, 0, name)
         | 
| 248 | 
            +
                  else
         | 
| 249 | 
            +
                    MissingNode.new(source, -1, location, 0)
         | 
| 250 | 
            +
                  end
         | 
| 220 251 | 
             
                end
         | 
| 221 252 | 
             
              end
         | 
| 222 253 |  | 
| @@ -249,9 +280,10 @@ module Prism | |
| 249 280 | 
             
                  end
         | 
| 250 281 |  | 
| 251 282 | 
             
                  posts.each do |param|
         | 
| 252 | 
            -
                     | 
| 283 | 
            +
                    case param
         | 
| 284 | 
            +
                    when MultiTargetNode
         | 
| 253 285 | 
             
                      names << [:req]
         | 
| 254 | 
            -
                     | 
| 286 | 
            +
                    when NoKeywordsParameterNode, KeywordRestParameterNode, ForwardingParameterNode
         | 
| 255 287 | 
             
                      # Invalid syntax, e.g. "def f(**nil, ...)" moves the NoKeywordsParameterNode to posts
         | 
| 256 288 | 
             
                      raise "Invalid syntax"
         | 
| 257 289 | 
             
                    else
         | 
| @@ -428,4 +460,49 @@ module Prism | |
| 428 460 | 
             
                  binary_operator_loc
         | 
| 429 461 | 
             
                end
         | 
| 430 462 | 
             
              end
         | 
| 463 | 
            +
             | 
| 464 | 
            +
              class CaseMatchNode < Node
         | 
| 465 | 
            +
                # Returns the else clause of the case match node. This method is deprecated
         | 
| 466 | 
            +
                # in favor of #else_clause.
         | 
| 467 | 
            +
                def consequent
         | 
| 468 | 
            +
                  deprecated("else_clause")
         | 
| 469 | 
            +
                  else_clause
         | 
| 470 | 
            +
                end
         | 
| 471 | 
            +
              end
         | 
| 472 | 
            +
             | 
| 473 | 
            +
              class CaseNode < Node
         | 
| 474 | 
            +
                # Returns the else clause of the case node. This method is deprecated in
         | 
| 475 | 
            +
                # favor of #else_clause.
         | 
| 476 | 
            +
                def consequent
         | 
| 477 | 
            +
                  deprecated("else_clause")
         | 
| 478 | 
            +
                  else_clause
         | 
| 479 | 
            +
                end
         | 
| 480 | 
            +
              end
         | 
| 481 | 
            +
             | 
| 482 | 
            +
              class IfNode < Node
         | 
| 483 | 
            +
                # Returns the subsequent if/elsif/else clause of the if node. This method is
         | 
| 484 | 
            +
                # deprecated in favor of #subsequent.
         | 
| 485 | 
            +
                def consequent
         | 
| 486 | 
            +
                  deprecated("subsequent")
         | 
| 487 | 
            +
                  subsequent
         | 
| 488 | 
            +
                end
         | 
| 489 | 
            +
              end
         | 
| 490 | 
            +
             | 
| 491 | 
            +
              class RescueNode < Node
         | 
| 492 | 
            +
                # Returns the subsequent rescue clause of the rescue node. This method is
         | 
| 493 | 
            +
                # deprecated in favor of #subsequent.
         | 
| 494 | 
            +
                def consequent
         | 
| 495 | 
            +
                  deprecated("subsequent")
         | 
| 496 | 
            +
                  subsequent
         | 
| 497 | 
            +
                end
         | 
| 498 | 
            +
              end
         | 
| 499 | 
            +
             | 
| 500 | 
            +
              class UnlessNode < Node
         | 
| 501 | 
            +
                # Returns the else clause of the unless node. This method is deprecated in
         | 
| 502 | 
            +
                # favor of #else_clause.
         | 
| 503 | 
            +
                def consequent
         | 
| 504 | 
            +
                  deprecated("else_clause")
         | 
| 505 | 
            +
                  else_clause
         | 
| 506 | 
            +
                end
         | 
| 507 | 
            +
              end
         | 
| 431 508 | 
             
            end
         | 
| @@ -183,12 +183,5 @@ module Prism | |
| 183 183 | 
             
                    [preceding, NodeTarget.new(node), following]
         | 
| 184 184 | 
             
                  end
         | 
| 185 185 | 
             
                end
         | 
| 186 | 
            -
             | 
| 187 | 
            -
                private_constant :Comments
         | 
| 188 | 
            -
             | 
| 189 | 
            -
                # Attach the list of comments to their respective locations in the tree.
         | 
| 190 | 
            -
                def attach_comments!
         | 
| 191 | 
            -
                  Comments.new(self).attach! # steep:ignore
         | 
| 192 | 
            -
                end
         | 
| 193 186 | 
             
              end
         | 
| 194 187 | 
             
            end
         | 
| @@ -0,0 +1,65 @@ | |
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            require "stringio"
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            module Prism
         | 
| 6 | 
            +
              class ParseResult < Result
         | 
| 7 | 
            +
                # An object to represent the set of errors on a parse result. This object
         | 
| 8 | 
            +
                # can be used to format the errors in a human-readable way.
         | 
| 9 | 
            +
                class Errors
         | 
| 10 | 
            +
                  # The parse result that contains the errors.
         | 
| 11 | 
            +
                  attr_reader :parse_result
         | 
| 12 | 
            +
             | 
| 13 | 
            +
                  # Initialize a new set of errors from the given parse result.
         | 
| 14 | 
            +
                  def initialize(parse_result)
         | 
| 15 | 
            +
                    @parse_result = parse_result
         | 
| 16 | 
            +
                  end
         | 
| 17 | 
            +
             | 
| 18 | 
            +
                  # Formats the errors in a human-readable way and returns them as a string.
         | 
| 19 | 
            +
                  def format
         | 
| 20 | 
            +
                    error_lines = {} #: Hash[Integer, Array[ParseError]]
         | 
| 21 | 
            +
                    parse_result.errors.each do |error|
         | 
| 22 | 
            +
                      location = error.location
         | 
| 23 | 
            +
                      (location.start_line..location.end_line).each do |line|
         | 
| 24 | 
            +
                        error_lines[line] ||= []
         | 
| 25 | 
            +
                        error_lines[line] << error
         | 
| 26 | 
            +
                      end
         | 
| 27 | 
            +
                    end
         | 
| 28 | 
            +
             | 
| 29 | 
            +
                    source_lines = parse_result.source.source.lines
         | 
| 30 | 
            +
                    source_lines << "" if error_lines.key?(source_lines.size + 1)
         | 
| 31 | 
            +
             | 
| 32 | 
            +
                    io = StringIO.new
         | 
| 33 | 
            +
                    source_lines.each.with_index(1) do |line, line_number|
         | 
| 34 | 
            +
                      io.puts(line)
         | 
| 35 | 
            +
             | 
| 36 | 
            +
                      (error_lines.delete(line_number) || []).each do |error|
         | 
| 37 | 
            +
                        location = error.location
         | 
| 38 | 
            +
             | 
| 39 | 
            +
                        case line_number
         | 
| 40 | 
            +
                        when location.start_line
         | 
| 41 | 
            +
                          io.print(" " * location.start_column + "^")
         | 
| 42 | 
            +
             | 
| 43 | 
            +
                          if location.start_line == location.end_line
         | 
| 44 | 
            +
                            if location.start_column != location.end_column
         | 
| 45 | 
            +
                              io.print("~" * (location.end_column - location.start_column - 1))
         | 
| 46 | 
            +
                            end
         | 
| 47 | 
            +
             | 
| 48 | 
            +
                            io.puts(" " + error.message)
         | 
| 49 | 
            +
                          else
         | 
| 50 | 
            +
                            io.puts("~" * (line.bytesize - location.start_column))
         | 
| 51 | 
            +
                          end
         | 
| 52 | 
            +
                        when location.end_line
         | 
| 53 | 
            +
                          io.puts("~" * location.end_column + " " + error.message)
         | 
| 54 | 
            +
                        else
         | 
| 55 | 
            +
                          io.puts("~" * line.bytesize)
         | 
| 56 | 
            +
                        end
         | 
| 57 | 
            +
                      end
         | 
| 58 | 
            +
                    end
         | 
| 59 | 
            +
             | 
| 60 | 
            +
                    io.puts
         | 
| 61 | 
            +
                    io.string
         | 
| 62 | 
            +
                  end
         | 
| 63 | 
            +
                end
         | 
| 64 | 
            +
              end
         | 
| 65 | 
            +
            end
         | 
| @@ -17,21 +17,27 @@ module Prism | |
| 17 17 | 
             
                # Note that the logic in this file should be kept in sync with the Java
         | 
| 18 18 | 
             
                # MarkNewlinesVisitor, since that visitor is responsible for marking the
         | 
| 19 19 | 
             
                # newlines for JRuby/TruffleRuby.
         | 
| 20 | 
            +
                #
         | 
| 21 | 
            +
                # This file is autoloaded only when `mark_newlines!` is called, so the
         | 
| 22 | 
            +
                # re-opening of the various nodes in this file will only be performed in
         | 
| 23 | 
            +
                # that case. We do that to avoid storing the extra `@newline` instance
         | 
| 24 | 
            +
                # variable on every node if we don't need it.
         | 
| 20 25 | 
             
                class Newlines < Visitor
         | 
| 21 26 | 
             
                  # Create a new Newlines visitor with the given newline offsets.
         | 
| 22 | 
            -
                  def initialize( | 
| 23 | 
            -
                    @ | 
| 27 | 
            +
                  def initialize(lines)
         | 
| 28 | 
            +
                    # @type var lines: Integer
         | 
| 29 | 
            +
                    @lines = Array.new(1 + lines, false)
         | 
| 24 30 | 
             
                  end
         | 
| 25 31 |  | 
| 26 32 | 
             
                  # Permit block/lambda nodes to mark newlines within themselves.
         | 
| 27 33 | 
             
                  def visit_block_node(node)
         | 
| 28 | 
            -
                     | 
| 29 | 
            -
                    @ | 
| 34 | 
            +
                    old_lines = @lines
         | 
| 35 | 
            +
                    @lines = Array.new(old_lines.size, false)
         | 
| 30 36 |  | 
| 31 37 | 
             
                    begin
         | 
| 32 38 | 
             
                      super(node)
         | 
| 33 39 | 
             
                    ensure
         | 
| 34 | 
            -
                      @ | 
| 40 | 
            +
                      @lines = old_lines
         | 
| 35 41 | 
             
                    end
         | 
| 36 42 | 
             
                  end
         | 
| 37 43 |  | 
| @@ -39,7 +45,7 @@ module Prism | |
| 39 45 |  | 
| 40 46 | 
             
                  # Mark if/unless nodes as newlines.
         | 
| 41 47 | 
             
                  def visit_if_node(node)
         | 
| 42 | 
            -
                    node. | 
| 48 | 
            +
                    node.newline_flag!(@lines)
         | 
| 43 49 | 
             
                    super(node)
         | 
| 44 50 | 
             
                  end
         | 
| 45 51 |  | 
| @@ -48,17 +54,101 @@ module Prism | |
| 48 54 | 
             
                  # Permit statements lists to mark newlines within themselves.
         | 
| 49 55 | 
             
                  def visit_statements_node(node)
         | 
| 50 56 | 
             
                    node.body.each do |child|
         | 
| 51 | 
            -
                      child. | 
| 57 | 
            +
                      child.newline_flag!(@lines)
         | 
| 52 58 | 
             
                    end
         | 
| 53 59 | 
             
                    super(node)
         | 
| 54 60 | 
             
                  end
         | 
| 55 61 | 
             
                end
         | 
| 62 | 
            +
              end
         | 
| 63 | 
            +
             | 
| 64 | 
            +
              class Node
         | 
| 65 | 
            +
                def newline_flag? # :nodoc:
         | 
| 66 | 
            +
                  @newline_flag ? true : false
         | 
| 67 | 
            +
                end
         | 
| 68 | 
            +
             | 
| 69 | 
            +
                def newline_flag!(lines) # :nodoc:
         | 
| 70 | 
            +
                  line = location.start_line
         | 
| 71 | 
            +
                  unless lines[line]
         | 
| 72 | 
            +
                    lines[line] = true
         | 
| 73 | 
            +
                    @newline_flag = true
         | 
| 74 | 
            +
                  end
         | 
| 75 | 
            +
                end
         | 
| 76 | 
            +
              end
         | 
| 77 | 
            +
             | 
| 78 | 
            +
              class BeginNode < Node
         | 
| 79 | 
            +
                def newline_flag!(lines) # :nodoc:
         | 
| 80 | 
            +
                  # Never mark BeginNode with a newline flag, mark children instead.
         | 
| 81 | 
            +
                end
         | 
| 82 | 
            +
              end
         | 
| 83 | 
            +
             | 
| 84 | 
            +
              class ParenthesesNode < Node
         | 
| 85 | 
            +
                def newline_flag!(lines) # :nodoc:
         | 
| 86 | 
            +
                  # Never mark ParenthesesNode with a newline flag, mark children instead.
         | 
| 87 | 
            +
                end
         | 
| 88 | 
            +
              end
         | 
| 89 | 
            +
             | 
| 90 | 
            +
              class IfNode < Node
         | 
| 91 | 
            +
                def newline_flag!(lines) # :nodoc:
         | 
| 92 | 
            +
                  predicate.newline_flag!(lines)
         | 
| 93 | 
            +
                end
         | 
| 94 | 
            +
              end
         | 
| 95 | 
            +
             | 
| 96 | 
            +
              class UnlessNode < Node
         | 
| 97 | 
            +
                def newline_flag!(lines) # :nodoc:
         | 
| 98 | 
            +
                  predicate.newline_flag!(lines)
         | 
| 99 | 
            +
                end
         | 
| 100 | 
            +
              end
         | 
| 101 | 
            +
             | 
| 102 | 
            +
              class UntilNode < Node
         | 
| 103 | 
            +
                def newline_flag!(lines) # :nodoc:
         | 
| 104 | 
            +
                  predicate.newline_flag!(lines)
         | 
| 105 | 
            +
                end
         | 
| 106 | 
            +
              end
         | 
| 107 | 
            +
             | 
| 108 | 
            +
              class WhileNode < Node
         | 
| 109 | 
            +
                def newline_flag!(lines) # :nodoc:
         | 
| 110 | 
            +
                  predicate.newline_flag!(lines)
         | 
| 111 | 
            +
                end
         | 
| 112 | 
            +
              end
         | 
| 113 | 
            +
             | 
| 114 | 
            +
              class RescueModifierNode < Node
         | 
| 115 | 
            +
                def newline_flag!(lines) # :nodoc:
         | 
| 116 | 
            +
                  expression.newline_flag!(lines)
         | 
| 117 | 
            +
                end
         | 
| 118 | 
            +
              end
         | 
| 119 | 
            +
             | 
| 120 | 
            +
              class InterpolatedMatchLastLineNode < Node
         | 
| 121 | 
            +
                def newline_flag!(lines) # :nodoc:
         | 
| 122 | 
            +
                  first = parts.first
         | 
| 123 | 
            +
                  first.newline_flag!(lines) if first
         | 
| 124 | 
            +
                end
         | 
| 125 | 
            +
              end
         | 
| 126 | 
            +
             | 
| 127 | 
            +
              class InterpolatedRegularExpressionNode < Node
         | 
| 128 | 
            +
                def newline_flag!(lines) # :nodoc:
         | 
| 129 | 
            +
                  first = parts.first
         | 
| 130 | 
            +
                  first.newline_flag!(lines) if first
         | 
| 131 | 
            +
                end
         | 
| 132 | 
            +
              end
         | 
| 133 | 
            +
             | 
| 134 | 
            +
              class InterpolatedStringNode < Node
         | 
| 135 | 
            +
                def newline_flag!(lines) # :nodoc:
         | 
| 136 | 
            +
                  first = parts.first
         | 
| 137 | 
            +
                  first.newline_flag!(lines) if first
         | 
| 138 | 
            +
                end
         | 
| 139 | 
            +
              end
         | 
| 56 140 |  | 
| 57 | 
            -
             | 
| 141 | 
            +
              class InterpolatedSymbolNode < Node
         | 
| 142 | 
            +
                def newline_flag!(lines) # :nodoc:
         | 
| 143 | 
            +
                  first = parts.first
         | 
| 144 | 
            +
                  first.newline_flag!(lines) if first
         | 
| 145 | 
            +
                end
         | 
| 146 | 
            +
              end
         | 
| 58 147 |  | 
| 59 | 
            -
             | 
| 60 | 
            -
                def  | 
| 61 | 
            -
                   | 
| 148 | 
            +
              class InterpolatedXStringNode < Node
         | 
| 149 | 
            +
                def newline_flag!(lines) # :nodoc:
         | 
| 150 | 
            +
                  first = parts.first
         | 
| 151 | 
            +
                  first.newline_flag!(lines) if first
         | 
| 62 152 | 
             
                end
         | 
| 63 153 | 
             
              end
         | 
| 64 154 | 
             
            end
         | 
    
        data/lib/prism/parse_result.rb
    CHANGED
    
    | @@ -10,7 +10,26 @@ module Prism | |
| 10 10 | 
             
                # specialized and more performant `ASCIISource` if no multibyte characters
         | 
| 11 11 | 
             
                # are present in the source code.
         | 
| 12 12 | 
             
                # Builds the source object best suited to +source+.
                #
                # * ASCII-only sources get an `ASCIISource`.
                # * Binary-tagged sources are re-tagged as UTF-8; if the bytes are
                #   valid UTF-8 a regular source is built from them.
                # * Every other encoding gets a regular source.
                #
                # +start_line+ and +offsets+ are passed through unchanged to the
                # constructor that is selected.
                def self.for(source, start_line = 1, offsets = [])
                  if source.ascii_only?
                    ASCIISource.new(source, start_line, offsets)
                  elsif source.encoding == Encoding::BINARY
                    # force_encoding mutates its receiver, so a frozen string would
                    # raise FrozenError. Unary plus returns the receiver itself when
                    # it is not frozen and an unfrozen copy otherwise, so unfrozen
                    # callers still see their own string re-tagged as before.
                    source = +source
                    source.force_encoding(Encoding::UTF_8)

                    if source.valid_encoding?
                      new(source, start_line, offsets)
                    else
                      # This is an extremely niche use case where the file is marked as
                      # binary, contains multi-byte characters, and those characters are not
                      # valid UTF-8. In this case we'll mark it as binary and fall back to
                      # treating everything as a single-byte character. This _may_ cause
                      # problems when asking for code units, but it appears to be the
                      # cleanest solution at the moment.
                      source.force_encoding(Encoding::BINARY)
                      ASCIISource.new(source, start_line, offsets)
                    end
                  else
                    new(source, start_line, offsets)
                  end
                end
         | 
| 15 34 |  | 
| 16 35 | 
             
                # The source code that this source object represents.
         | 
| @@ -85,9 +104,26 @@ module Prism | |
| 85 104 | 
             
                # This method is tested with UTF-8, UTF-16, and UTF-32. If there is the
         | 
| 86 105 | 
             
                # concept of code units that differs from the number of characters in other
         | 
| 87 106 | 
             
                # encodings, it is not captured here.
         | 
| 107 | 
            +
                #
         | 
| 108 | 
            +
                # We purposefully replace invalid and undefined characters with replacement
         | 
| 109 | 
            +
                # characters in this conversion. This happens for two reasons. First, it's
         | 
| 110 | 
            +
                # possible that the given byte offset will not occur on a character
         | 
| 111 | 
            +
                # boundary. Second, it's possible that the source code will contain a
         | 
| 112 | 
            +
                # character that has no equivalent in the given encoding.
         | 
| 88 113 | 
             
                def code_units_offset(byte_offset, encoding)
                  # Transcode everything before the offset; bytes that are invalid
                  # or have no equivalent in the target encoding are replaced
                  # rather than raising.
                  prefix = source.byteslice(0, byte_offset) || raise
                  transcoded = prefix.encode(encoding, invalid: :replace, undef: :replace)

                  # A UTF-16 code unit is two bytes wide, so count bytes there;
                  # every other encoding is counted in characters.
                  utf16 = [Encoding::UTF_16LE, Encoding::UTF_16BE].include?(encoding)
                  utf16 ? transcoded.bytesize / 2 : transcoded.length
                end
         | 
| 122 | 
            +
             | 
| 123 | 
            +
                # Generate a cache that targets a specific encoding for calculating code
         | 
| 124 | 
            +
                # unit offsets.
         | 
| 125 | 
            +
                def code_units_cache(encoding)
         | 
| 126 | 
            +
                  CodeUnitsCache.new(source, encoding)
         | 
| 91 127 | 
             
                end
         | 
| 92 128 |  | 
| 93 129 | 
             
                # Returns the column number in code units for the given encoding for the
         | 
| @@ -119,10 +155,84 @@ module Prism | |
| 119 155 | 
             
                end
         | 
| 120 156 | 
             
              end
         | 
| 121 157 |  | 
| 158 | 
            +
              # A cache that can be used to quickly compute code unit offsets from byte
         | 
| 159 | 
            +
              # offsets. It purposefully provides only a single #[] method to access the
         | 
| 160 | 
            +
              # cache in order to minimize surface area.
         | 
| 161 | 
            +
              #
         | 
| 162 | 
            +
              # Note that there are some known issues here that may or may not be addressed
         | 
| 163 | 
            +
              # in the future:
         | 
| 164 | 
            +
              #
         | 
| 165 | 
            +
              # * The first is that there are issues when the cache computes values that are
         | 
| 166 | 
            +
              #   not on character boundaries. This can result in subsequent computations
         | 
| 167 | 
            +
              #   being off by one or more code units.
         | 
| 168 | 
            +
              # * The second is that this cache is currently unbounded. In theory we could
         | 
| 169 | 
            +
              #   introduce some kind of LRU cache to limit the number of entries, but this
         | 
| 170 | 
            +
              #   has not yet been implemented.
         | 
| 171 | 
            +
              #
         | 
| 172 | 
            +
              class CodeUnitsCache
                # Counts code units for UTF-16 targets: the slice is transcoded and
                # its byte size halved, since each UTF-16 code unit is two bytes.
                class UTF16Counter # :nodoc:
                  def initialize(source, encoding)
                    @source = source
                    @encoding = encoding
                  end

                  def count(byte_offset, byte_length)
                    @source.byteslice(byte_offset, byte_length).encode(@encoding, invalid: :replace, undef: :replace).bytesize / 2
                  end
                end

                # Counts code units for every other target encoding by transcoding
                # the slice and taking its length in characters.
                class LengthCounter # :nodoc:
                  def initialize(source, encoding)
                    @source = source
                    @encoding = encoding
                  end

                  def count(byte_offset, byte_length)
                    @source.byteslice(byte_offset, byte_length).encode(@encoding, invalid: :replace, undef: :replace).length
                  end
                end

                private_constant :UTF16Counter, :LengthCounter

                # Initialize a new cache with the given source and encoding.
                def initialize(source, encoding)
                  @source = source
                  @counter =
                    if encoding == Encoding::UTF_16LE || encoding == Encoding::UTF_16BE
                      UTF16Counter.new(source, encoding)
                    else
                      LengthCounter.new(source, encoding)
                    end

                  @cache = {} #: Hash[Integer, Integer]
                  @offsets = [] #: Array[Integer]
                end

                # Retrieve the code units offset from the given byte offset.
                #
                # The result is memoized. On a miss, counting starts from the
                # closest smaller byte offset that is already cached, so only the
                # bytes in between are transcoded. This also applies when the
                # requested offset lies past every cached offset, which keeps
                # ascending lookups from re-transcoding the source from byte zero.
                def [](byte_offset)
                  @cache[byte_offset] ||= begin
                    # @offsets is kept sorted; this is where byte_offset belongs.
                    index = @offsets.bsearch_index { |offset| offset > byte_offset } || @offsets.length

                    units =
                      if index.zero?
                        # Nothing cached below this offset: count from the start.
                        @counter.count(0, byte_offset)
                      else
                        anchor = @offsets[index - 1]
                        @cache[anchor] + @counter.count(anchor, byte_offset - anchor)
                      end

                    # Record the offset only after counting succeeded, so a raising
                    # transcode cannot leave an offset without a cached value.
                    @offsets.insert(index, byte_offset)
                    units
                  end
                end
              end
         | 
| 227 | 
            +
             | 
| 122 228 | 
             
              # Specialized version of Prism::Source for source code that includes ASCII
         | 
| 123 229 | 
             
              # characters only. This class is used to apply performance optimizations that
         | 
| 124 | 
            -
              # cannot be applied to sources that include multibyte characters. | 
| 125 | 
            -
              # | 
| 230 | 
            +
              # cannot be applied to sources that include multibyte characters.
         | 
| 231 | 
            +
              #
         | 
| 232 | 
            +
              # In the extremely rare case that a source includes multi-byte characters but
         | 
| 233 | 
            +
              # is marked as binary because of a magic encoding comment and it cannot be
         | 
| 234 | 
            +
              # eagerly converted to UTF-8, this class will be used as well. This is because
         | 
| 235 | 
            +
              # at that point we will treat everything as single-byte characters.
         | 
| 126 236 | 
             
              class ASCIISource < Source
         | 
| 127 237 | 
             
                # Return the character offset for the given byte offset.
         | 
| 128 238 | 
             
                def character_offset(byte_offset)
         | 
| @@ -144,9 +254,16 @@ module Prism | |
| 144 254 | 
             
                  byte_offset
         | 
| 145 255 | 
             
                end
         | 
| 146 256 |  | 
| 257 | 
            +
                # Returns a cache that is the identity function in order to maintain the
         | 
| 258 | 
            +
                # same interface. We can do this because code units are always equivalent to
         | 
| 259 | 
            +
                # byte offsets for ASCII-only sources.
         | 
| 260 | 
            +
                def code_units_cache(encoding)
                  # The encoding is irrelevant here: the returned lambda hands back
                  # whatever byte offset it is given.
                  lambda { |byte_offset| byte_offset }
                end
         | 
| 263 | 
            +
             | 
| 147 264 | 
             
                # Specialized version of `code_units_column` that does not depend on
         | 
| 148 265 | 
             
                # `code_units_offset`, which is a more expensive operation. This is
         | 
| 149 | 
            -
                #  | 
| 266 | 
            +
                # essentially the same as `Prism::Source#column`.
         | 
| 150 267 | 
             
                def code_units_column(byte_offset, encoding)
         | 
| 151 268 | 
             
                  byte_offset - line_start(byte_offset)
         | 
| 152 269 | 
             
                end
         | 
| @@ -253,6 +370,12 @@ module Prism | |
| 253 370 | 
             
                  source.code_units_offset(start_offset, encoding)
         | 
| 254 371 | 
             
                end
         | 
| 255 372 |  | 
| 373 | 
            +
                # The start offset from the start of the file in code units using the given
         | 
| 374 | 
            +
                # cache to fetch or calculate the value.
         | 
| 375 | 
            +
                def cached_start_code_units_offset(cache)
         | 
| 376 | 
            +
                  cache[start_offset]
         | 
| 377 | 
            +
                end
         | 
| 378 | 
            +
             | 
| 256 379 | 
             
                # The byte offset from the beginning of the source where this location ends.
         | 
| 257 380 | 
             
                def end_offset
         | 
| 258 381 | 
             
                  start_offset + length
         | 
| @@ -269,6 +392,12 @@ module Prism | |
| 269 392 | 
             
                  source.code_units_offset(end_offset, encoding)
         | 
| 270 393 | 
             
                end
         | 
| 271 394 |  | 
| 395 | 
            +
                # The end offset from the start of the file in code units using the given
         | 
| 396 | 
            +
                # cache to fetch or calculate the value.
         | 
| 397 | 
            +
                def cached_end_code_units_offset(cache)
         | 
| 398 | 
            +
                  cache[end_offset]
         | 
| 399 | 
            +
                end
         | 
| 400 | 
            +
             | 
| 272 401 | 
             
                # The line number where this location starts.
         | 
| 273 402 | 
             
                def start_line
         | 
| 274 403 | 
             
                  source.line(start_offset)
         | 
| @@ -303,6 +432,12 @@ module Prism | |
| 303 432 | 
             
                  source.code_units_column(start_offset, encoding)
         | 
| 304 433 | 
             
                end
         | 
| 305 434 |  | 
| 435 | 
            +
                # The start column in code units using the given cache to fetch or calculate
         | 
| 436 | 
            +
                # the value.
         | 
| 437 | 
            +
                def cached_start_code_units_column(cache)
                  # Column = code units up to the start offset, minus code units up
                  # to the beginning of the line that contains it.
                  offset = start_offset
                  units_at_offset = cache[offset]
                  units_at_line_start = cache[source.line_start(offset)]
                  units_at_offset - units_at_line_start
                end
         | 
| 440 | 
            +
             | 
| 306 441 | 
             
                # The column number in bytes where this location ends from the start of the
         | 
| 307 442 | 
             
                # line.
         | 
| 308 443 | 
             
                def end_column
         | 
| @@ -321,6 +456,12 @@ module Prism | |
| 321 456 | 
             
                  source.code_units_column(end_offset, encoding)
         | 
| 322 457 | 
             
                end
         | 
| 323 458 |  | 
| 459 | 
            +
                # The end column in code units using the given cache to fetch or calculate
         | 
| 460 | 
            +
                # the value.
         | 
| 461 | 
            +
                def cached_end_code_units_column(cache)
                  # Column = code units up to the end offset, minus code units up
                  # to the beginning of the line that contains it.
                  offset = end_offset
                  units_at_offset = cache[offset]
                  units_at_line_start = cache[source.line_start(offset)]
                  units_at_offset - units_at_line_start
                end
         | 
| 464 | 
            +
             | 
| 324 465 | 
             
                # Implement the hash pattern matching interface for Location.
         | 
| 325 466 | 
             
                def deconstruct_keys(keys)
         | 
| 326 467 | 
             
                  { start_offset: start_offset, end_offset: end_offset }
         | 
| @@ -570,10 +711,23 @@ module Prism | |
| 570 711 | 
             
                def failure?
         | 
| 571 712 | 
             
                  !success?
         | 
| 572 713 | 
             
                end
         | 
| 714 | 
            +
             | 
| 715 | 
            +
                # Create a code units cache for the given encoding.
         | 
| 716 | 
            +
                def code_units_cache(encoding)
         | 
| 717 | 
            +
                  source.code_units_cache(encoding)
         | 
| 718 | 
            +
                end
         | 
| 573 719 | 
             
              end
         | 
| 574 720 |  | 
| 575 721 | 
             
              # This is a result specific to the `parse` and `parse_file` methods.
         | 
| 576 722 | 
             
              class ParseResult < Result
         | 
| 723 | 
            +
                autoload :Comments, "prism/parse_result/comments"
         | 
| 724 | 
            +
                autoload :Errors, "prism/parse_result/errors"
         | 
| 725 | 
            +
                autoload :Newlines, "prism/parse_result/newlines"
         | 
| 726 | 
            +
             | 
| 727 | 
            +
                private_constant :Comments
         | 
| 728 | 
            +
                private_constant :Errors
         | 
| 729 | 
            +
                private_constant :Newlines
         | 
| 730 | 
            +
             | 
| 577 731 | 
             
                # The syntax tree that was parsed from the source code.
         | 
| 578 732 | 
             
                attr_reader :value
         | 
| 579 733 |  | 
| @@ -587,6 +741,23 @@ module Prism | |
| 587 741 | 
             
                def deconstruct_keys(keys)
         | 
| 588 742 | 
             
                  super.merge!(value: value)
         | 
| 589 743 | 
             
                end
         | 
| 744 | 
            +
             | 
| 745 | 
            +
                # Attach the list of comments to their respective locations in the tree.
         | 
| 746 | 
            +
                def attach_comments!
         | 
| 747 | 
            +
                  Comments.new(self).attach! # steep:ignore
         | 
| 748 | 
            +
                end
         | 
| 749 | 
            +
             | 
| 750 | 
            +
                # Walk the tree and mark nodes that are on a new line, loosely emulating
         | 
| 751 | 
            +
                # the behavior of CRuby's `:line` tracepoint event.
         | 
| 752 | 
            +
                def mark_newlines!
         | 
| 753 | 
            +
                  value.accept(Newlines.new(source.offsets.size)) # steep:ignore
         | 
| 754 | 
            +
                end
         | 
| 755 | 
            +
             | 
| 756 | 
            +
                # Returns a string representation of the syntax tree with the errors
         | 
| 757 | 
            +
                # displayed inline.
         | 
| 758 | 
            +
                def errors_format
         | 
| 759 | 
            +
                  Errors.new(self).format
         | 
| 760 | 
            +
                end
         | 
| 590 761 | 
             
              end
         | 
| 591 762 |  | 
| 592 763 | 
             
              # This is a result specific to the `lex` and `lex_file` methods.
         | 
| @@ -677,5 +848,11 @@ module Prism | |
| 677 848 | 
             
                    other.type == type &&
         | 
| 678 849 | 
             
                    other.value == value
         | 
| 679 850 | 
             
                end
         | 
| 851 | 
            +
             | 
| 852 | 
            +
                # Returns a string representation of this token.
         | 
| 853 | 
            +
                def inspect
         | 
| 854 | 
            +
                  location
         | 
| 855 | 
            +
                  super
         | 
| 856 | 
            +
                end
         | 
| 680 857 | 
             
              end
         | 
| 681 858 | 
             
            end
         |