RubyGems - prism - Versions diffs - 1.3.0 → 1.4.0 - Mend

prism 1.3.0 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (54) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +24 -1
data/config.yml +9 -0
data/docs/releasing.md +1 -1
data/docs/ruby_api.md +1 -1
data/ext/prism/api_node.c +1814 -1303
data/ext/prism/extension.c +230 -109
data/ext/prism/extension.h +4 -4
data/include/prism/ast.h +16 -0
data/include/prism/defines.h +4 -1
data/include/prism/options.h +47 -1
data/include/prism/util/pm_buffer.h +10 -0
data/include/prism/version.h +2 -2
data/include/prism.h +4 -4
data/lib/prism/dot_visitor.rb +16 -0
data/lib/prism/dsl.rb +10 -2
data/lib/prism/ffi.rb +45 -27
data/lib/prism/inspect_visitor.rb +2 -1
data/lib/prism/node.rb +48 -10
data/lib/prism/parse_result/newlines.rb +1 -1
data/lib/prism/parse_result.rb +52 -0
data/lib/prism/polyfill/append_as_bytes.rb +15 -0
data/lib/prism/reflection.rb +2 -2
data/lib/prism/serialize.rb +1252 -765
data/lib/prism/translation/parser/builder.rb +61 -0
data/lib/prism/translation/parser/compiler.rb +192 -136
data/lib/prism/translation/parser/lexer.rb +435 -61
data/lib/prism/translation/parser.rb +51 -3
data/lib/prism/translation/parser35.rb +12 -0
data/lib/prism/translation/ripper.rb +13 -3
data/lib/prism/translation/ruby_parser.rb +5 -4
data/lib/prism/translation.rb +1 -0
data/lib/prism.rb +3 -3
data/prism.gemspec +5 -1
data/rbi/prism/dsl.rbi +6 -3
data/rbi/prism/node.rbi +22 -7
data/rbi/prism/parse_result.rbi +17 -0
data/rbi/prism/translation/parser35.rbi +6 -0
data/rbi/prism.rbi +39 -36
data/sig/prism/dsl.rbs +4 -2
data/sig/prism/node.rbs +17 -7
data/sig/prism/parse_result.rbs +10 -0
data/sig/prism/serialize.rbs +4 -2
data/sig/prism.rbs +22 -1
data/src/diagnostic.c +2 -2
data/src/node.c +21 -0
data/src/options.c +31 -0
data/src/prettyprint.c +30 -0
data/src/prism.c +374 -118
data/src/serialize.c +6 -0
data/src/util/pm_buffer.c +40 -0
data/src/util/pm_constant_pool.c +6 -2
data/src/util/pm_strncasecmp.c +13 -1
metadata +7 -7

data/lib/prism/translation/parser/builder.rb ADDED Viewed

@@ -0,0 +1,61 @@
+# frozen_string_literal: true
+module Prism
+  module Translation
+    class Parser
+      # A builder that knows how to convert more modern Ruby syntax
+      # into the whitequark/parser gem's syntax tree.
+      class Builder < ::Parser::Builders::Default
+        # It represents the `it` block argument, which is not yet implemented in the Parser gem.
+        def itarg
+          n(:itarg, [:it], nil)
+        end
+        # The following three lines have been added to support the `it` block parameter syntax in the source code below.
+        #
+        #   if args.type == :itarg
+        #     block_type = :itblock
+        #     args = :it
+        #
+        # https://github.com/whitequark/parser/blob/v3.3.7.1/lib/parser/builders/default.rb#L1122-L1155
+        def block(method_call, begin_t, args, body, end_t)
+          _receiver, _selector, *call_args = *method_call
+          if method_call.type == :yield
+            diagnostic :error, :block_given_to_yield, nil, method_call.loc.keyword, [loc(begin_t)]
+          end
+          last_arg = call_args.last
+          if last_arg && (last_arg.type == :block_pass || last_arg.type == :forwarded_args)
+            diagnostic :error, :block_and_blockarg, nil, last_arg.loc.expression, [loc(begin_t)]
+          end
+          if args.type == :itarg
+            block_type = :itblock
+            args = :it
+          elsif args.type == :numargs
+            block_type = :numblock
+            args = args.children[0]
+          else
+            block_type = :block
+          end
+          if [:send, :csend, :index, :super, :zsuper, :lambda].include?(method_call.type)
+            n(block_type, [ method_call, args, body ],
+              block_map(method_call.loc.expression, begin_t, end_t))
+          else
+            # Code like "return foo 1 do end" is reduced in a weird sequence.
+            # Here, method_call is actually (return).
+            actual_send, = *method_call
+            block =
+              n(block_type, [ actual_send, args, body ],
+                block_map(actual_send.loc.expression, begin_t, end_t))
+            n(method_call.type, [ block ],
+              method_call.loc.with_expression(join_exprs(method_call, block)))
+          end
+        end
+      end
+    end
+  end
+end

data/lib/prism/translation/parser/compiler.rb CHANGED Viewed

@@ -74,7 +74,29 @@ module Prism
         # []
         # ^^
         def visit_array_node(node)
-          builder.array(token(node.opening_loc), visit_all(node.elements), token(node.closing_loc))
+          if node.opening&.start_with?("%w", "%W", "%i", "%I")
+            elements = node.elements.flat_map do |element|
+              if element.is_a?(StringNode)
+                if element.content.include?("\n")
+                  string_nodes_from_line_continuations(element.unescaped, element.content, element.content_loc.start_offset, node.opening)
+                else
+                  [builder.string_internal([element.unescaped, srange(element.content_loc)])]
+                end
+              elsif element.is_a?(InterpolatedStringNode)
+                builder.string_compose(
+                  token(element.opening_loc),
+                  string_nodes_from_interpolation(element, node.opening),
+                  token(element.closing_loc)
+                )
+              else
+                [visit(element)]
+              end
+            end
+          else
+            elements = visit_all(node.elements)
+          end
+          builder.array(token(node.opening_loc), elements, token(node.closing_loc))
         end
         # foo => [bar]
@@ -128,14 +150,17 @@ module Prism
               builder.pair_quoted(token(key.opening_loc), [builder.string_internal([key.unescaped, srange(key.value_loc)])], token(key.closing_loc), visit(node.value))
             end
           elsif node.value.is_a?(ImplicitNode)
-            if (value = node.value.value).is_a?(LocalVariableReadNode)
-              builder.pair_keyword(
-                [key.unescaped, srange(key)],
-                builder.ident([value.name, srange(key.value_loc)]).updated(:lvar)
-              )
+            value = node.value.value
+            implicit_value = if value.is_a?(CallNode)
+              builder.call_method(nil, nil, [value.name, srange(value.message_loc)])
+            elsif value.is_a?(ConstantReadNode)
+              builder.const([value.name, srange(key.value_loc)])
             else
-              builder.pair_label([key.unescaped, srange(key.location)])
+              builder.ident([value.name, srange(key.value_loc)]).updated(:lvar)
             end
+            builder.pair_keyword([key.unescaped, srange(key)], implicit_value)
           elsif node.operator_loc
             builder.pair(visit(key), token(node.operator_loc), visit(node.value))
           elsif key.is_a?(SymbolNode) && key.opening_loc.nil?
@@ -181,7 +206,14 @@ module Prism
           if (rescue_clause = node.rescue_clause)
             begin
               find_start_offset = (rescue_clause.reference&.location || rescue_clause.exceptions.last&.location || rescue_clause.keyword_loc).end_offset
-              find_end_offset = (rescue_clause.statements&.location&.start_offset || rescue_clause.subsequent&.location&.start_offset || (find_start_offset + 1))
+              find_end_offset = (
+                rescue_clause.statements&.location&.start_offset ||
+                rescue_clause.subsequent&.location&.start_offset ||
+                node.else_clause&.location&.start_offset ||
+                node.ensure_clause&.location&.start_offset ||
+                node.end_keyword_loc&.start_offset ||
+                find_start_offset + 1
+              )
               rescue_bodies << builder.rescue_body(
                 token(rescue_clause.keyword_loc),
@@ -1068,7 +1100,7 @@ module Prism
         def visit_interpolated_regular_expression_node(node)
           builder.regexp_compose(
             token(node.opening_loc),
-            visit_all(node.parts),
+            string_nodes_from_interpolation(node, node.opening),
             [node.closing[0], srange_offsets(node.closing_loc.start_offset, node.closing_loc.start_offset + 1)],
             builder.regexp_options([node.closing[1..], srange_offsets(node.closing_loc.start_offset + 1, node.closing_loc.end_offset)])
           )
@@ -1085,29 +1117,9 @@ module Prism
             return visit_heredoc(node) { |children, closing| builder.string_compose(token(node.opening_loc), children, closing) }
           end
-          parts = if node.parts.one? { |part| part.type == :string_node }
-            node.parts.flat_map do |node|
-              if node.type == :string_node && node.unescaped.lines.count >= 2
-                start_offset = node.content_loc.start_offset
-                node.unescaped.lines.map do |line|
-                  end_offset = start_offset + line.length
-                  offsets = srange_offsets(start_offset, end_offset)
-                  start_offset = end_offset
-                  builder.string_internal([line, offsets])
-                end
-              else
-                visit(node)
-              end
-            end
-          else
-            visit_all(node.parts)
-          end
           builder.string_compose(
             token(node.opening_loc),
-            parts,
+            string_nodes_from_interpolation(node, node.opening),
             token(node.closing_loc)
           )
         end
@@ -1117,7 +1129,7 @@ module Prism
         def visit_interpolated_symbol_node(node)
           builder.symbol_compose(
             token(node.opening_loc),
-            visit_all(node.parts),
+            string_nodes_from_interpolation(node, node.opening),
             token(node.closing_loc)
           )
         end
@@ -1126,14 +1138,14 @@ module Prism
         # ^^^^^^^^^^^^
         def visit_interpolated_x_string_node(node)
           if node.heredoc?
-            visit_heredoc(node) { |children, closing| builder.xstring_compose(token(node.opening_loc), children, closing) }
-          else
-            builder.xstring_compose(
-              token(node.opening_loc),
-              visit_all(node.parts),
-              token(node.closing_loc)
-            )
+            return visit_heredoc(node) { |children, closing| builder.xstring_compose(token(node.opening_loc), children, closing) }
           end
+          builder.xstring_compose(
+            token(node.opening_loc),
+            string_nodes_from_interpolation(node, node.opening),
+            token(node.closing_loc)
+          )
         end
         # -> { it }
@@ -1145,7 +1157,17 @@ module Prism
         # -> { it }
         # ^^^^^^^^^
         def visit_it_parameters_node(node)
-          builder.args(nil, [], nil, false)
+          # FIXME: The builder _should_ always be a subclass of the prism builder.
+          # Currently RuboCop passes in its own builder that always inherits from the
+          # parser builder (which is lacking the `itarg` method). Once rubocop-ast
+          # opts in to use the custom prism builder, a warning can be emitted when
+          # the builder is not the expected class, and eventually an error raised.
+          # https://github.com/rubocop/rubocop-ast/pull/354
+          if builder.is_a?(Translation::Parser::Builder)
+            builder.itarg
+          else
+            builder.args(nil, [], nil, false)
+          end
         end
         # foo(bar: baz)
@@ -1187,7 +1209,7 @@ module Prism
                 false
               )
             end,
-            node.body&.accept(copy_compiler(forwarding: implicit_parameters ? [] : find_forwarding(parameters&.parameters))),
+            visit(node.body),
             [node.closing, srange(node.closing_loc)]
           )
         end
@@ -1311,7 +1333,7 @@ module Prism
         def visit_multi_write_node(node)
           elements = multi_target_elements(node)
-          if elements.length == 1 && elements.first.is_a?(MultiTargetNode)
+          if elements.length == 1 && elements.first.is_a?(MultiTargetNode) && !node.rest
             elements = multi_target_elements(elements.first)
           end
@@ -1511,15 +1533,13 @@ module Prism
         # /foo/
         # ^^^^^
         def visit_regular_expression_node(node)
-          content = node.content
           parts =
-            if content.include?("\n")
-              offset = node.content_loc.start_offset
-              content.lines.map do |line|
-                builder.string_internal([line, srange_offsets(offset, offset += line.bytesize)])
-              end
+            if node.content == ""
+              []
+            elsif node.content.include?("\n")
+              string_nodes_from_line_continuations(node.unescaped, node.content, node.content_loc.start_offset, node.opening)
             else
-              [builder.string_internal(token(node.content_loc))]
+              [builder.string_internal([node.unescaped, srange(node.content_loc)])]
             end
           builder.regexp_compose(
@@ -1676,28 +1696,11 @@ module Prism
           elsif node.opening&.start_with?("%") && node.unescaped.empty?
             builder.string_compose(token(node.opening_loc), [], token(node.closing_loc))
           else
-            content_lines = node.content.lines
-            unescaped_lines = node.unescaped.lines
             parts =
-              if content_lines.length <= 1 || unescaped_lines.length <= 1
-                [builder.string_internal([node.unescaped, srange(node.content_loc)])]
-              elsif content_lines.length != unescaped_lines.length
-                # This occurs when we have line continuations in the string. We
-                # need to come back and fix this, but for now this stops the
-                # code from breaking when we encounter it because of trying to
-                # transpose arrays of different lengths.
-                [builder.string_internal([node.unescaped, srange(node.content_loc)])]
+              if node.content.include?("\n")
+                string_nodes_from_line_continuations(node.unescaped, node.content, node.content_loc.start_offset, node.opening)
               else
-                start_offset = node.content_loc.start_offset
-                [content_lines, unescaped_lines].transpose.map do |content_line, unescaped_line|
-                  end_offset = start_offset + content_line.length
-                  offsets = srange_offsets(start_offset, end_offset)
-                  start_offset = end_offset
-                  builder.string_internal([unescaped_line, offsets])
-                end
+                [builder.string_internal([node.unescaped, srange(node.content_loc)])]
               end
             builder.string_compose(
@@ -1741,19 +1744,14 @@ module Prism
               builder.symbol([node.unescaped, srange(node.location)])
             end
           else
-            parts = if node.value.lines.one?
-              [builder.string_internal([node.unescaped, srange(node.value_loc)])]
-            else
-              start_offset = node.value_loc.start_offset
-              node.value.lines.map do |line|
-                end_offset = start_offset + line.length
-                offsets = srange_offsets(start_offset, end_offset)
-                start_offset = end_offset
-                builder.string_internal([line, offsets])
+            parts =
+              if node.value == ""
+                []
+              elsif node.value.include?("\n")
+                string_nodes_from_line_continuations(node.unescaped, node.value, node.value_loc.start_offset, node.opening)
+              else
+                [builder.string_internal([node.unescaped, srange(node.value_loc)])]
               end
-            end
             builder.symbol_compose(
               token(node.opening_loc),
@@ -1882,28 +1880,23 @@ module Prism
         # ^^^^^
         def visit_x_string_node(node)
           if node.heredoc?
-            visit_heredoc(node.to_interpolated) { |children, closing| builder.xstring_compose(token(node.opening_loc), children, closing) }
-          else
-            parts = if node.unescaped.lines.one?
-              [builder.string_internal([node.unescaped, srange(node.content_loc)])]
-            else
-              start_offset = node.content_loc.start_offset
-              node.unescaped.lines.map do |line|
-                end_offset = start_offset + line.length
-                offsets = srange_offsets(start_offset, end_offset)
-                start_offset = end_offset
+            return visit_heredoc(node.to_interpolated) { |children, closing| builder.xstring_compose(token(node.opening_loc), children, closing) }
+          end
-                builder.string_internal([line, offsets])
-              end
+          parts =
+            if node.content == ""
+              []
+            elsif node.content.include?("\n")
+              string_nodes_from_line_continuations(node.unescaped, node.content, node.content_loc.start_offset, node.opening)
+            else
+              [builder.string_internal([node.unescaped, srange(node.content_loc)])]
             end
-            builder.xstring_compose(
-              token(node.opening_loc),
-              parts,
-              token(node.closing_loc)
-            )
-          end
+          builder.xstring_compose(
+            token(node.opening_loc),
+            parts,
+            token(node.closing_loc)
+          )
         end
         # yield
@@ -2042,7 +2035,7 @@ module Prism
                   false
                 )
               end,
-              block.body&.accept(copy_compiler(forwarding: implicit_parameters ? [] : find_forwarding(parameters&.parameters))),
+              visit(block.body),
               token(block.closing_loc)
             )
           else
@@ -2050,13 +2043,6 @@ module Prism
           end
         end
-        # The parser gem automatically converts \r\n to \n, meaning our offsets
-        # need to be adjusted to always subtract 1 from the length.
-        def chomped_bytesize(line)
-          chomped = line.chomp
-          chomped.bytesize + (chomped == line ? 0 : 1)
-        end
         # Visit a heredoc that can be either a string or an xstring.
         def visit_heredoc(node)
           children = Array.new
@@ -2073,34 +2059,8 @@ module Prism
           node.parts.each do |part|
             pushing =
-              if part.is_a?(StringNode) && part.unescaped.include?("\n")
-                unescaped = part.unescaped.lines
-                escaped = part.content.lines
-                escaped_lengths = []
-                normalized_lengths = []
-                if node.opening.end_with?("'")
-                  escaped.each do |line|
-                    escaped_lengths << line.bytesize
-                    normalized_lengths << chomped_bytesize(line)
-                  end
-                else
-                  escaped
-                    .chunk_while { |before, after| before.match?(/(?<!\\)\\\r?\n$/) }
-                    .each do |lines|
-                      escaped_lengths << lines.sum(&:bytesize)
-                      normalized_lengths << lines.sum { |line| chomped_bytesize(line) }
-                    end
-                end
-                start_offset = part.location.start_offset
-                unescaped.map.with_index do |unescaped_line, index|
-                  inner_part = builder.string_internal([unescaped_line, srange_offsets(start_offset, start_offset + normalized_lengths.fetch(index, 0))])
-                  start_offset += escaped_lengths.fetch(index, 0)
-                  inner_part
-                end
+              if part.is_a?(StringNode) && part.content.include?("\n")
+                string_nodes_from_line_continuations(part.unescaped, part.content, part.location.start_offset, node.opening)
               else
                 [visit(part)]
               end
@@ -2114,7 +2074,7 @@ module Prism
                 location = appendee.loc
                 location = location.with_expression(location.expression.join(child.loc.expression))
-                children[-1] = appendee.updated(:str, [appendee.children.first << child.children.first], location: location)
+                children[-1] = appendee.updated(:str, ["#{appendee.children.first}#{child.children.first}"], location: location)
               else
                 children << child
               end
@@ -2150,6 +2110,102 @@ module Prism
             parser.pattern_variables.pop
           end
         end
+        # When the content of a string node is split across multiple lines, the
+        # parser gem creates individual string nodes for each line the content is part of.
+        def string_nodes_from_interpolation(node, opening)
+          node.parts.flat_map do |part|
+            if part.type == :string_node && part.content.include?("\n") && part.opening_loc.nil?
+              string_nodes_from_line_continuations(part.unescaped, part.content, part.content_loc.start_offset, opening)
+            else
+              visit(part)
+            end
+          end
+        end
+        # Create parser string nodes from a single prism node. The parser gem
+        # "glues" strings together when a line continuation is encountered.
+        def string_nodes_from_line_continuations(unescaped, escaped, start_offset, opening)
+          unescaped = unescaped.lines
+          escaped = escaped.lines
+          percent_array = opening&.start_with?("%w", "%W", "%i", "%I")
+          regex = opening == "/" || opening&.start_with?("%r")
+          # Non-interpolating strings
+          if opening&.end_with?("'") || opening&.start_with?("%q", "%s", "%w", "%i")
+            current_length = 0
+            current_line = +""
+            escaped.filter_map.with_index do |escaped_line, index|
+              unescaped_line = unescaped.fetch(index, "")
+              current_length += escaped_line.bytesize
+              current_line << unescaped_line
+              # Glue line continuations together. Only %w and %i arrays can contain these.
+              if percent_array && escaped_line[/(\\)*\n$/, 1]&.length&.odd?
+                next unless index == escaped.count - 1
+              end
+              s = builder.string_internal([current_line, srange_offsets(start_offset, start_offset + current_length)])
+              start_offset += escaped_line.bytesize
+              current_line = +""
+              current_length = 0
+              s
+            end
+          else
+            escaped_lengths = []
+            normalized_lengths = []
+            # Keeps track of where an unescaped line should start a new token. An unescaped
+            # \n would otherwise be indistinguishable from the actual newline at the end
+            # of the line. The parser gem only emits a new string node at "real" newlines;
+            # line continuations don't start a new node either.
+            do_next_tokens = []
+            escaped
+              .chunk_while { |before, after| before[/(\\*)\r?\n$/, 1]&.length&.odd? || false }
+              .each do |lines|
+                escaped_lengths << lines.sum(&:bytesize)
+                unescaped_lines_count =
+                  if regex
+                    0 # Will always be preserved as is
+                  else
+                    lines.sum do |line|
+                      count = line.scan(/(\\*)n/).count { |(backslashes)| backslashes&.length&.odd? }
+                      count -= 1 if !line.end_with?("\n") && count > 0
+                      count
+                    end
+                  end
+                extra = 1
+                extra = lines.count if percent_array # Account for line continuations in percent arrays
+                normalized_lengths.concat(Array.new(unescaped_lines_count + extra, 0))
+                normalized_lengths[-1] = lines.sum { |line| line.bytesize }
+                do_next_tokens.concat(Array.new(unescaped_lines_count + extra, false))
+                do_next_tokens[-1] = true
+              end
+            current_line = +""
+            current_normalized_length = 0
+            emitted_count = 0
+            unescaped.filter_map.with_index do |unescaped_line, index|
+              current_line << unescaped_line
+              current_normalized_length += normalized_lengths.fetch(index, 0)
+              if do_next_tokens[index]
+                inner_part = builder.string_internal([current_line, srange_offsets(start_offset, start_offset + current_normalized_length)])
+                start_offset += escaped_lengths.fetch(emitted_count, 0)
+                current_line = +""
+                current_normalized_length = 0
+                emitted_count += 1
+                inner_part
+              else
+                nil
+              end
+            end
+          end
+        end
       end
     end
   end