prism 1.3.0 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +46 -1
  3. data/Makefile +2 -1
  4. data/README.md +1 -0
  5. data/config.yml +273 -37
  6. data/docs/parser_translation.md +8 -23
  7. data/docs/releasing.md +1 -1
  8. data/docs/ripper_translation.md +1 -1
  9. data/docs/ruby_api.md +1 -1
  10. data/ext/prism/api_node.c +1816 -1303
  11. data/ext/prism/extension.c +244 -110
  12. data/ext/prism/extension.h +4 -4
  13. data/include/prism/ast.h +291 -49
  14. data/include/prism/defines.h +4 -1
  15. data/include/prism/diagnostic.h +4 -0
  16. data/include/prism/options.h +89 -3
  17. data/include/prism/regexp.h +2 -2
  18. data/include/prism/util/pm_buffer.h +18 -0
  19. data/include/prism/util/pm_integer.h +4 -0
  20. data/include/prism/util/pm_list.h +6 -0
  21. data/include/prism/util/pm_string.h +12 -2
  22. data/include/prism/version.h +2 -2
  23. data/include/prism.h +41 -16
  24. data/lib/prism/compiler.rb +456 -151
  25. data/lib/prism/desugar_compiler.rb +1 -0
  26. data/lib/prism/dispatcher.rb +16 -0
  27. data/lib/prism/dot_visitor.rb +21 -1
  28. data/lib/prism/dsl.rb +13 -2
  29. data/lib/prism/ffi.rb +62 -34
  30. data/lib/prism/inspect_visitor.rb +5 -1
  31. data/lib/prism/lex_compat.rb +1 -0
  32. data/lib/prism/mutation_compiler.rb +3 -0
  33. data/lib/prism/node.rb +554 -345
  34. data/lib/prism/node_ext.rb +4 -1
  35. data/lib/prism/pack.rb +2 -0
  36. data/lib/prism/parse_result/comments.rb +1 -0
  37. data/lib/prism/parse_result/errors.rb +1 -0
  38. data/lib/prism/parse_result/newlines.rb +2 -1
  39. data/lib/prism/parse_result.rb +53 -0
  40. data/lib/prism/pattern.rb +1 -0
  41. data/lib/prism/polyfill/append_as_bytes.rb +15 -0
  42. data/lib/prism/polyfill/scan_byte.rb +14 -0
  43. data/lib/prism/polyfill/warn.rb +42 -0
  44. data/lib/prism/reflection.rb +5 -2
  45. data/lib/prism/relocation.rb +1 -0
  46. data/lib/prism/serialize.rb +1275 -783
  47. data/lib/prism/string_query.rb +1 -0
  48. data/lib/prism/translation/parser/builder.rb +62 -0
  49. data/lib/prism/translation/parser/compiler.rb +230 -152
  50. data/lib/prism/translation/parser/lexer.rb +446 -64
  51. data/lib/prism/translation/parser.rb +64 -4
  52. data/lib/prism/translation/parser33.rb +1 -0
  53. data/lib/prism/translation/parser34.rb +1 -0
  54. data/lib/prism/translation/parser35.rb +13 -0
  55. data/lib/prism/translation/parser_current.rb +24 -0
  56. data/lib/prism/translation/ripper/sexp.rb +1 -0
  57. data/lib/prism/translation/ripper.rb +30 -4
  58. data/lib/prism/translation/ruby_parser.rb +291 -7
  59. data/lib/prism/translation.rb +3 -0
  60. data/lib/prism/visitor.rb +457 -152
  61. data/lib/prism.rb +5 -3
  62. data/prism.gemspec +9 -1
  63. data/rbi/prism/dsl.rbi +9 -6
  64. data/rbi/prism/node.rbi +43 -16
  65. data/rbi/prism/parse_result.rbi +17 -0
  66. data/rbi/prism/translation/parser35.rbi +6 -0
  67. data/rbi/prism.rbi +39 -36
  68. data/sig/prism/dispatcher.rbs +3 -0
  69. data/sig/prism/dsl.rbs +7 -5
  70. data/sig/prism/node.rbs +461 -37
  71. data/sig/prism/node_ext.rbs +84 -17
  72. data/sig/prism/parse_result/comments.rbs +38 -0
  73. data/sig/prism/parse_result.rbs +14 -0
  74. data/sig/prism/reflection.rbs +1 -1
  75. data/sig/prism/serialize.rbs +4 -2
  76. data/sig/prism.rbs +22 -1
  77. data/src/diagnostic.c +9 -3
  78. data/src/node.c +23 -0
  79. data/src/options.c +33 -2
  80. data/src/prettyprint.c +32 -0
  81. data/src/prism.c +620 -242
  82. data/src/serialize.c +8 -0
  83. data/src/token_type.c +36 -34
  84. data/src/util/pm_buffer.c +40 -0
  85. data/src/util/pm_constant_pool.c +6 -2
  86. data/src/util/pm_strncasecmp.c +13 -1
  87. metadata +11 -7
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+ # :markup: markdown
2
3
 
3
4
  module Prism
4
5
  # Query methods that allow categorizing strings based on their context for
@@ -0,0 +1,62 @@
1
+ # frozen_string_literal: true
2
+ # :markup: markdown
3
+
4
+ module Prism
5
+ module Translation
6
+ class Parser
7
+ # A builder that knows how to convert more modern Ruby syntax
8
+ # into whitequark/parser gem's syntax tree.
9
+ class Builder < ::Parser::Builders::Default
10
+ # It represents the `it` block argument, which is not yet implemented in the Parser gem.
11
+ def itarg
12
+ n(:itarg, [:it], nil)
13
+ end
14
+
15
+ # The following three lines have been added to support the `it` block parameter syntax in the source code below.
16
+ #
17
+ # if args.type == :itarg
18
+ # block_type = :itblock
19
+ # args = :it
20
+ #
21
+ # https://github.com/whitequark/parser/blob/v3.3.7.1/lib/parser/builders/default.rb#L1122-L1155
22
+ def block(method_call, begin_t, args, body, end_t)
23
+ _receiver, _selector, *call_args = *method_call
24
+
25
+ if method_call.type == :yield
26
+ diagnostic :error, :block_given_to_yield, nil, method_call.loc.keyword, [loc(begin_t)]
27
+ end
28
+
29
+ last_arg = call_args.last
30
+ if last_arg && (last_arg.type == :block_pass || last_arg.type == :forwarded_args)
31
+ diagnostic :error, :block_and_blockarg, nil, last_arg.loc.expression, [loc(begin_t)]
32
+ end
33
+
34
+ if args.type == :itarg
35
+ block_type = :itblock
36
+ args = :it
37
+ elsif args.type == :numargs
38
+ block_type = :numblock
39
+ args = args.children[0]
40
+ else
41
+ block_type = :block
42
+ end
43
+
44
+ if [:send, :csend, :index, :super, :zsuper, :lambda].include?(method_call.type)
45
+ n(block_type, [ method_call, args, body ],
46
+ block_map(method_call.loc.expression, begin_t, end_t))
47
+ else
48
+ # Code like "return foo 1 do end" is reduced in a weird sequence.
49
+ # Here, method_call is actually (return).
50
+ actual_send, = *method_call
51
+ block =
52
+ n(block_type, [ actual_send, args, body ],
53
+ block_map(actual_send.loc.expression, begin_t, end_t))
54
+
55
+ n(method_call.type, [ block ],
56
+ method_call.loc.with_expression(join_exprs(method_call, block)))
57
+ end
58
+ end
59
+ end
60
+ end
61
+ end
62
+ end
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+ # :markup: markdown
2
3
 
3
4
  module Prism
4
5
  module Translation
@@ -74,7 +75,29 @@ module Prism
74
75
  # []
75
76
  # ^^
76
77
  def visit_array_node(node)
77
- builder.array(token(node.opening_loc), visit_all(node.elements), token(node.closing_loc))
78
+ if node.opening&.start_with?("%w", "%W", "%i", "%I")
79
+ elements = node.elements.flat_map do |element|
80
+ if element.is_a?(StringNode)
81
+ if element.content.include?("\n")
82
+ string_nodes_from_line_continuations(element.unescaped, element.content, element.content_loc.start_offset, node.opening)
83
+ else
84
+ [builder.string_internal([element.unescaped, srange(element.content_loc)])]
85
+ end
86
+ elsif element.is_a?(InterpolatedStringNode)
87
+ builder.string_compose(
88
+ token(element.opening_loc),
89
+ string_nodes_from_interpolation(element, node.opening),
90
+ token(element.closing_loc)
91
+ )
92
+ else
93
+ [visit(element)]
94
+ end
95
+ end
96
+ else
97
+ elements = visit_all(node.elements)
98
+ end
99
+
100
+ builder.array(token(node.opening_loc), elements, token(node.closing_loc))
78
101
  end
79
102
 
80
103
  # foo => [bar]
@@ -111,8 +134,8 @@ module Prism
111
134
  def visit_assoc_node(node)
112
135
  key = node.key
113
136
 
114
- if in_pattern
115
- if node.value.is_a?(ImplicitNode)
137
+ if node.value.is_a?(ImplicitNode)
138
+ if in_pattern
116
139
  if key.is_a?(SymbolNode)
117
140
  if key.opening.nil?
118
141
  builder.match_hash_var([key.unescaped, srange(key.location)])
@@ -122,19 +145,18 @@ module Prism
122
145
  else
123
146
  builder.match_hash_var_from_str(token(key.opening_loc), visit_all(key.parts), token(key.closing_loc))
124
147
  end
125
- elsif key.opening.nil?
126
- builder.pair_keyword([key.unescaped, srange(key.location)], visit(node.value))
127
148
  else
128
- builder.pair_quoted(token(key.opening_loc), [builder.string_internal([key.unescaped, srange(key.value_loc)])], token(key.closing_loc), visit(node.value))
129
- end
130
- elsif node.value.is_a?(ImplicitNode)
131
- if (value = node.value.value).is_a?(LocalVariableReadNode)
132
- builder.pair_keyword(
133
- [key.unescaped, srange(key)],
149
+ value = node.value.value
150
+
151
+ implicit_value = if value.is_a?(CallNode)
152
+ builder.call_method(nil, nil, [value.name, srange(value.message_loc)])
153
+ elsif value.is_a?(ConstantReadNode)
154
+ builder.const([value.name, srange(key.value_loc)])
155
+ else
134
156
  builder.ident([value.name, srange(key.value_loc)]).updated(:lvar)
135
- )
136
- else
137
- builder.pair_label([key.unescaped, srange(key.location)])
157
+ end
158
+
159
+ builder.pair_keyword([key.unescaped, srange(key)], implicit_value)
138
160
  end
139
161
  elsif node.operator_loc
140
162
  builder.pair(visit(key), token(node.operator_loc), visit(node.value))
@@ -181,7 +203,14 @@ module Prism
181
203
  if (rescue_clause = node.rescue_clause)
182
204
  begin
183
205
  find_start_offset = (rescue_clause.reference&.location || rescue_clause.exceptions.last&.location || rescue_clause.keyword_loc).end_offset
184
- find_end_offset = (rescue_clause.statements&.location&.start_offset || rescue_clause.subsequent&.location&.start_offset || (find_start_offset + 1))
206
+ find_end_offset = (
207
+ rescue_clause.statements&.location&.start_offset ||
208
+ rescue_clause.subsequent&.location&.start_offset ||
209
+ node.else_clause&.location&.start_offset ||
210
+ node.ensure_clause&.location&.start_offset ||
211
+ node.end_keyword_loc&.start_offset ||
212
+ find_start_offset + 1
213
+ )
185
214
 
186
215
  rescue_bodies << builder.rescue_body(
187
216
  token(rescue_clause.keyword_loc),
@@ -664,13 +693,37 @@ module Prism
664
693
  # defined?(a)
665
694
  # ^^^^^^^^^^^
666
695
  def visit_defined_node(node)
667
- builder.keyword_cmd(
668
- :defined?,
669
- token(node.keyword_loc),
670
- token(node.lparen_loc),
671
- [visit(node.value)],
672
- token(node.rparen_loc)
673
- )
696
+ # Very weird circumstances here where something like:
697
+ #
698
+ # defined?
699
+ # (1)
700
+ #
701
+ # gets parsed in Ruby as having only the `1` expression but in parser
702
+ # it gets parsed as having a begin. In this case we need to synthesize
703
+ # that begin to match parser's behavior.
704
+ if node.lparen_loc && node.keyword_loc.join(node.lparen_loc).slice.include?("\n")
705
+ builder.keyword_cmd(
706
+ :defined?,
707
+ token(node.keyword_loc),
708
+ nil,
709
+ [
710
+ builder.begin(
711
+ token(node.lparen_loc),
712
+ visit(node.value),
713
+ token(node.rparen_loc)
714
+ )
715
+ ],
716
+ nil
717
+ )
718
+ else
719
+ builder.keyword_cmd(
720
+ :defined?,
721
+ token(node.keyword_loc),
722
+ token(node.lparen_loc),
723
+ [visit(node.value)],
724
+ token(node.rparen_loc)
725
+ )
726
+ end
674
727
  end
675
728
 
676
729
  # if foo then bar else baz end
@@ -1000,7 +1053,7 @@ module Prism
1000
1053
  builder.index_asgn(
1001
1054
  visit(node.receiver),
1002
1055
  token(node.opening_loc),
1003
- visit_all(node.arguments.arguments),
1056
+ visit_all(node.arguments&.arguments || []),
1004
1057
  token(node.closing_loc),
1005
1058
  )
1006
1059
  end
@@ -1068,7 +1121,7 @@ module Prism
1068
1121
  def visit_interpolated_regular_expression_node(node)
1069
1122
  builder.regexp_compose(
1070
1123
  token(node.opening_loc),
1071
- visit_all(node.parts),
1124
+ string_nodes_from_interpolation(node, node.opening),
1072
1125
  [node.closing[0], srange_offsets(node.closing_loc.start_offset, node.closing_loc.start_offset + 1)],
1073
1126
  builder.regexp_options([node.closing[1..], srange_offsets(node.closing_loc.start_offset + 1, node.closing_loc.end_offset)])
1074
1127
  )
@@ -1085,29 +1138,9 @@ module Prism
1085
1138
  return visit_heredoc(node) { |children, closing| builder.string_compose(token(node.opening_loc), children, closing) }
1086
1139
  end
1087
1140
 
1088
- parts = if node.parts.one? { |part| part.type == :string_node }
1089
- node.parts.flat_map do |node|
1090
- if node.type == :string_node && node.unescaped.lines.count >= 2
1091
- start_offset = node.content_loc.start_offset
1092
-
1093
- node.unescaped.lines.map do |line|
1094
- end_offset = start_offset + line.length
1095
- offsets = srange_offsets(start_offset, end_offset)
1096
- start_offset = end_offset
1097
-
1098
- builder.string_internal([line, offsets])
1099
- end
1100
- else
1101
- visit(node)
1102
- end
1103
- end
1104
- else
1105
- visit_all(node.parts)
1106
- end
1107
-
1108
1141
  builder.string_compose(
1109
1142
  token(node.opening_loc),
1110
- parts,
1143
+ string_nodes_from_interpolation(node, node.opening),
1111
1144
  token(node.closing_loc)
1112
1145
  )
1113
1146
  end
@@ -1117,7 +1150,7 @@ module Prism
1117
1150
  def visit_interpolated_symbol_node(node)
1118
1151
  builder.symbol_compose(
1119
1152
  token(node.opening_loc),
1120
- visit_all(node.parts),
1153
+ string_nodes_from_interpolation(node, node.opening),
1121
1154
  token(node.closing_loc)
1122
1155
  )
1123
1156
  end
@@ -1126,14 +1159,14 @@ module Prism
1126
1159
  # ^^^^^^^^^^^^
1127
1160
  def visit_interpolated_x_string_node(node)
1128
1161
  if node.heredoc?
1129
- visit_heredoc(node) { |children, closing| builder.xstring_compose(token(node.opening_loc), children, closing) }
1130
- else
1131
- builder.xstring_compose(
1132
- token(node.opening_loc),
1133
- visit_all(node.parts),
1134
- token(node.closing_loc)
1135
- )
1162
+ return visit_heredoc(node) { |children, closing| builder.xstring_compose(token(node.opening_loc), children, closing) }
1136
1163
  end
1164
+
1165
+ builder.xstring_compose(
1166
+ token(node.opening_loc),
1167
+ string_nodes_from_interpolation(node, node.opening),
1168
+ token(node.closing_loc)
1169
+ )
1137
1170
  end
1138
1171
 
1139
1172
  # -> { it }
@@ -1145,7 +1178,17 @@ module Prism
1145
1178
  # -> { it }
1146
1179
  # ^^^^^^^^^
1147
1180
  def visit_it_parameters_node(node)
1148
- builder.args(nil, [], nil, false)
1181
+ # FIXME: The builder _should_ always be a subclass of the prism builder.
1182
+ # Currently RuboCop passes in its own builder that always inherits from the
1183
+ # parser builder (which is lacking the `itarg` method). Once rubocop-ast
1184
+ # opts in to use the custom prism builder a warning can be emitted when
1185
+ # it is not the expected class, and eventually an error can be raised.
1186
+ # https://github.com/rubocop/rubocop-ast/pull/354
1187
+ if builder.is_a?(Translation::Parser::Builder)
1188
+ builder.itarg
1189
+ else
1190
+ builder.args(nil, [], nil, false)
1191
+ end
1149
1192
  end
1150
1193
 
1151
1194
  # foo(bar: baz)
@@ -1187,7 +1230,7 @@ module Prism
1187
1230
  false
1188
1231
  )
1189
1232
  end,
1190
- node.body&.accept(copy_compiler(forwarding: implicit_parameters ? [] : find_forwarding(parameters&.parameters))),
1233
+ visit(node.body),
1191
1234
  [node.closing, srange(node.closing_loc)]
1192
1235
  )
1193
1236
  end
@@ -1311,7 +1354,7 @@ module Prism
1311
1354
  def visit_multi_write_node(node)
1312
1355
  elements = multi_target_elements(node)
1313
1356
 
1314
- if elements.length == 1 && elements.first.is_a?(MultiTargetNode)
1357
+ if elements.length == 1 && elements.first.is_a?(MultiTargetNode) && !node.rest
1315
1358
  elements = multi_target_elements(elements.first)
1316
1359
  end
1317
1360
 
@@ -1439,7 +1482,8 @@ module Prism
1439
1482
  # foo => ^(bar)
1440
1483
  # ^^^^^^
1441
1484
  def visit_pinned_expression_node(node)
1442
- expression = builder.begin(token(node.lparen_loc), visit(node.expression), token(node.rparen_loc))
1485
+ parts = node.expression.accept(copy_compiler(in_pattern: false)) # Don't treat * and similar as match_rest
1486
+ expression = builder.begin(token(node.lparen_loc), parts, token(node.rparen_loc))
1443
1487
  builder.pin(token(node.operator_loc), expression)
1444
1488
  end
1445
1489
 
@@ -1511,15 +1555,13 @@ module Prism
1511
1555
  # /foo/
1512
1556
  # ^^^^^
1513
1557
  def visit_regular_expression_node(node)
1514
- content = node.content
1515
1558
  parts =
1516
- if content.include?("\n")
1517
- offset = node.content_loc.start_offset
1518
- content.lines.map do |line|
1519
- builder.string_internal([line, srange_offsets(offset, offset += line.bytesize)])
1520
- end
1559
+ if node.content == ""
1560
+ []
1561
+ elsif node.content.include?("\n")
1562
+ string_nodes_from_line_continuations(node.unescaped, node.content, node.content_loc.start_offset, node.opening)
1521
1563
  else
1522
- [builder.string_internal(token(node.content_loc))]
1564
+ [builder.string_internal([node.unescaped, srange(node.content_loc)])]
1523
1565
  end
1524
1566
 
1525
1567
  builder.regexp_compose(
@@ -1676,28 +1718,11 @@ module Prism
1676
1718
  elsif node.opening&.start_with?("%") && node.unescaped.empty?
1677
1719
  builder.string_compose(token(node.opening_loc), [], token(node.closing_loc))
1678
1720
  else
1679
- content_lines = node.content.lines
1680
- unescaped_lines = node.unescaped.lines
1681
-
1682
1721
  parts =
1683
- if content_lines.length <= 1 || unescaped_lines.length <= 1
1684
- [builder.string_internal([node.unescaped, srange(node.content_loc)])]
1685
- elsif content_lines.length != unescaped_lines.length
1686
- # This occurs when we have line continuations in the string. We
1687
- # need to come back and fix this, but for now this stops the
1688
- # code from breaking when we encounter it because of trying to
1689
- # transpose arrays of different lengths.
1690
- [builder.string_internal([node.unescaped, srange(node.content_loc)])]
1722
+ if node.content.include?("\n")
1723
+ string_nodes_from_line_continuations(node.unescaped, node.content, node.content_loc.start_offset, node.opening)
1691
1724
  else
1692
- start_offset = node.content_loc.start_offset
1693
-
1694
- [content_lines, unescaped_lines].transpose.map do |content_line, unescaped_line|
1695
- end_offset = start_offset + content_line.length
1696
- offsets = srange_offsets(start_offset, end_offset)
1697
- start_offset = end_offset
1698
-
1699
- builder.string_internal([unescaped_line, offsets])
1700
- end
1725
+ [builder.string_internal([node.unescaped, srange(node.content_loc)])]
1701
1726
  end
1702
1727
 
1703
1728
  builder.string_compose(
@@ -1741,19 +1766,14 @@ module Prism
1741
1766
  builder.symbol([node.unescaped, srange(node.location)])
1742
1767
  end
1743
1768
  else
1744
- parts = if node.value.lines.one?
1745
- [builder.string_internal([node.unescaped, srange(node.value_loc)])]
1746
- else
1747
- start_offset = node.value_loc.start_offset
1748
-
1749
- node.value.lines.map do |line|
1750
- end_offset = start_offset + line.length
1751
- offsets = srange_offsets(start_offset, end_offset)
1752
- start_offset = end_offset
1753
-
1754
- builder.string_internal([line, offsets])
1769
+ parts =
1770
+ if node.value == ""
1771
+ []
1772
+ elsif node.value.include?("\n")
1773
+ string_nodes_from_line_continuations(node.unescaped, node.value, node.value_loc.start_offset, node.opening)
1774
+ else
1775
+ [builder.string_internal([node.unescaped, srange(node.value_loc)])]
1755
1776
  end
1756
- end
1757
1777
 
1758
1778
  builder.symbol_compose(
1759
1779
  token(node.opening_loc),
@@ -1882,28 +1902,23 @@ module Prism
1882
1902
  # ^^^^^
1883
1903
  def visit_x_string_node(node)
1884
1904
  if node.heredoc?
1885
- visit_heredoc(node.to_interpolated) { |children, closing| builder.xstring_compose(token(node.opening_loc), children, closing) }
1886
- else
1887
- parts = if node.unescaped.lines.one?
1888
- [builder.string_internal([node.unescaped, srange(node.content_loc)])]
1889
- else
1890
- start_offset = node.content_loc.start_offset
1891
-
1892
- node.unescaped.lines.map do |line|
1893
- end_offset = start_offset + line.length
1894
- offsets = srange_offsets(start_offset, end_offset)
1895
- start_offset = end_offset
1905
+ return visit_heredoc(node.to_interpolated) { |children, closing| builder.xstring_compose(token(node.opening_loc), children, closing) }
1906
+ end
1896
1907
 
1897
- builder.string_internal([line, offsets])
1898
- end
1908
+ parts =
1909
+ if node.content == ""
1910
+ []
1911
+ elsif node.content.include?("\n")
1912
+ string_nodes_from_line_continuations(node.unescaped, node.content, node.content_loc.start_offset, node.opening)
1913
+ else
1914
+ [builder.string_internal([node.unescaped, srange(node.content_loc)])]
1899
1915
  end
1900
1916
 
1901
- builder.xstring_compose(
1902
- token(node.opening_loc),
1903
- parts,
1904
- token(node.closing_loc)
1905
- )
1906
- end
1917
+ builder.xstring_compose(
1918
+ token(node.opening_loc),
1919
+ parts,
1920
+ token(node.closing_loc)
1921
+ )
1907
1922
  end
1908
1923
 
1909
1924
  # yield
@@ -2042,7 +2057,7 @@ module Prism
2042
2057
  false
2043
2058
  )
2044
2059
  end,
2045
- block.body&.accept(copy_compiler(forwarding: implicit_parameters ? [] : find_forwarding(parameters&.parameters))),
2060
+ visit(block.body),
2046
2061
  token(block.closing_loc)
2047
2062
  )
2048
2063
  else
@@ -2050,13 +2065,6 @@ module Prism
2050
2065
  end
2051
2066
  end
2052
2067
 
2053
- # The parser gem automatically converts \r\n to \n, meaning our offsets
2054
- # need to be adjusted to always subtract 1 from the length.
2055
- def chomped_bytesize(line)
2056
- chomped = line.chomp
2057
- chomped.bytesize + (chomped == line ? 0 : 1)
2058
- end
2059
-
2060
2068
  # Visit a heredoc that can be either a string or an xstring.
2061
2069
  def visit_heredoc(node)
2062
2070
  children = Array.new
@@ -2073,34 +2081,8 @@ module Prism
2073
2081
 
2074
2082
  node.parts.each do |part|
2075
2083
  pushing =
2076
- if part.is_a?(StringNode) && part.unescaped.include?("\n")
2077
- unescaped = part.unescaped.lines
2078
- escaped = part.content.lines
2079
-
2080
- escaped_lengths = []
2081
- normalized_lengths = []
2082
-
2083
- if node.opening.end_with?("'")
2084
- escaped.each do |line|
2085
- escaped_lengths << line.bytesize
2086
- normalized_lengths << chomped_bytesize(line)
2087
- end
2088
- else
2089
- escaped
2090
- .chunk_while { |before, after| before.match?(/(?<!\\)\\\r?\n$/) }
2091
- .each do |lines|
2092
- escaped_lengths << lines.sum(&:bytesize)
2093
- normalized_lengths << lines.sum { |line| chomped_bytesize(line) }
2094
- end
2095
- end
2096
-
2097
- start_offset = part.location.start_offset
2098
-
2099
- unescaped.map.with_index do |unescaped_line, index|
2100
- inner_part = builder.string_internal([unescaped_line, srange_offsets(start_offset, start_offset + normalized_lengths.fetch(index, 0))])
2101
- start_offset += escaped_lengths.fetch(index, 0)
2102
- inner_part
2103
- end
2084
+ if part.is_a?(StringNode) && part.content.include?("\n")
2085
+ string_nodes_from_line_continuations(part.unescaped, part.content, part.location.start_offset, node.opening)
2104
2086
  else
2105
2087
  [visit(part)]
2106
2088
  end
@@ -2114,7 +2096,7 @@ module Prism
2114
2096
  location = appendee.loc
2115
2097
  location = location.with_expression(location.expression.join(child.loc.expression))
2116
2098
 
2117
- children[-1] = appendee.updated(:str, [appendee.children.first << child.children.first], location: location)
2099
+ children[-1] = appendee.updated(:str, ["#{appendee.children.first}#{child.children.first}"], location: location)
2118
2100
  else
2119
2101
  children << child
2120
2102
  end
@@ -2150,6 +2132,102 @@ module Prism
2150
2132
  parser.pattern_variables.pop
2151
2133
  end
2152
2134
  end
2135
+
2136
+ # When the content of a string node is split across multiple lines, the
2137
+ # parser gem creates individual string nodes for each line the content is part of.
2138
+ def string_nodes_from_interpolation(node, opening)
2139
+ node.parts.flat_map do |part|
2140
+ if part.type == :string_node && part.content.include?("\n") && part.opening_loc.nil?
2141
+ string_nodes_from_line_continuations(part.unescaped, part.content, part.content_loc.start_offset, opening)
2142
+ else
2143
+ visit(part)
2144
+ end
2145
+ end
2146
+ end
2147
+
2148
+ # Create parser string nodes from a single prism node. The parser gem
2149
+ # "glues" strings together when a line continuation is encountered.
2150
+ def string_nodes_from_line_continuations(unescaped, escaped, start_offset, opening)
2151
+ unescaped = unescaped.lines
2152
+ escaped = escaped.lines
2153
+ percent_array = opening&.start_with?("%w", "%W", "%i", "%I")
2154
+ regex = opening == "/" || opening&.start_with?("%r")
2155
+
2156
+ # Non-interpolating strings
2157
+ if opening&.end_with?("'") || opening&.start_with?("%q", "%s", "%w", "%i")
2158
+ current_length = 0
2159
+ current_line = +""
2160
+
2161
+ escaped.filter_map.with_index do |escaped_line, index|
2162
+ unescaped_line = unescaped.fetch(index, "")
2163
+ current_length += escaped_line.bytesize
2164
+ current_line << unescaped_line
2165
+
2166
+ # Glue line continuations together. Only %w and %i arrays can contain these.
2167
+ if percent_array && escaped_line[/(\\)*\n$/, 1]&.length&.odd?
2168
+ next unless index == escaped.count - 1
2169
+ end
2170
+ s = builder.string_internal([current_line, srange_offsets(start_offset, start_offset + current_length)])
2171
+ start_offset += escaped_line.bytesize
2172
+ current_line = +""
2173
+ current_length = 0
2174
+ s
2175
+ end
2176
+ else
2177
+ escaped_lengths = []
2178
+ normalized_lengths = []
2179
+ # Keeps track of where an unescaped line should start a new token. An unescaped
2180
+ # \n would otherwise be indistinguishable from the actual newline at the end of
2181
+ # the line. The parser gem only emits a new string node at "real" newlines,
2182
+ # line continuations don't start a new node either.
2183
+ do_next_tokens = []
2184
+
2185
+ escaped
2186
+ .chunk_while { |before, after| before[/(\\*)\r?\n$/, 1]&.length&.odd? || false }
2187
+ .each do |lines|
2188
+ escaped_lengths << lines.sum(&:bytesize)
2189
+
2190
+ unescaped_lines_count =
2191
+ if regex
2192
+ 0 # Will always be preserved as is
2193
+ else
2194
+ lines.sum do |line|
2195
+ count = line.scan(/(\\*)n/).count { |(backslashes)| backslashes&.length&.odd? }
2196
+ count -= 1 if !line.end_with?("\n") && count > 0
2197
+ count
2198
+ end
2199
+ end
2200
+
2201
+ extra = 1
2202
+ extra = lines.count if percent_array # Account for line continuations in percent arrays
2203
+
2204
+ normalized_lengths.concat(Array.new(unescaped_lines_count + extra, 0))
2205
+ normalized_lengths[-1] = lines.sum { |line| line.bytesize }
2206
+ do_next_tokens.concat(Array.new(unescaped_lines_count + extra, false))
2207
+ do_next_tokens[-1] = true
2208
+ end
2209
+
2210
+ current_line = +""
2211
+ current_normalized_length = 0
2212
+
2213
+ emitted_count = 0
2214
+ unescaped.filter_map.with_index do |unescaped_line, index|
2215
+ current_line << unescaped_line
2216
+ current_normalized_length += normalized_lengths.fetch(index, 0)
2217
+
2218
+ if do_next_tokens[index]
2219
+ inner_part = builder.string_internal([current_line, srange_offsets(start_offset, start_offset + current_normalized_length)])
2220
+ start_offset += escaped_lengths.fetch(emitted_count, 0)
2221
+ current_line = +""
2222
+ current_normalized_length = 0
2223
+ emitted_count += 1
2224
+ inner_part
2225
+ else
2226
+ nil
2227
+ end
2228
+ end
2229
+ end
2230
+ end
2153
2231
  end
2154
2232
  end
2155
2233
  end