prism 1.2.0 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +46 -1
  3. data/Makefile +1 -1
  4. data/config.yml +429 -2
  5. data/docs/build_system.md +8 -11
  6. data/docs/releasing.md +1 -1
  7. data/docs/relocation.md +34 -0
  8. data/docs/ruby_api.md +1 -1
  9. data/ext/prism/api_node.c +1824 -1305
  10. data/ext/prism/extconf.rb +13 -36
  11. data/ext/prism/extension.c +298 -109
  12. data/ext/prism/extension.h +4 -4
  13. data/include/prism/ast.h +442 -2
  14. data/include/prism/defines.h +26 -8
  15. data/include/prism/options.h +47 -1
  16. data/include/prism/util/pm_buffer.h +10 -0
  17. data/include/prism/version.h +2 -2
  18. data/include/prism.h +51 -4
  19. data/lib/prism/dot_visitor.rb +26 -0
  20. data/lib/prism/dsl.rb +14 -6
  21. data/lib/prism/ffi.rb +93 -28
  22. data/lib/prism/inspect_visitor.rb +4 -1
  23. data/lib/prism/node.rb +1886 -105
  24. data/lib/prism/parse_result/errors.rb +1 -1
  25. data/lib/prism/parse_result/newlines.rb +1 -1
  26. data/lib/prism/parse_result.rb +54 -2
  27. data/lib/prism/polyfill/append_as_bytes.rb +15 -0
  28. data/lib/prism/reflection.rb +4 -4
  29. data/lib/prism/relocation.rb +504 -0
  30. data/lib/prism/serialize.rb +1252 -765
  31. data/lib/prism/string_query.rb +30 -0
  32. data/lib/prism/translation/parser/builder.rb +61 -0
  33. data/lib/prism/translation/parser/compiler.rb +228 -162
  34. data/lib/prism/translation/parser/lexer.rb +435 -61
  35. data/lib/prism/translation/parser.rb +51 -3
  36. data/lib/prism/translation/parser35.rb +12 -0
  37. data/lib/prism/translation/ripper.rb +13 -3
  38. data/lib/prism/translation/ruby_parser.rb +17 -7
  39. data/lib/prism/translation.rb +1 -0
  40. data/lib/prism.rb +9 -7
  41. data/prism.gemspec +11 -1
  42. data/rbi/prism/dsl.rbi +10 -7
  43. data/rbi/prism/node.rbi +44 -17
  44. data/rbi/prism/parse_result.rbi +17 -0
  45. data/rbi/prism/string_query.rbi +12 -0
  46. data/rbi/prism/translation/parser35.rbi +6 -0
  47. data/rbi/prism.rbi +39 -36
  48. data/sig/prism/dsl.rbs +6 -4
  49. data/sig/prism/node.rbs +29 -15
  50. data/sig/prism/parse_result.rbs +10 -0
  51. data/sig/prism/relocation.rbs +185 -0
  52. data/sig/prism/serialize.rbs +4 -2
  53. data/sig/prism/string_query.rbs +11 -0
  54. data/sig/prism.rbs +22 -1
  55. data/src/diagnostic.c +2 -2
  56. data/src/node.c +39 -0
  57. data/src/options.c +31 -0
  58. data/src/prettyprint.c +62 -0
  59. data/src/prism.c +738 -199
  60. data/src/regexp.c +7 -3
  61. data/src/serialize.c +18 -0
  62. data/src/static_literals.c +1 -1
  63. data/src/util/pm_buffer.c +40 -0
  64. data/src/util/pm_char.c +1 -1
  65. data/src/util/pm_constant_pool.c +6 -2
  66. data/src/util/pm_string.c +1 -0
  67. data/src/util/pm_strncasecmp.c +13 -1
  68. metadata +13 -7
@@ -74,7 +74,29 @@ module Prism
74
74
  # []
75
75
  # ^^
76
76
  def visit_array_node(node)
77
- builder.array(token(node.opening_loc), visit_all(node.elements), token(node.closing_loc))
77
+ if node.opening&.start_with?("%w", "%W", "%i", "%I")
78
+ elements = node.elements.flat_map do |element|
79
+ if element.is_a?(StringNode)
80
+ if element.content.include?("\n")
81
+ string_nodes_from_line_continuations(element.unescaped, element.content, element.content_loc.start_offset, node.opening)
82
+ else
83
+ [builder.string_internal([element.unescaped, srange(element.content_loc)])]
84
+ end
85
+ elsif element.is_a?(InterpolatedStringNode)
86
+ builder.string_compose(
87
+ token(element.opening_loc),
88
+ string_nodes_from_interpolation(element, node.opening),
89
+ token(element.closing_loc)
90
+ )
91
+ else
92
+ [visit(element)]
93
+ end
94
+ end
95
+ else
96
+ elements = visit_all(node.elements)
97
+ end
98
+
99
+ builder.array(token(node.opening_loc), elements, token(node.closing_loc))
78
100
  end
79
101
 
80
102
  # foo => [bar]
@@ -128,14 +150,17 @@ module Prism
128
150
  builder.pair_quoted(token(key.opening_loc), [builder.string_internal([key.unescaped, srange(key.value_loc)])], token(key.closing_loc), visit(node.value))
129
151
  end
130
152
  elsif node.value.is_a?(ImplicitNode)
131
- if (value = node.value.value).is_a?(LocalVariableReadNode)
132
- builder.pair_keyword(
133
- [key.unescaped, srange(key)],
134
- builder.ident([value.name, srange(key.value_loc)]).updated(:lvar)
135
- )
153
+ value = node.value.value
154
+
155
+ implicit_value = if value.is_a?(CallNode)
156
+ builder.call_method(nil, nil, [value.name, srange(value.message_loc)])
157
+ elsif value.is_a?(ConstantReadNode)
158
+ builder.const([value.name, srange(key.value_loc)])
136
159
  else
137
- builder.pair_label([key.unescaped, srange(key.location)])
160
+ builder.ident([value.name, srange(key.value_loc)]).updated(:lvar)
138
161
  end
162
+
163
+ builder.pair_keyword([key.unescaped, srange(key)], implicit_value)
139
164
  elsif node.operator_loc
140
165
  builder.pair(visit(key), token(node.operator_loc), visit(node.value))
141
166
  elsif key.is_a?(SymbolNode) && key.opening_loc.nil?
@@ -181,14 +206,21 @@ module Prism
181
206
  if (rescue_clause = node.rescue_clause)
182
207
  begin
183
208
  find_start_offset = (rescue_clause.reference&.location || rescue_clause.exceptions.last&.location || rescue_clause.keyword_loc).end_offset
184
- find_end_offset = (rescue_clause.statements&.location&.start_offset || rescue_clause.subsequent&.location&.start_offset || (find_start_offset + 1))
209
+ find_end_offset = (
210
+ rescue_clause.statements&.location&.start_offset ||
211
+ rescue_clause.subsequent&.location&.start_offset ||
212
+ node.else_clause&.location&.start_offset ||
213
+ node.ensure_clause&.location&.start_offset ||
214
+ node.end_keyword_loc&.start_offset ||
215
+ find_start_offset + 1
216
+ )
185
217
 
186
218
  rescue_bodies << builder.rescue_body(
187
219
  token(rescue_clause.keyword_loc),
188
220
  rescue_clause.exceptions.any? ? builder.array(nil, visit_all(rescue_clause.exceptions), nil) : nil,
189
221
  token(rescue_clause.operator_loc),
190
222
  visit(rescue_clause.reference),
191
- srange_find(find_start_offset, find_end_offset, [";"]),
223
+ srange_find(find_start_offset, find_end_offset, ";"),
192
224
  visit(rescue_clause.statements)
193
225
  )
194
226
  end until (rescue_clause = rescue_clause.subsequent).nil?
@@ -294,7 +326,7 @@ module Prism
294
326
  visit_all(arguments),
295
327
  token(node.closing_loc),
296
328
  ),
297
- srange_find(node.message_loc.end_offset, node.arguments.arguments.last.location.start_offset, ["="]),
329
+ srange_find(node.message_loc.end_offset, node.arguments.arguments.last.location.start_offset, "="),
298
330
  visit(node.arguments.arguments.last)
299
331
  ),
300
332
  block
@@ -311,7 +343,7 @@ module Prism
311
343
  if name.end_with?("=") && !message_loc.slice.end_with?("=") && node.arguments && block.nil?
312
344
  builder.assign(
313
345
  builder.attr_asgn(visit(node.receiver), call_operator, token(message_loc)),
314
- srange_find(message_loc.end_offset, node.arguments.location.start_offset, ["="]),
346
+ srange_find(message_loc.end_offset, node.arguments.location.start_offset, "="),
315
347
  visit(node.arguments.arguments.last)
316
348
  )
317
349
  else
@@ -733,10 +765,10 @@ module Prism
733
765
  visit(node.index),
734
766
  token(node.in_keyword_loc),
735
767
  visit(node.collection),
736
- if node.do_keyword_loc
737
- token(node.do_keyword_loc)
768
+ if (do_keyword_loc = node.do_keyword_loc)
769
+ token(do_keyword_loc)
738
770
  else
739
- srange_find(node.collection.location.end_offset, (node.statements&.location || node.end_keyword_loc).start_offset, [";"])
771
+ srange_find(node.collection.location.end_offset, (node.statements&.location || node.end_keyword_loc).start_offset, ";")
740
772
  end,
741
773
  visit(node.statements),
742
774
  token(node.end_keyword_loc)
@@ -865,10 +897,10 @@ module Prism
865
897
  builder.condition(
866
898
  token(node.if_keyword_loc),
867
899
  visit(node.predicate),
868
- if node.then_keyword_loc
869
- token(node.then_keyword_loc)
900
+ if (then_keyword_loc = node.then_keyword_loc)
901
+ token(then_keyword_loc)
870
902
  else
871
- srange_find(node.predicate.location.end_offset, (node.statements&.location || node.subsequent&.location || node.end_keyword_loc).start_offset, [";"])
903
+ srange_find(node.predicate.location.end_offset, (node.statements&.location || node.subsequent&.location || node.end_keyword_loc).start_offset, ";")
872
904
  end,
873
905
  visit(node.statements),
874
906
  case node.subsequent
@@ -931,7 +963,11 @@ module Prism
931
963
  token(node.in_loc),
932
964
  pattern,
933
965
  guard,
934
- srange_find(node.pattern.location.end_offset, node.statements&.location&.start_offset, [";", "then"]),
966
+ if (then_loc = node.then_loc)
967
+ token(then_loc)
968
+ else
969
+ srange_find(node.pattern.location.end_offset, node.statements&.location&.start_offset, ";")
970
+ end,
935
971
  visit(node.statements)
936
972
  )
937
973
  end
@@ -1064,7 +1100,7 @@ module Prism
1064
1100
  def visit_interpolated_regular_expression_node(node)
1065
1101
  builder.regexp_compose(
1066
1102
  token(node.opening_loc),
1067
- visit_all(node.parts),
1103
+ string_nodes_from_interpolation(node, node.opening),
1068
1104
  [node.closing[0], srange_offsets(node.closing_loc.start_offset, node.closing_loc.start_offset + 1)],
1069
1105
  builder.regexp_options([node.closing[1..], srange_offsets(node.closing_loc.start_offset + 1, node.closing_loc.end_offset)])
1070
1106
  )
@@ -1081,29 +1117,9 @@ module Prism
1081
1117
  return visit_heredoc(node) { |children, closing| builder.string_compose(token(node.opening_loc), children, closing) }
1082
1118
  end
1083
1119
 
1084
- parts = if node.parts.one? { |part| part.type == :string_node }
1085
- node.parts.flat_map do |node|
1086
- if node.type == :string_node && node.unescaped.lines.count >= 2
1087
- start_offset = node.content_loc.start_offset
1088
-
1089
- node.unescaped.lines.map do |line|
1090
- end_offset = start_offset + line.length
1091
- offsets = srange_offsets(start_offset, end_offset)
1092
- start_offset = end_offset
1093
-
1094
- builder.string_internal([line, offsets])
1095
- end
1096
- else
1097
- visit(node)
1098
- end
1099
- end
1100
- else
1101
- visit_all(node.parts)
1102
- end
1103
-
1104
1120
  builder.string_compose(
1105
1121
  token(node.opening_loc),
1106
- parts,
1122
+ string_nodes_from_interpolation(node, node.opening),
1107
1123
  token(node.closing_loc)
1108
1124
  )
1109
1125
  end
@@ -1113,7 +1129,7 @@ module Prism
1113
1129
  def visit_interpolated_symbol_node(node)
1114
1130
  builder.symbol_compose(
1115
1131
  token(node.opening_loc),
1116
- visit_all(node.parts),
1132
+ string_nodes_from_interpolation(node, node.opening),
1117
1133
  token(node.closing_loc)
1118
1134
  )
1119
1135
  end
@@ -1122,14 +1138,14 @@ module Prism
1122
1138
  # ^^^^^^^^^^^^
1123
1139
  def visit_interpolated_x_string_node(node)
1124
1140
  if node.heredoc?
1125
- visit_heredoc(node) { |children, closing| builder.xstring_compose(token(node.opening_loc), children, closing) }
1126
- else
1127
- builder.xstring_compose(
1128
- token(node.opening_loc),
1129
- visit_all(node.parts),
1130
- token(node.closing_loc)
1131
- )
1141
+ return visit_heredoc(node) { |children, closing| builder.xstring_compose(token(node.opening_loc), children, closing) }
1132
1142
  end
1143
+
1144
+ builder.xstring_compose(
1145
+ token(node.opening_loc),
1146
+ string_nodes_from_interpolation(node, node.opening),
1147
+ token(node.closing_loc)
1148
+ )
1133
1149
  end
1134
1150
 
1135
1151
  # -> { it }
@@ -1141,7 +1157,17 @@ module Prism
1141
1157
  # -> { it }
1142
1158
  # ^^^^^^^^^
1143
1159
  def visit_it_parameters_node(node)
1144
- builder.args(nil, [], nil, false)
1160
+ # FIXME: The builder _should_ always be a subclass of the prism builder.
1161
+ # Currently RuboCop passes in its own builder that always inherits from the
1162
+ # parser builder (which is lacking the `itarg` method). Once rubocop-ast
1163
+ # opts in to use the custom prism builder a warning can be emitted when
1164
+ # it is not the expected class, and eventually raise.
1165
+ # https://github.com/rubocop/rubocop-ast/pull/354
1166
+ if builder.is_a?(Translation::Parser::Builder)
1167
+ builder.itarg
1168
+ else
1169
+ builder.args(nil, [], nil, false)
1170
+ end
1145
1171
  end
1146
1172
 
1147
1173
  # foo(bar: baz)
@@ -1183,7 +1209,7 @@ module Prism
1183
1209
  false
1184
1210
  )
1185
1211
  end,
1186
- node.body&.accept(copy_compiler(forwarding: implicit_parameters ? [] : find_forwarding(parameters&.parameters))),
1212
+ visit(node.body),
1187
1213
  [node.closing, srange(node.closing_loc)]
1188
1214
  )
1189
1215
  end
@@ -1307,7 +1333,7 @@ module Prism
1307
1333
  def visit_multi_write_node(node)
1308
1334
  elements = multi_target_elements(node)
1309
1335
 
1310
- if elements.length == 1 && elements.first.is_a?(MultiTargetNode)
1336
+ if elements.length == 1 && elements.first.is_a?(MultiTargetNode) && !node.rest
1311
1337
  elements = multi_target_elements(elements.first)
1312
1338
  end
1313
1339
 
@@ -1507,15 +1533,13 @@ module Prism
1507
1533
  # /foo/
1508
1534
  # ^^^^^
1509
1535
  def visit_regular_expression_node(node)
1510
- content = node.content
1511
1536
  parts =
1512
- if content.include?("\n")
1513
- offset = node.content_loc.start_offset
1514
- content.lines.map do |line|
1515
- builder.string_internal([line, srange_offsets(offset, offset += line.bytesize)])
1516
- end
1537
+ if node.content == ""
1538
+ []
1539
+ elsif node.content.include?("\n")
1540
+ string_nodes_from_line_continuations(node.unescaped, node.content, node.content_loc.start_offset, node.opening)
1517
1541
  else
1518
- [builder.string_internal(token(node.content_loc))]
1542
+ [builder.string_internal([node.unescaped, srange(node.content_loc)])]
1519
1543
  end
1520
1544
 
1521
1545
  builder.regexp_compose(
@@ -1672,28 +1696,11 @@ module Prism
1672
1696
  elsif node.opening&.start_with?("%") && node.unescaped.empty?
1673
1697
  builder.string_compose(token(node.opening_loc), [], token(node.closing_loc))
1674
1698
  else
1675
- content_lines = node.content.lines
1676
- unescaped_lines = node.unescaped.lines
1677
-
1678
1699
  parts =
1679
- if content_lines.length <= 1 || unescaped_lines.length <= 1
1680
- [builder.string_internal([node.unescaped, srange(node.content_loc)])]
1681
- elsif content_lines.length != unescaped_lines.length
1682
- # This occurs when we have line continuations in the string. We
1683
- # need to come back and fix this, but for now this stops the
1684
- # code from breaking when we encounter it because of trying to
1685
- # transpose arrays of different lengths.
1686
- [builder.string_internal([node.unescaped, srange(node.content_loc)])]
1700
+ if node.content.include?("\n")
1701
+ string_nodes_from_line_continuations(node.unescaped, node.content, node.content_loc.start_offset, node.opening)
1687
1702
  else
1688
- start_offset = node.content_loc.start_offset
1689
-
1690
- [content_lines, unescaped_lines].transpose.map do |content_line, unescaped_line|
1691
- end_offset = start_offset + content_line.length
1692
- offsets = srange_offsets(start_offset, end_offset)
1693
- start_offset = end_offset
1694
-
1695
- builder.string_internal([unescaped_line, offsets])
1696
- end
1703
+ [builder.string_internal([node.unescaped, srange(node.content_loc)])]
1697
1704
  end
1698
1705
 
1699
1706
  builder.string_compose(
@@ -1737,19 +1744,14 @@ module Prism
1737
1744
  builder.symbol([node.unescaped, srange(node.location)])
1738
1745
  end
1739
1746
  else
1740
- parts = if node.value.lines.one?
1741
- [builder.string_internal([node.unescaped, srange(node.value_loc)])]
1742
- else
1743
- start_offset = node.value_loc.start_offset
1744
-
1745
- node.value.lines.map do |line|
1746
- end_offset = start_offset + line.length
1747
- offsets = srange_offsets(start_offset, end_offset)
1748
- start_offset = end_offset
1749
-
1750
- builder.string_internal([line, offsets])
1747
+ parts =
1748
+ if node.value == ""
1749
+ []
1750
+ elsif node.value.include?("\n")
1751
+ string_nodes_from_line_continuations(node.unescaped, node.value, node.value_loc.start_offset, node.opening)
1752
+ else
1753
+ [builder.string_internal([node.unescaped, srange(node.value_loc)])]
1751
1754
  end
1752
- end
1753
1755
 
1754
1756
  builder.symbol_compose(
1755
1757
  token(node.opening_loc),
@@ -1781,10 +1783,10 @@ module Prism
1781
1783
  builder.condition(
1782
1784
  token(node.keyword_loc),
1783
1785
  visit(node.predicate),
1784
- if node.then_keyword_loc
1785
- token(node.then_keyword_loc)
1786
+ if (then_keyword_loc = node.then_keyword_loc)
1787
+ token(then_keyword_loc)
1786
1788
  else
1787
- srange_find(node.predicate.location.end_offset, (node.statements&.location || node.else_clause&.location || node.end_keyword_loc).start_offset, [";"])
1789
+ srange_find(node.predicate.location.end_offset, (node.statements&.location || node.else_clause&.location || node.end_keyword_loc).start_offset, ";")
1788
1790
  end,
1789
1791
  visit(node.else_clause),
1790
1792
  token(node.else_clause&.else_keyword_loc),
@@ -1812,7 +1814,11 @@ module Prism
1812
1814
  :until,
1813
1815
  token(node.keyword_loc),
1814
1816
  visit(node.predicate),
1815
- srange_find(node.predicate.location.end_offset, (node.statements&.location || node.closing_loc).start_offset, [";", "do"]),
1817
+ if (do_keyword_loc = node.do_keyword_loc)
1818
+ token(do_keyword_loc)
1819
+ else
1820
+ srange_find(node.predicate.location.end_offset, (node.statements&.location || node.closing_loc).start_offset, ";")
1821
+ end,
1816
1822
  visit(node.statements),
1817
1823
  token(node.closing_loc)
1818
1824
  )
@@ -1832,10 +1838,10 @@ module Prism
1832
1838
  builder.when(
1833
1839
  token(node.keyword_loc),
1834
1840
  visit_all(node.conditions),
1835
- if node.then_keyword_loc
1836
- token(node.then_keyword_loc)
1841
+ if (then_keyword_loc = node.then_keyword_loc)
1842
+ token(then_keyword_loc)
1837
1843
  else
1838
- srange_find(node.conditions.last.location.end_offset, node.statements&.location&.start_offset, [";"])
1844
+ srange_find(node.conditions.last.location.end_offset, node.statements&.location&.start_offset, ";")
1839
1845
  end,
1840
1846
  visit(node.statements)
1841
1847
  )
@@ -1852,7 +1858,11 @@ module Prism
1852
1858
  :while,
1853
1859
  token(node.keyword_loc),
1854
1860
  visit(node.predicate),
1855
- srange_find(node.predicate.location.end_offset, (node.statements&.location || node.closing_loc).start_offset, [";", "do"]),
1861
+ if (do_keyword_loc = node.do_keyword_loc)
1862
+ token(do_keyword_loc)
1863
+ else
1864
+ srange_find(node.predicate.location.end_offset, (node.statements&.location || node.closing_loc).start_offset, ";")
1865
+ end,
1856
1866
  visit(node.statements),
1857
1867
  token(node.closing_loc)
1858
1868
  )
@@ -1870,28 +1880,23 @@ module Prism
1870
1880
  # ^^^^^
1871
1881
  def visit_x_string_node(node)
1872
1882
  if node.heredoc?
1873
- visit_heredoc(node.to_interpolated) { |children, closing| builder.xstring_compose(token(node.opening_loc), children, closing) }
1874
- else
1875
- parts = if node.unescaped.lines.one?
1876
- [builder.string_internal([node.unescaped, srange(node.content_loc)])]
1877
- else
1878
- start_offset = node.content_loc.start_offset
1879
-
1880
- node.unescaped.lines.map do |line|
1881
- end_offset = start_offset + line.length
1882
- offsets = srange_offsets(start_offset, end_offset)
1883
- start_offset = end_offset
1883
+ return visit_heredoc(node.to_interpolated) { |children, closing| builder.xstring_compose(token(node.opening_loc), children, closing) }
1884
+ end
1884
1885
 
1885
- builder.string_internal([line, offsets])
1886
- end
1886
+ parts =
1887
+ if node.content == ""
1888
+ []
1889
+ elsif node.content.include?("\n")
1890
+ string_nodes_from_line_continuations(node.unescaped, node.content, node.content_loc.start_offset, node.opening)
1891
+ else
1892
+ [builder.string_internal([node.unescaped, srange(node.content_loc)])]
1887
1893
  end
1888
1894
 
1889
- builder.xstring_compose(
1890
- token(node.opening_loc),
1891
- parts,
1892
- token(node.closing_loc)
1893
- )
1894
- end
1895
+ builder.xstring_compose(
1896
+ token(node.opening_loc),
1897
+ parts,
1898
+ token(node.closing_loc)
1899
+ )
1895
1900
  end
1896
1901
 
1897
1902
  # yield
@@ -1985,18 +1990,16 @@ module Prism
1985
1990
  Range.new(source_buffer, offset_cache[start_offset], offset_cache[end_offset])
1986
1991
  end
1987
1992
 
1988
- # Constructs a new source range by finding the given tokens between the
1989
- # given start offset and end offset. If the needle is not found, it
1993
+ # Constructs a new source range by finding the given character between
1994
+ # the given start offset and end offset. If the needle is not found, it
1990
1995
  # returns nil. Importantly it does not search past newlines or comments.
1991
1996
  #
1992
1997
  # Note that end_offset is allowed to be nil, in which case this will
1993
1998
  # search until the end of the string.
1994
- def srange_find(start_offset, end_offset, tokens)
1995
- if (match = source_buffer.source.byteslice(start_offset...end_offset).match(/\A(\s*)(#{tokens.join("|")})/))
1996
- _, whitespace, token = *match
1997
- token_offset = start_offset + whitespace.bytesize
1998
-
1999
- [token, Range.new(source_buffer, offset_cache[token_offset], offset_cache[token_offset + token.bytesize])]
1999
+ def srange_find(start_offset, end_offset, character)
2000
+ if (match = source_buffer.source.byteslice(start_offset...end_offset)[/\A\s*#{character}/])
2001
+ final_offset = start_offset + match.bytesize
2002
+ [character, Range.new(source_buffer, offset_cache[final_offset - character.bytesize], offset_cache[final_offset])]
2000
2003
  end
2001
2004
  end
2002
2005
 
@@ -2032,7 +2035,7 @@ module Prism
2032
2035
  false
2033
2036
  )
2034
2037
  end,
2035
- block.body&.accept(copy_compiler(forwarding: implicit_parameters ? [] : find_forwarding(parameters&.parameters))),
2038
+ visit(block.body),
2036
2039
  token(block.closing_loc)
2037
2040
  )
2038
2041
  else
@@ -2040,13 +2043,6 @@ module Prism
2040
2043
  end
2041
2044
  end
2042
2045
 
2043
- # The parser gem automatically converts \r\n to \n, meaning our offsets
2044
- # need to be adjusted to always subtract 1 from the length.
2045
- def chomped_bytesize(line)
2046
- chomped = line.chomp
2047
- chomped.bytesize + (chomped == line ? 0 : 1)
2048
- end
2049
-
2050
2046
  # Visit a heredoc that can be either a string or an xstring.
2051
2047
  def visit_heredoc(node)
2052
2048
  children = Array.new
@@ -2063,34 +2059,8 @@ module Prism
2063
2059
 
2064
2060
  node.parts.each do |part|
2065
2061
  pushing =
2066
- if part.is_a?(StringNode) && part.unescaped.include?("\n")
2067
- unescaped = part.unescaped.lines
2068
- escaped = part.content.lines
2069
-
2070
- escaped_lengths = []
2071
- normalized_lengths = []
2072
-
2073
- if node.opening.end_with?("'")
2074
- escaped.each do |line|
2075
- escaped_lengths << line.bytesize
2076
- normalized_lengths << chomped_bytesize(line)
2077
- end
2078
- else
2079
- escaped
2080
- .chunk_while { |before, after| before.match?(/(?<!\\)\\\r?\n$/) }
2081
- .each do |lines|
2082
- escaped_lengths << lines.sum(&:bytesize)
2083
- normalized_lengths << lines.sum { |line| chomped_bytesize(line) }
2084
- end
2085
- end
2086
-
2087
- start_offset = part.location.start_offset
2088
-
2089
- unescaped.map.with_index do |unescaped_line, index|
2090
- inner_part = builder.string_internal([unescaped_line, srange_offsets(start_offset, start_offset + normalized_lengths.fetch(index, 0))])
2091
- start_offset += escaped_lengths.fetch(index, 0)
2092
- inner_part
2093
- end
2062
+ if part.is_a?(StringNode) && part.content.include?("\n")
2063
+ string_nodes_from_line_continuations(part.unescaped, part.content, part.location.start_offset, node.opening)
2094
2064
  else
2095
2065
  [visit(part)]
2096
2066
  end
@@ -2104,7 +2074,7 @@ module Prism
2104
2074
  location = appendee.loc
2105
2075
  location = location.with_expression(location.expression.join(child.loc.expression))
2106
2076
 
2107
- children[-1] = appendee.updated(:str, [appendee.children.first << child.children.first], location: location)
2077
+ children[-1] = appendee.updated(:str, ["#{appendee.children.first}#{child.children.first}"], location: location)
2108
2078
  else
2109
2079
  children << child
2110
2080
  end
@@ -2140,6 +2110,102 @@ module Prism
2140
2110
  parser.pattern_variables.pop
2141
2111
  end
2142
2112
  end
2113
+
2114
+ # When the content of a string node is split across multiple lines, the
2115
+ # parser gem creates individual string nodes for each line the content is part of.
2116
+ def string_nodes_from_interpolation(node, opening)
2117
+ node.parts.flat_map do |part|
2118
+ if part.type == :string_node && part.content.include?("\n") && part.opening_loc.nil?
2119
+ string_nodes_from_line_continuations(part.unescaped, part.content, part.content_loc.start_offset, opening)
2120
+ else
2121
+ visit(part)
2122
+ end
2123
+ end
2124
+ end
2125
+
2126
+ # Create parser string nodes from a single prism node. The parser gem
2127
+ # "glues" strings together when a line continuation is encountered.
2128
+ def string_nodes_from_line_continuations(unescaped, escaped, start_offset, opening)
2129
+ unescaped = unescaped.lines
2130
+ escaped = escaped.lines
2131
+ percent_array = opening&.start_with?("%w", "%W", "%i", "%I")
2132
+ regex = opening == "/" || opening&.start_with?("%r")
2133
+
2134
+ # Non-interpolating strings
2135
+ if opening&.end_with?("'") || opening&.start_with?("%q", "%s", "%w", "%i")
2136
+ current_length = 0
2137
+ current_line = +""
2138
+
2139
+ escaped.filter_map.with_index do |escaped_line, index|
2140
+ unescaped_line = unescaped.fetch(index, "")
2141
+ current_length += escaped_line.bytesize
2142
+ current_line << unescaped_line
2143
+
2144
+ # Glue line continuations together. Only %w and %i arrays can contain these.
2145
+ if percent_array && escaped_line[/(\\)*\n$/, 1]&.length&.odd?
2146
+ next unless index == escaped.count - 1
2147
+ end
2148
+ s = builder.string_internal([current_line, srange_offsets(start_offset, start_offset + current_length)])
2149
+ start_offset += escaped_line.bytesize
2150
+ current_line = +""
2151
+ current_length = 0
2152
+ s
2153
+ end
2154
+ else
2155
+ escaped_lengths = []
2156
+ normalized_lengths = []
2157
+ # Keeps track of where an unescaped line should start a new token. An unescaped
2158
+ # \n would otherwise be indistinguishable from the actual newline at the end of
2159
+ # of the line. The parser gem only emits a new string node at "real" newlines,
2160
+ # line continuations don't start a new node as well.
2161
+ do_next_tokens = []
2162
+
2163
+ escaped
2164
+ .chunk_while { |before, after| before[/(\\*)\r?\n$/, 1]&.length&.odd? || false }
2165
+ .each do |lines|
2166
+ escaped_lengths << lines.sum(&:bytesize)
2167
+
2168
+ unescaped_lines_count =
2169
+ if regex
2170
+ 0 # Will always be preserved as is
2171
+ else
2172
+ lines.sum do |line|
2173
+ count = line.scan(/(\\*)n/).count { |(backslashes)| backslashes&.length&.odd? }
2174
+ count -= 1 if !line.end_with?("\n") && count > 0
2175
+ count
2176
+ end
2177
+ end
2178
+
2179
+ extra = 1
2180
+ extra = lines.count if percent_array # Account for line continuations in percent arrays
2181
+
2182
+ normalized_lengths.concat(Array.new(unescaped_lines_count + extra, 0))
2183
+ normalized_lengths[-1] = lines.sum { |line| line.bytesize }
2184
+ do_next_tokens.concat(Array.new(unescaped_lines_count + extra, false))
2185
+ do_next_tokens[-1] = true
2186
+ end
2187
+
2188
+ current_line = +""
2189
+ current_normalized_length = 0
2190
+
2191
+ emitted_count = 0
2192
+ unescaped.filter_map.with_index do |unescaped_line, index|
2193
+ current_line << unescaped_line
2194
+ current_normalized_length += normalized_lengths.fetch(index, 0)
2195
+
2196
+ if do_next_tokens[index]
2197
+ inner_part = builder.string_internal([current_line, srange_offsets(start_offset, start_offset + current_normalized_length)])
2198
+ start_offset += escaped_lengths.fetch(emitted_count, 0)
2199
+ current_line = +""
2200
+ current_normalized_length = 0
2201
+ emitted_count += 1
2202
+ inner_part
2203
+ else
2204
+ nil
2205
+ end
2206
+ end
2207
+ end
2208
+ end
2143
2209
  end
2144
2210
  end
2145
2211
  end