prism 1.3.0 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +24 -1
  3. data/config.yml +9 -0
  4. data/docs/releasing.md +1 -1
  5. data/docs/ruby_api.md +1 -1
  6. data/ext/prism/api_node.c +1814 -1303
  7. data/ext/prism/extension.c +230 -109
  8. data/ext/prism/extension.h +4 -4
  9. data/include/prism/ast.h +16 -0
  10. data/include/prism/defines.h +4 -1
  11. data/include/prism/options.h +47 -1
  12. data/include/prism/util/pm_buffer.h +10 -0
  13. data/include/prism/version.h +2 -2
  14. data/include/prism.h +4 -4
  15. data/lib/prism/dot_visitor.rb +16 -0
  16. data/lib/prism/dsl.rb +10 -2
  17. data/lib/prism/ffi.rb +45 -27
  18. data/lib/prism/inspect_visitor.rb +2 -1
  19. data/lib/prism/node.rb +48 -10
  20. data/lib/prism/parse_result/newlines.rb +1 -1
  21. data/lib/prism/parse_result.rb +52 -0
  22. data/lib/prism/polyfill/append_as_bytes.rb +15 -0
  23. data/lib/prism/reflection.rb +2 -2
  24. data/lib/prism/serialize.rb +1252 -765
  25. data/lib/prism/translation/parser/builder.rb +61 -0
  26. data/lib/prism/translation/parser/compiler.rb +192 -136
  27. data/lib/prism/translation/parser/lexer.rb +435 -61
  28. data/lib/prism/translation/parser.rb +51 -3
  29. data/lib/prism/translation/parser35.rb +12 -0
  30. data/lib/prism/translation/ripper.rb +13 -3
  31. data/lib/prism/translation/ruby_parser.rb +5 -4
  32. data/lib/prism/translation.rb +1 -0
  33. data/lib/prism.rb +3 -3
  34. data/prism.gemspec +5 -1
  35. data/rbi/prism/dsl.rbi +6 -3
  36. data/rbi/prism/node.rbi +22 -7
  37. data/rbi/prism/parse_result.rbi +17 -0
  38. data/rbi/prism/translation/parser35.rbi +6 -0
  39. data/rbi/prism.rbi +39 -36
  40. data/sig/prism/dsl.rbs +4 -2
  41. data/sig/prism/node.rbs +17 -7
  42. data/sig/prism/parse_result.rbs +10 -0
  43. data/sig/prism/serialize.rbs +4 -2
  44. data/sig/prism.rbs +22 -1
  45. data/src/diagnostic.c +2 -2
  46. data/src/node.c +21 -0
  47. data/src/options.c +31 -0
  48. data/src/prettyprint.c +30 -0
  49. data/src/prism.c +374 -118
  50. data/src/serialize.c +6 -0
  51. data/src/util/pm_buffer.c +40 -0
  52. data/src/util/pm_constant_pool.c +6 -2
  53. data/src/util/pm_strncasecmp.c +13 -1
  54. metadata +7 -7
@@ -0,0 +1,61 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Prism
4
+ module Translation
5
+ class Parser
6
+ # A builder that knows how to convert more modern Ruby syntax
7
+ # into whitequark/parser gem's syntax tree.
8
+ class Builder < ::Parser::Builders::Default
9
+ # It represents the `it` block argument, which is not yet implemented in the Parser gem.
10
+ def itarg
11
+ n(:itarg, [:it], nil)
12
+ end
13
+
14
+ # The following three lines have been added to support the `it` block parameter syntax in the source code below.
15
+ #
16
+ # if args.type == :itarg
17
+ # block_type = :itblock
18
+ # args = :it
19
+ #
20
+ # https://github.com/whitequark/parser/blob/v3.3.7.1/lib/parser/builders/default.rb#L1122-L1155
21
+ def block(method_call, begin_t, args, body, end_t)
22
+ _receiver, _selector, *call_args = *method_call
23
+
24
+ if method_call.type == :yield
25
+ diagnostic :error, :block_given_to_yield, nil, method_call.loc.keyword, [loc(begin_t)]
26
+ end
27
+
28
+ last_arg = call_args.last
29
+ if last_arg && (last_arg.type == :block_pass || last_arg.type == :forwarded_args)
30
+ diagnostic :error, :block_and_blockarg, nil, last_arg.loc.expression, [loc(begin_t)]
31
+ end
32
+
33
+ if args.type == :itarg
34
+ block_type = :itblock
35
+ args = :it
36
+ elsif args.type == :numargs
37
+ block_type = :numblock
38
+ args = args.children[0]
39
+ else
40
+ block_type = :block
41
+ end
42
+
43
+ if [:send, :csend, :index, :super, :zsuper, :lambda].include?(method_call.type)
44
+ n(block_type, [ method_call, args, body ],
45
+ block_map(method_call.loc.expression, begin_t, end_t))
46
+ else
47
+ # Code like "return foo 1 do end" is reduced in a weird sequence.
48
+ # Here, method_call is actually (return).
49
+ actual_send, = *method_call
50
+ block =
51
+ n(block_type, [ actual_send, args, body ],
52
+ block_map(actual_send.loc.expression, begin_t, end_t))
53
+
54
+ n(method_call.type, [ block ],
55
+ method_call.loc.with_expression(join_exprs(method_call, block)))
56
+ end
57
+ end
58
+ end
59
+ end
60
+ end
61
+ end
@@ -74,7 +74,29 @@ module Prism
74
74
  # []
75
75
  # ^^
76
76
  def visit_array_node(node)
77
- builder.array(token(node.opening_loc), visit_all(node.elements), token(node.closing_loc))
77
+ if node.opening&.start_with?("%w", "%W", "%i", "%I")
78
+ elements = node.elements.flat_map do |element|
79
+ if element.is_a?(StringNode)
80
+ if element.content.include?("\n")
81
+ string_nodes_from_line_continuations(element.unescaped, element.content, element.content_loc.start_offset, node.opening)
82
+ else
83
+ [builder.string_internal([element.unescaped, srange(element.content_loc)])]
84
+ end
85
+ elsif element.is_a?(InterpolatedStringNode)
86
+ builder.string_compose(
87
+ token(element.opening_loc),
88
+ string_nodes_from_interpolation(element, node.opening),
89
+ token(element.closing_loc)
90
+ )
91
+ else
92
+ [visit(element)]
93
+ end
94
+ end
95
+ else
96
+ elements = visit_all(node.elements)
97
+ end
98
+
99
+ builder.array(token(node.opening_loc), elements, token(node.closing_loc))
78
100
  end
79
101
 
80
102
  # foo => [bar]
@@ -128,14 +150,17 @@ module Prism
128
150
  builder.pair_quoted(token(key.opening_loc), [builder.string_internal([key.unescaped, srange(key.value_loc)])], token(key.closing_loc), visit(node.value))
129
151
  end
130
152
  elsif node.value.is_a?(ImplicitNode)
131
- if (value = node.value.value).is_a?(LocalVariableReadNode)
132
- builder.pair_keyword(
133
- [key.unescaped, srange(key)],
134
- builder.ident([value.name, srange(key.value_loc)]).updated(:lvar)
135
- )
153
+ value = node.value.value
154
+
155
+ implicit_value = if value.is_a?(CallNode)
156
+ builder.call_method(nil, nil, [value.name, srange(value.message_loc)])
157
+ elsif value.is_a?(ConstantReadNode)
158
+ builder.const([value.name, srange(key.value_loc)])
136
159
  else
137
- builder.pair_label([key.unescaped, srange(key.location)])
160
+ builder.ident([value.name, srange(key.value_loc)]).updated(:lvar)
138
161
  end
162
+
163
+ builder.pair_keyword([key.unescaped, srange(key)], implicit_value)
139
164
  elsif node.operator_loc
140
165
  builder.pair(visit(key), token(node.operator_loc), visit(node.value))
141
166
  elsif key.is_a?(SymbolNode) && key.opening_loc.nil?
@@ -181,7 +206,14 @@ module Prism
181
206
  if (rescue_clause = node.rescue_clause)
182
207
  begin
183
208
  find_start_offset = (rescue_clause.reference&.location || rescue_clause.exceptions.last&.location || rescue_clause.keyword_loc).end_offset
184
- find_end_offset = (rescue_clause.statements&.location&.start_offset || rescue_clause.subsequent&.location&.start_offset || (find_start_offset + 1))
209
+ find_end_offset = (
210
+ rescue_clause.statements&.location&.start_offset ||
211
+ rescue_clause.subsequent&.location&.start_offset ||
212
+ node.else_clause&.location&.start_offset ||
213
+ node.ensure_clause&.location&.start_offset ||
214
+ node.end_keyword_loc&.start_offset ||
215
+ find_start_offset + 1
216
+ )
185
217
 
186
218
  rescue_bodies << builder.rescue_body(
187
219
  token(rescue_clause.keyword_loc),
@@ -1068,7 +1100,7 @@ module Prism
1068
1100
  def visit_interpolated_regular_expression_node(node)
1069
1101
  builder.regexp_compose(
1070
1102
  token(node.opening_loc),
1071
- visit_all(node.parts),
1103
+ string_nodes_from_interpolation(node, node.opening),
1072
1104
  [node.closing[0], srange_offsets(node.closing_loc.start_offset, node.closing_loc.start_offset + 1)],
1073
1105
  builder.regexp_options([node.closing[1..], srange_offsets(node.closing_loc.start_offset + 1, node.closing_loc.end_offset)])
1074
1106
  )
@@ -1085,29 +1117,9 @@ module Prism
1085
1117
  return visit_heredoc(node) { |children, closing| builder.string_compose(token(node.opening_loc), children, closing) }
1086
1118
  end
1087
1119
 
1088
- parts = if node.parts.one? { |part| part.type == :string_node }
1089
- node.parts.flat_map do |node|
1090
- if node.type == :string_node && node.unescaped.lines.count >= 2
1091
- start_offset = node.content_loc.start_offset
1092
-
1093
- node.unescaped.lines.map do |line|
1094
- end_offset = start_offset + line.length
1095
- offsets = srange_offsets(start_offset, end_offset)
1096
- start_offset = end_offset
1097
-
1098
- builder.string_internal([line, offsets])
1099
- end
1100
- else
1101
- visit(node)
1102
- end
1103
- end
1104
- else
1105
- visit_all(node.parts)
1106
- end
1107
-
1108
1120
  builder.string_compose(
1109
1121
  token(node.opening_loc),
1110
- parts,
1122
+ string_nodes_from_interpolation(node, node.opening),
1111
1123
  token(node.closing_loc)
1112
1124
  )
1113
1125
  end
@@ -1117,7 +1129,7 @@ module Prism
1117
1129
  def visit_interpolated_symbol_node(node)
1118
1130
  builder.symbol_compose(
1119
1131
  token(node.opening_loc),
1120
- visit_all(node.parts),
1132
+ string_nodes_from_interpolation(node, node.opening),
1121
1133
  token(node.closing_loc)
1122
1134
  )
1123
1135
  end
@@ -1126,14 +1138,14 @@ module Prism
1126
1138
  # ^^^^^^^^^^^^
1127
1139
  def visit_interpolated_x_string_node(node)
1128
1140
  if node.heredoc?
1129
- visit_heredoc(node) { |children, closing| builder.xstring_compose(token(node.opening_loc), children, closing) }
1130
- else
1131
- builder.xstring_compose(
1132
- token(node.opening_loc),
1133
- visit_all(node.parts),
1134
- token(node.closing_loc)
1135
- )
1141
+ return visit_heredoc(node) { |children, closing| builder.xstring_compose(token(node.opening_loc), children, closing) }
1136
1142
  end
1143
+
1144
+ builder.xstring_compose(
1145
+ token(node.opening_loc),
1146
+ string_nodes_from_interpolation(node, node.opening),
1147
+ token(node.closing_loc)
1148
+ )
1137
1149
  end
1138
1150
 
1139
1151
  # -> { it }
@@ -1145,7 +1157,17 @@ module Prism
1145
1157
  # -> { it }
1146
1158
  # ^^^^^^^^^
1147
1159
  def visit_it_parameters_node(node)
1148
- builder.args(nil, [], nil, false)
1160
+ # FIXME: The builder _should_ always be a subclass of the prism builder.
1161
+ # Currently RuboCop passes in its own builder that always inherits from the
1162
+ # parser builder (which is lacking the `itarg` method). Once rubocop-ast
1163
+ # opts in to use the custom prism builder a warning can be emitted when
1164
+ # it is not the expected class, and eventually raise.
1165
+ # https://github.com/rubocop/rubocop-ast/pull/354
1166
+ if builder.is_a?(Translation::Parser::Builder)
1167
+ builder.itarg
1168
+ else
1169
+ builder.args(nil, [], nil, false)
1170
+ end
1149
1171
  end
1150
1172
 
1151
1173
  # foo(bar: baz)
@@ -1187,7 +1209,7 @@ module Prism
1187
1209
  false
1188
1210
  )
1189
1211
  end,
1190
- node.body&.accept(copy_compiler(forwarding: implicit_parameters ? [] : find_forwarding(parameters&.parameters))),
1212
+ visit(node.body),
1191
1213
  [node.closing, srange(node.closing_loc)]
1192
1214
  )
1193
1215
  end
@@ -1311,7 +1333,7 @@ module Prism
1311
1333
  def visit_multi_write_node(node)
1312
1334
  elements = multi_target_elements(node)
1313
1335
 
1314
- if elements.length == 1 && elements.first.is_a?(MultiTargetNode)
1336
+ if elements.length == 1 && elements.first.is_a?(MultiTargetNode) && !node.rest
1315
1337
  elements = multi_target_elements(elements.first)
1316
1338
  end
1317
1339
 
@@ -1511,15 +1533,13 @@ module Prism
1511
1533
  # /foo/
1512
1534
  # ^^^^^
1513
1535
  def visit_regular_expression_node(node)
1514
- content = node.content
1515
1536
  parts =
1516
- if content.include?("\n")
1517
- offset = node.content_loc.start_offset
1518
- content.lines.map do |line|
1519
- builder.string_internal([line, srange_offsets(offset, offset += line.bytesize)])
1520
- end
1537
+ if node.content == ""
1538
+ []
1539
+ elsif node.content.include?("\n")
1540
+ string_nodes_from_line_continuations(node.unescaped, node.content, node.content_loc.start_offset, node.opening)
1521
1541
  else
1522
- [builder.string_internal(token(node.content_loc))]
1542
+ [builder.string_internal([node.unescaped, srange(node.content_loc)])]
1523
1543
  end
1524
1544
 
1525
1545
  builder.regexp_compose(
@@ -1676,28 +1696,11 @@ module Prism
1676
1696
  elsif node.opening&.start_with?("%") && node.unescaped.empty?
1677
1697
  builder.string_compose(token(node.opening_loc), [], token(node.closing_loc))
1678
1698
  else
1679
- content_lines = node.content.lines
1680
- unescaped_lines = node.unescaped.lines
1681
-
1682
1699
  parts =
1683
- if content_lines.length <= 1 || unescaped_lines.length <= 1
1684
- [builder.string_internal([node.unescaped, srange(node.content_loc)])]
1685
- elsif content_lines.length != unescaped_lines.length
1686
- # This occurs when we have line continuations in the string. We
1687
- # need to come back and fix this, but for now this stops the
1688
- # code from breaking when we encounter it because of trying to
1689
- # transpose arrays of different lengths.
1690
- [builder.string_internal([node.unescaped, srange(node.content_loc)])]
1700
+ if node.content.include?("\n")
1701
+ string_nodes_from_line_continuations(node.unescaped, node.content, node.content_loc.start_offset, node.opening)
1691
1702
  else
1692
- start_offset = node.content_loc.start_offset
1693
-
1694
- [content_lines, unescaped_lines].transpose.map do |content_line, unescaped_line|
1695
- end_offset = start_offset + content_line.length
1696
- offsets = srange_offsets(start_offset, end_offset)
1697
- start_offset = end_offset
1698
-
1699
- builder.string_internal([unescaped_line, offsets])
1700
- end
1703
+ [builder.string_internal([node.unescaped, srange(node.content_loc)])]
1701
1704
  end
1702
1705
 
1703
1706
  builder.string_compose(
@@ -1741,19 +1744,14 @@ module Prism
1741
1744
  builder.symbol([node.unescaped, srange(node.location)])
1742
1745
  end
1743
1746
  else
1744
- parts = if node.value.lines.one?
1745
- [builder.string_internal([node.unescaped, srange(node.value_loc)])]
1746
- else
1747
- start_offset = node.value_loc.start_offset
1748
-
1749
- node.value.lines.map do |line|
1750
- end_offset = start_offset + line.length
1751
- offsets = srange_offsets(start_offset, end_offset)
1752
- start_offset = end_offset
1753
-
1754
- builder.string_internal([line, offsets])
1747
+ parts =
1748
+ if node.value == ""
1749
+ []
1750
+ elsif node.value.include?("\n")
1751
+ string_nodes_from_line_continuations(node.unescaped, node.value, node.value_loc.start_offset, node.opening)
1752
+ else
1753
+ [builder.string_internal([node.unescaped, srange(node.value_loc)])]
1755
1754
  end
1756
- end
1757
1755
 
1758
1756
  builder.symbol_compose(
1759
1757
  token(node.opening_loc),
@@ -1882,28 +1880,23 @@ module Prism
1882
1880
  # ^^^^^
1883
1881
  def visit_x_string_node(node)
1884
1882
  if node.heredoc?
1885
- visit_heredoc(node.to_interpolated) { |children, closing| builder.xstring_compose(token(node.opening_loc), children, closing) }
1886
- else
1887
- parts = if node.unescaped.lines.one?
1888
- [builder.string_internal([node.unescaped, srange(node.content_loc)])]
1889
- else
1890
- start_offset = node.content_loc.start_offset
1891
-
1892
- node.unescaped.lines.map do |line|
1893
- end_offset = start_offset + line.length
1894
- offsets = srange_offsets(start_offset, end_offset)
1895
- start_offset = end_offset
1883
+ return visit_heredoc(node.to_interpolated) { |children, closing| builder.xstring_compose(token(node.opening_loc), children, closing) }
1884
+ end
1896
1885
 
1897
- builder.string_internal([line, offsets])
1898
- end
1886
+ parts =
1887
+ if node.content == ""
1888
+ []
1889
+ elsif node.content.include?("\n")
1890
+ string_nodes_from_line_continuations(node.unescaped, node.content, node.content_loc.start_offset, node.opening)
1891
+ else
1892
+ [builder.string_internal([node.unescaped, srange(node.content_loc)])]
1899
1893
  end
1900
1894
 
1901
- builder.xstring_compose(
1902
- token(node.opening_loc),
1903
- parts,
1904
- token(node.closing_loc)
1905
- )
1906
- end
1895
+ builder.xstring_compose(
1896
+ token(node.opening_loc),
1897
+ parts,
1898
+ token(node.closing_loc)
1899
+ )
1907
1900
  end
1908
1901
 
1909
1902
  # yield
@@ -2042,7 +2035,7 @@ module Prism
2042
2035
  false
2043
2036
  )
2044
2037
  end,
2045
- block.body&.accept(copy_compiler(forwarding: implicit_parameters ? [] : find_forwarding(parameters&.parameters))),
2038
+ visit(block.body),
2046
2039
  token(block.closing_loc)
2047
2040
  )
2048
2041
  else
@@ -2050,13 +2043,6 @@ module Prism
2050
2043
  end
2051
2044
  end
2052
2045
 
2053
- # The parser gem automatically converts \r\n to \n, meaning our offsets
2054
- # need to be adjusted to always subtract 1 from the length.
2055
- def chomped_bytesize(line)
2056
- chomped = line.chomp
2057
- chomped.bytesize + (chomped == line ? 0 : 1)
2058
- end
2059
-
2060
2046
  # Visit a heredoc that can be either a string or an xstring.
2061
2047
  def visit_heredoc(node)
2062
2048
  children = Array.new
@@ -2073,34 +2059,8 @@ module Prism
2073
2059
 
2074
2060
  node.parts.each do |part|
2075
2061
  pushing =
2076
- if part.is_a?(StringNode) && part.unescaped.include?("\n")
2077
- unescaped = part.unescaped.lines
2078
- escaped = part.content.lines
2079
-
2080
- escaped_lengths = []
2081
- normalized_lengths = []
2082
-
2083
- if node.opening.end_with?("'")
2084
- escaped.each do |line|
2085
- escaped_lengths << line.bytesize
2086
- normalized_lengths << chomped_bytesize(line)
2087
- end
2088
- else
2089
- escaped
2090
- .chunk_while { |before, after| before.match?(/(?<!\\)\\\r?\n$/) }
2091
- .each do |lines|
2092
- escaped_lengths << lines.sum(&:bytesize)
2093
- normalized_lengths << lines.sum { |line| chomped_bytesize(line) }
2094
- end
2095
- end
2096
-
2097
- start_offset = part.location.start_offset
2098
-
2099
- unescaped.map.with_index do |unescaped_line, index|
2100
- inner_part = builder.string_internal([unescaped_line, srange_offsets(start_offset, start_offset + normalized_lengths.fetch(index, 0))])
2101
- start_offset += escaped_lengths.fetch(index, 0)
2102
- inner_part
2103
- end
2062
+ if part.is_a?(StringNode) && part.content.include?("\n")
2063
+ string_nodes_from_line_continuations(part.unescaped, part.content, part.location.start_offset, node.opening)
2104
2064
  else
2105
2065
  [visit(part)]
2106
2066
  end
@@ -2114,7 +2074,7 @@ module Prism
2114
2074
  location = appendee.loc
2115
2075
  location = location.with_expression(location.expression.join(child.loc.expression))
2116
2076
 
2117
- children[-1] = appendee.updated(:str, [appendee.children.first << child.children.first], location: location)
2077
+ children[-1] = appendee.updated(:str, ["#{appendee.children.first}#{child.children.first}"], location: location)
2118
2078
  else
2119
2079
  children << child
2120
2080
  end
@@ -2150,6 +2110,102 @@ module Prism
2150
2110
  parser.pattern_variables.pop
2151
2111
  end
2152
2112
  end
2113
+
2114
+ # When the content of a string node is split across multiple lines, the
2115
+ # parser gem creates individual string nodes for each line the content is part of.
2116
+ def string_nodes_from_interpolation(node, opening)
2117
+ node.parts.flat_map do |part|
2118
+ if part.type == :string_node && part.content.include?("\n") && part.opening_loc.nil?
2119
+ string_nodes_from_line_continuations(part.unescaped, part.content, part.content_loc.start_offset, opening)
2120
+ else
2121
+ visit(part)
2122
+ end
2123
+ end
2124
+ end
2125
+
2126
+ # Create parser string nodes from a single prism node. The parser gem
2127
+ # "glues" strings together when a line continuation is encountered.
2128
+ def string_nodes_from_line_continuations(unescaped, escaped, start_offset, opening)
2129
+ unescaped = unescaped.lines
2130
+ escaped = escaped.lines
2131
+ percent_array = opening&.start_with?("%w", "%W", "%i", "%I")
2132
+ regex = opening == "/" || opening&.start_with?("%r")
2133
+
2134
+ # Non-interpolating strings
2135
+ if opening&.end_with?("'") || opening&.start_with?("%q", "%s", "%w", "%i")
2136
+ current_length = 0
2137
+ current_line = +""
2138
+
2139
+ escaped.filter_map.with_index do |escaped_line, index|
2140
+ unescaped_line = unescaped.fetch(index, "")
2141
+ current_length += escaped_line.bytesize
2142
+ current_line << unescaped_line
2143
+
2144
+ # Glue line continuations together. Only %w and %i arrays can contain these.
2145
+ if percent_array && escaped_line[/(\\)*\n$/, 1]&.length&.odd?
2146
+ next unless index == escaped.count - 1
2147
+ end
2148
+ s = builder.string_internal([current_line, srange_offsets(start_offset, start_offset + current_length)])
2149
+ start_offset += escaped_line.bytesize
2150
+ current_line = +""
2151
+ current_length = 0
2152
+ s
2153
+ end
2154
+ else
2155
+ escaped_lengths = []
2156
+ normalized_lengths = []
2157
+ # Keeps track of where an unescaped line should start a new token. An unescaped
2158
+ # \n would otherwise be indistinguishable from the actual newline at the end of
2159
+ # of the line. The parser gem only emits a new string node at "real" newlines,
2160
+ # line continuations don't start a new node as well.
2161
+ do_next_tokens = []
2162
+
2163
+ escaped
2164
+ .chunk_while { |before, after| before[/(\\*)\r?\n$/, 1]&.length&.odd? || false }
2165
+ .each do |lines|
2166
+ escaped_lengths << lines.sum(&:bytesize)
2167
+
2168
+ unescaped_lines_count =
2169
+ if regex
2170
+ 0 # Will always be preserved as is
2171
+ else
2172
+ lines.sum do |line|
2173
+ count = line.scan(/(\\*)n/).count { |(backslashes)| backslashes&.length&.odd? }
2174
+ count -= 1 if !line.end_with?("\n") && count > 0
2175
+ count
2176
+ end
2177
+ end
2178
+
2179
+ extra = 1
2180
+ extra = lines.count if percent_array # Account for line continuations in percent arrays
2181
+
2182
+ normalized_lengths.concat(Array.new(unescaped_lines_count + extra, 0))
2183
+ normalized_lengths[-1] = lines.sum { |line| line.bytesize }
2184
+ do_next_tokens.concat(Array.new(unescaped_lines_count + extra, false))
2185
+ do_next_tokens[-1] = true
2186
+ end
2187
+
2188
+ current_line = +""
2189
+ current_normalized_length = 0
2190
+
2191
+ emitted_count = 0
2192
+ unescaped.filter_map.with_index do |unescaped_line, index|
2193
+ current_line << unescaped_line
2194
+ current_normalized_length += normalized_lengths.fetch(index, 0)
2195
+
2196
+ if do_next_tokens[index]
2197
+ inner_part = builder.string_internal([current_line, srange_offsets(start_offset, start_offset + current_normalized_length)])
2198
+ start_offset += escaped_lengths.fetch(emitted_count, 0)
2199
+ current_line = +""
2200
+ current_normalized_length = 0
2201
+ emitted_count += 1
2202
+ inner_part
2203
+ else
2204
+ nil
2205
+ end
2206
+ end
2207
+ end
2208
+ end
2153
2209
  end
2154
2210
  end
2155
2211
  end