prism 1.2.0 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +46 -1
- data/Makefile +1 -1
- data/config.yml +429 -2
- data/docs/build_system.md +8 -11
- data/docs/releasing.md +1 -1
- data/docs/relocation.md +34 -0
- data/docs/ruby_api.md +1 -1
- data/ext/prism/api_node.c +1824 -1305
- data/ext/prism/extconf.rb +13 -36
- data/ext/prism/extension.c +298 -109
- data/ext/prism/extension.h +4 -4
- data/include/prism/ast.h +442 -2
- data/include/prism/defines.h +26 -8
- data/include/prism/options.h +47 -1
- data/include/prism/util/pm_buffer.h +10 -0
- data/include/prism/version.h +2 -2
- data/include/prism.h +51 -4
- data/lib/prism/dot_visitor.rb +26 -0
- data/lib/prism/dsl.rb +14 -6
- data/lib/prism/ffi.rb +93 -28
- data/lib/prism/inspect_visitor.rb +4 -1
- data/lib/prism/node.rb +1886 -105
- data/lib/prism/parse_result/errors.rb +1 -1
- data/lib/prism/parse_result/newlines.rb +1 -1
- data/lib/prism/parse_result.rb +54 -2
- data/lib/prism/polyfill/append_as_bytes.rb +15 -0
- data/lib/prism/reflection.rb +4 -4
- data/lib/prism/relocation.rb +504 -0
- data/lib/prism/serialize.rb +1252 -765
- data/lib/prism/string_query.rb +30 -0
- data/lib/prism/translation/parser/builder.rb +61 -0
- data/lib/prism/translation/parser/compiler.rb +228 -162
- data/lib/prism/translation/parser/lexer.rb +435 -61
- data/lib/prism/translation/parser.rb +51 -3
- data/lib/prism/translation/parser35.rb +12 -0
- data/lib/prism/translation/ripper.rb +13 -3
- data/lib/prism/translation/ruby_parser.rb +17 -7
- data/lib/prism/translation.rb +1 -0
- data/lib/prism.rb +9 -7
- data/prism.gemspec +11 -1
- data/rbi/prism/dsl.rbi +10 -7
- data/rbi/prism/node.rbi +44 -17
- data/rbi/prism/parse_result.rbi +17 -0
- data/rbi/prism/string_query.rbi +12 -0
- data/rbi/prism/translation/parser35.rbi +6 -0
- data/rbi/prism.rbi +39 -36
- data/sig/prism/dsl.rbs +6 -4
- data/sig/prism/node.rbs +29 -15
- data/sig/prism/parse_result.rbs +10 -0
- data/sig/prism/relocation.rbs +185 -0
- data/sig/prism/serialize.rbs +4 -2
- data/sig/prism/string_query.rbs +11 -0
- data/sig/prism.rbs +22 -1
- data/src/diagnostic.c +2 -2
- data/src/node.c +39 -0
- data/src/options.c +31 -0
- data/src/prettyprint.c +62 -0
- data/src/prism.c +738 -199
- data/src/regexp.c +7 -3
- data/src/serialize.c +18 -0
- data/src/static_literals.c +1 -1
- data/src/util/pm_buffer.c +40 -0
- data/src/util/pm_char.c +1 -1
- data/src/util/pm_constant_pool.c +6 -2
- data/src/util/pm_string.c +1 -0
- data/src/util/pm_strncasecmp.c +13 -1
- metadata +13 -7
@@ -74,7 +74,29 @@ module Prism
|
|
74
74
|
# []
|
75
75
|
# ^^
|
76
76
|
def visit_array_node(node)
|
77
|
-
|
77
|
+
if node.opening&.start_with?("%w", "%W", "%i", "%I")
|
78
|
+
elements = node.elements.flat_map do |element|
|
79
|
+
if element.is_a?(StringNode)
|
80
|
+
if element.content.include?("\n")
|
81
|
+
string_nodes_from_line_continuations(element.unescaped, element.content, element.content_loc.start_offset, node.opening)
|
82
|
+
else
|
83
|
+
[builder.string_internal([element.unescaped, srange(element.content_loc)])]
|
84
|
+
end
|
85
|
+
elsif element.is_a?(InterpolatedStringNode)
|
86
|
+
builder.string_compose(
|
87
|
+
token(element.opening_loc),
|
88
|
+
string_nodes_from_interpolation(element, node.opening),
|
89
|
+
token(element.closing_loc)
|
90
|
+
)
|
91
|
+
else
|
92
|
+
[visit(element)]
|
93
|
+
end
|
94
|
+
end
|
95
|
+
else
|
96
|
+
elements = visit_all(node.elements)
|
97
|
+
end
|
98
|
+
|
99
|
+
builder.array(token(node.opening_loc), elements, token(node.closing_loc))
|
78
100
|
end
|
79
101
|
|
80
102
|
# foo => [bar]
|
@@ -128,14 +150,17 @@ module Prism
|
|
128
150
|
builder.pair_quoted(token(key.opening_loc), [builder.string_internal([key.unescaped, srange(key.value_loc)])], token(key.closing_loc), visit(node.value))
|
129
151
|
end
|
130
152
|
elsif node.value.is_a?(ImplicitNode)
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
153
|
+
value = node.value.value
|
154
|
+
|
155
|
+
implicit_value = if value.is_a?(CallNode)
|
156
|
+
builder.call_method(nil, nil, [value.name, srange(value.message_loc)])
|
157
|
+
elsif value.is_a?(ConstantReadNode)
|
158
|
+
builder.const([value.name, srange(key.value_loc)])
|
136
159
|
else
|
137
|
-
builder.
|
160
|
+
builder.ident([value.name, srange(key.value_loc)]).updated(:lvar)
|
138
161
|
end
|
162
|
+
|
163
|
+
builder.pair_keyword([key.unescaped, srange(key)], implicit_value)
|
139
164
|
elsif node.operator_loc
|
140
165
|
builder.pair(visit(key), token(node.operator_loc), visit(node.value))
|
141
166
|
elsif key.is_a?(SymbolNode) && key.opening_loc.nil?
|
@@ -181,14 +206,21 @@ module Prism
|
|
181
206
|
if (rescue_clause = node.rescue_clause)
|
182
207
|
begin
|
183
208
|
find_start_offset = (rescue_clause.reference&.location || rescue_clause.exceptions.last&.location || rescue_clause.keyword_loc).end_offset
|
184
|
-
find_end_offset = (
|
209
|
+
find_end_offset = (
|
210
|
+
rescue_clause.statements&.location&.start_offset ||
|
211
|
+
rescue_clause.subsequent&.location&.start_offset ||
|
212
|
+
node.else_clause&.location&.start_offset ||
|
213
|
+
node.ensure_clause&.location&.start_offset ||
|
214
|
+
node.end_keyword_loc&.start_offset ||
|
215
|
+
find_start_offset + 1
|
216
|
+
)
|
185
217
|
|
186
218
|
rescue_bodies << builder.rescue_body(
|
187
219
|
token(rescue_clause.keyword_loc),
|
188
220
|
rescue_clause.exceptions.any? ? builder.array(nil, visit_all(rescue_clause.exceptions), nil) : nil,
|
189
221
|
token(rescue_clause.operator_loc),
|
190
222
|
visit(rescue_clause.reference),
|
191
|
-
srange_find(find_start_offset, find_end_offset,
|
223
|
+
srange_find(find_start_offset, find_end_offset, ";"),
|
192
224
|
visit(rescue_clause.statements)
|
193
225
|
)
|
194
226
|
end until (rescue_clause = rescue_clause.subsequent).nil?
|
@@ -294,7 +326,7 @@ module Prism
|
|
294
326
|
visit_all(arguments),
|
295
327
|
token(node.closing_loc),
|
296
328
|
),
|
297
|
-
srange_find(node.message_loc.end_offset, node.arguments.arguments.last.location.start_offset,
|
329
|
+
srange_find(node.message_loc.end_offset, node.arguments.arguments.last.location.start_offset, "="),
|
298
330
|
visit(node.arguments.arguments.last)
|
299
331
|
),
|
300
332
|
block
|
@@ -311,7 +343,7 @@ module Prism
|
|
311
343
|
if name.end_with?("=") && !message_loc.slice.end_with?("=") && node.arguments && block.nil?
|
312
344
|
builder.assign(
|
313
345
|
builder.attr_asgn(visit(node.receiver), call_operator, token(message_loc)),
|
314
|
-
srange_find(message_loc.end_offset, node.arguments.location.start_offset,
|
346
|
+
srange_find(message_loc.end_offset, node.arguments.location.start_offset, "="),
|
315
347
|
visit(node.arguments.arguments.last)
|
316
348
|
)
|
317
349
|
else
|
@@ -733,10 +765,10 @@ module Prism
|
|
733
765
|
visit(node.index),
|
734
766
|
token(node.in_keyword_loc),
|
735
767
|
visit(node.collection),
|
736
|
-
if node.do_keyword_loc
|
737
|
-
token(
|
768
|
+
if (do_keyword_loc = node.do_keyword_loc)
|
769
|
+
token(do_keyword_loc)
|
738
770
|
else
|
739
|
-
srange_find(node.collection.location.end_offset, (node.statements&.location || node.end_keyword_loc).start_offset,
|
771
|
+
srange_find(node.collection.location.end_offset, (node.statements&.location || node.end_keyword_loc).start_offset, ";")
|
740
772
|
end,
|
741
773
|
visit(node.statements),
|
742
774
|
token(node.end_keyword_loc)
|
@@ -865,10 +897,10 @@ module Prism
|
|
865
897
|
builder.condition(
|
866
898
|
token(node.if_keyword_loc),
|
867
899
|
visit(node.predicate),
|
868
|
-
if node.then_keyword_loc
|
869
|
-
token(
|
900
|
+
if (then_keyword_loc = node.then_keyword_loc)
|
901
|
+
token(then_keyword_loc)
|
870
902
|
else
|
871
|
-
srange_find(node.predicate.location.end_offset, (node.statements&.location || node.subsequent&.location || node.end_keyword_loc).start_offset,
|
903
|
+
srange_find(node.predicate.location.end_offset, (node.statements&.location || node.subsequent&.location || node.end_keyword_loc).start_offset, ";")
|
872
904
|
end,
|
873
905
|
visit(node.statements),
|
874
906
|
case node.subsequent
|
@@ -931,7 +963,11 @@ module Prism
|
|
931
963
|
token(node.in_loc),
|
932
964
|
pattern,
|
933
965
|
guard,
|
934
|
-
|
966
|
+
if (then_loc = node.then_loc)
|
967
|
+
token(then_loc)
|
968
|
+
else
|
969
|
+
srange_find(node.pattern.location.end_offset, node.statements&.location&.start_offset, ";")
|
970
|
+
end,
|
935
971
|
visit(node.statements)
|
936
972
|
)
|
937
973
|
end
|
@@ -1064,7 +1100,7 @@ module Prism
|
|
1064
1100
|
def visit_interpolated_regular_expression_node(node)
|
1065
1101
|
builder.regexp_compose(
|
1066
1102
|
token(node.opening_loc),
|
1067
|
-
|
1103
|
+
string_nodes_from_interpolation(node, node.opening),
|
1068
1104
|
[node.closing[0], srange_offsets(node.closing_loc.start_offset, node.closing_loc.start_offset + 1)],
|
1069
1105
|
builder.regexp_options([node.closing[1..], srange_offsets(node.closing_loc.start_offset + 1, node.closing_loc.end_offset)])
|
1070
1106
|
)
|
@@ -1081,29 +1117,9 @@ module Prism
|
|
1081
1117
|
return visit_heredoc(node) { |children, closing| builder.string_compose(token(node.opening_loc), children, closing) }
|
1082
1118
|
end
|
1083
1119
|
|
1084
|
-
parts = if node.parts.one? { |part| part.type == :string_node }
|
1085
|
-
node.parts.flat_map do |node|
|
1086
|
-
if node.type == :string_node && node.unescaped.lines.count >= 2
|
1087
|
-
start_offset = node.content_loc.start_offset
|
1088
|
-
|
1089
|
-
node.unescaped.lines.map do |line|
|
1090
|
-
end_offset = start_offset + line.length
|
1091
|
-
offsets = srange_offsets(start_offset, end_offset)
|
1092
|
-
start_offset = end_offset
|
1093
|
-
|
1094
|
-
builder.string_internal([line, offsets])
|
1095
|
-
end
|
1096
|
-
else
|
1097
|
-
visit(node)
|
1098
|
-
end
|
1099
|
-
end
|
1100
|
-
else
|
1101
|
-
visit_all(node.parts)
|
1102
|
-
end
|
1103
|
-
|
1104
1120
|
builder.string_compose(
|
1105
1121
|
token(node.opening_loc),
|
1106
|
-
|
1122
|
+
string_nodes_from_interpolation(node, node.opening),
|
1107
1123
|
token(node.closing_loc)
|
1108
1124
|
)
|
1109
1125
|
end
|
@@ -1113,7 +1129,7 @@ module Prism
|
|
1113
1129
|
def visit_interpolated_symbol_node(node)
|
1114
1130
|
builder.symbol_compose(
|
1115
1131
|
token(node.opening_loc),
|
1116
|
-
|
1132
|
+
string_nodes_from_interpolation(node, node.opening),
|
1117
1133
|
token(node.closing_loc)
|
1118
1134
|
)
|
1119
1135
|
end
|
@@ -1122,14 +1138,14 @@ module Prism
|
|
1122
1138
|
# ^^^^^^^^^^^^
|
1123
1139
|
def visit_interpolated_x_string_node(node)
|
1124
1140
|
if node.heredoc?
|
1125
|
-
visit_heredoc(node) { |children, closing| builder.xstring_compose(token(node.opening_loc), children, closing) }
|
1126
|
-
else
|
1127
|
-
builder.xstring_compose(
|
1128
|
-
token(node.opening_loc),
|
1129
|
-
visit_all(node.parts),
|
1130
|
-
token(node.closing_loc)
|
1131
|
-
)
|
1141
|
+
return visit_heredoc(node) { |children, closing| builder.xstring_compose(token(node.opening_loc), children, closing) }
|
1132
1142
|
end
|
1143
|
+
|
1144
|
+
builder.xstring_compose(
|
1145
|
+
token(node.opening_loc),
|
1146
|
+
string_nodes_from_interpolation(node, node.opening),
|
1147
|
+
token(node.closing_loc)
|
1148
|
+
)
|
1133
1149
|
end
|
1134
1150
|
|
1135
1151
|
# -> { it }
|
@@ -1141,7 +1157,17 @@ module Prism
|
|
1141
1157
|
# -> { it }
|
1142
1158
|
# ^^^^^^^^^
|
1143
1159
|
def visit_it_parameters_node(node)
|
1144
|
-
builder
|
1160
|
+
# FIXME: The builder _should_ always be a subclass of the prism builder.
|
1161
|
+
# Currently RuboCop passes in its own builder that always inherits from the
|
1162
|
+
# parser builder (which is lacking the `itarg` method). Once rubocop-ast
|
1163
|
+
# opts in to use the custom prism builder a warning can be emitted when
|
1164
|
+
# it is not the expected class, and eventually raise.
|
1165
|
+
# https://github.com/rubocop/rubocop-ast/pull/354
|
1166
|
+
if builder.is_a?(Translation::Parser::Builder)
|
1167
|
+
builder.itarg
|
1168
|
+
else
|
1169
|
+
builder.args(nil, [], nil, false)
|
1170
|
+
end
|
1145
1171
|
end
|
1146
1172
|
|
1147
1173
|
# foo(bar: baz)
|
@@ -1183,7 +1209,7 @@ module Prism
|
|
1183
1209
|
false
|
1184
1210
|
)
|
1185
1211
|
end,
|
1186
|
-
node.body
|
1212
|
+
visit(node.body),
|
1187
1213
|
[node.closing, srange(node.closing_loc)]
|
1188
1214
|
)
|
1189
1215
|
end
|
@@ -1307,7 +1333,7 @@ module Prism
|
|
1307
1333
|
def visit_multi_write_node(node)
|
1308
1334
|
elements = multi_target_elements(node)
|
1309
1335
|
|
1310
|
-
if elements.length == 1 && elements.first.is_a?(MultiTargetNode)
|
1336
|
+
if elements.length == 1 && elements.first.is_a?(MultiTargetNode) && !node.rest
|
1311
1337
|
elements = multi_target_elements(elements.first)
|
1312
1338
|
end
|
1313
1339
|
|
@@ -1507,15 +1533,13 @@ module Prism
|
|
1507
1533
|
# /foo/
|
1508
1534
|
# ^^^^^
|
1509
1535
|
def visit_regular_expression_node(node)
|
1510
|
-
content = node.content
|
1511
1536
|
parts =
|
1512
|
-
if content
|
1513
|
-
|
1514
|
-
|
1515
|
-
|
1516
|
-
end
|
1537
|
+
if node.content == ""
|
1538
|
+
[]
|
1539
|
+
elsif node.content.include?("\n")
|
1540
|
+
string_nodes_from_line_continuations(node.unescaped, node.content, node.content_loc.start_offset, node.opening)
|
1517
1541
|
else
|
1518
|
-
[builder.string_internal(
|
1542
|
+
[builder.string_internal([node.unescaped, srange(node.content_loc)])]
|
1519
1543
|
end
|
1520
1544
|
|
1521
1545
|
builder.regexp_compose(
|
@@ -1672,28 +1696,11 @@ module Prism
|
|
1672
1696
|
elsif node.opening&.start_with?("%") && node.unescaped.empty?
|
1673
1697
|
builder.string_compose(token(node.opening_loc), [], token(node.closing_loc))
|
1674
1698
|
else
|
1675
|
-
content_lines = node.content.lines
|
1676
|
-
unescaped_lines = node.unescaped.lines
|
1677
|
-
|
1678
1699
|
parts =
|
1679
|
-
if
|
1680
|
-
|
1681
|
-
elsif content_lines.length != unescaped_lines.length
|
1682
|
-
# This occurs when we have line continuations in the string. We
|
1683
|
-
# need to come back and fix this, but for now this stops the
|
1684
|
-
# code from breaking when we encounter it because of trying to
|
1685
|
-
# transpose arrays of different lengths.
|
1686
|
-
[builder.string_internal([node.unescaped, srange(node.content_loc)])]
|
1700
|
+
if node.content.include?("\n")
|
1701
|
+
string_nodes_from_line_continuations(node.unescaped, node.content, node.content_loc.start_offset, node.opening)
|
1687
1702
|
else
|
1688
|
-
|
1689
|
-
|
1690
|
-
[content_lines, unescaped_lines].transpose.map do |content_line, unescaped_line|
|
1691
|
-
end_offset = start_offset + content_line.length
|
1692
|
-
offsets = srange_offsets(start_offset, end_offset)
|
1693
|
-
start_offset = end_offset
|
1694
|
-
|
1695
|
-
builder.string_internal([unescaped_line, offsets])
|
1696
|
-
end
|
1703
|
+
[builder.string_internal([node.unescaped, srange(node.content_loc)])]
|
1697
1704
|
end
|
1698
1705
|
|
1699
1706
|
builder.string_compose(
|
@@ -1737,19 +1744,14 @@ module Prism
|
|
1737
1744
|
builder.symbol([node.unescaped, srange(node.location)])
|
1738
1745
|
end
|
1739
1746
|
else
|
1740
|
-
parts =
|
1741
|
-
|
1742
|
-
|
1743
|
-
|
1744
|
-
|
1745
|
-
|
1746
|
-
|
1747
|
-
offsets = srange_offsets(start_offset, end_offset)
|
1748
|
-
start_offset = end_offset
|
1749
|
-
|
1750
|
-
builder.string_internal([line, offsets])
|
1747
|
+
parts =
|
1748
|
+
if node.value == ""
|
1749
|
+
[]
|
1750
|
+
elsif node.value.include?("\n")
|
1751
|
+
string_nodes_from_line_continuations(node.unescaped, node.value, node.value_loc.start_offset, node.opening)
|
1752
|
+
else
|
1753
|
+
[builder.string_internal([node.unescaped, srange(node.value_loc)])]
|
1751
1754
|
end
|
1752
|
-
end
|
1753
1755
|
|
1754
1756
|
builder.symbol_compose(
|
1755
1757
|
token(node.opening_loc),
|
@@ -1781,10 +1783,10 @@ module Prism
|
|
1781
1783
|
builder.condition(
|
1782
1784
|
token(node.keyword_loc),
|
1783
1785
|
visit(node.predicate),
|
1784
|
-
if node.then_keyword_loc
|
1785
|
-
token(
|
1786
|
+
if (then_keyword_loc = node.then_keyword_loc)
|
1787
|
+
token(then_keyword_loc)
|
1786
1788
|
else
|
1787
|
-
srange_find(node.predicate.location.end_offset, (node.statements&.location || node.else_clause&.location || node.end_keyword_loc).start_offset,
|
1789
|
+
srange_find(node.predicate.location.end_offset, (node.statements&.location || node.else_clause&.location || node.end_keyword_loc).start_offset, ";")
|
1788
1790
|
end,
|
1789
1791
|
visit(node.else_clause),
|
1790
1792
|
token(node.else_clause&.else_keyword_loc),
|
@@ -1812,7 +1814,11 @@ module Prism
|
|
1812
1814
|
:until,
|
1813
1815
|
token(node.keyword_loc),
|
1814
1816
|
visit(node.predicate),
|
1815
|
-
|
1817
|
+
if (do_keyword_loc = node.do_keyword_loc)
|
1818
|
+
token(do_keyword_loc)
|
1819
|
+
else
|
1820
|
+
srange_find(node.predicate.location.end_offset, (node.statements&.location || node.closing_loc).start_offset, ";")
|
1821
|
+
end,
|
1816
1822
|
visit(node.statements),
|
1817
1823
|
token(node.closing_loc)
|
1818
1824
|
)
|
@@ -1832,10 +1838,10 @@ module Prism
|
|
1832
1838
|
builder.when(
|
1833
1839
|
token(node.keyword_loc),
|
1834
1840
|
visit_all(node.conditions),
|
1835
|
-
if node.then_keyword_loc
|
1836
|
-
token(
|
1841
|
+
if (then_keyword_loc = node.then_keyword_loc)
|
1842
|
+
token(then_keyword_loc)
|
1837
1843
|
else
|
1838
|
-
srange_find(node.conditions.last.location.end_offset, node.statements&.location&.start_offset,
|
1844
|
+
srange_find(node.conditions.last.location.end_offset, node.statements&.location&.start_offset, ";")
|
1839
1845
|
end,
|
1840
1846
|
visit(node.statements)
|
1841
1847
|
)
|
@@ -1852,7 +1858,11 @@ module Prism
|
|
1852
1858
|
:while,
|
1853
1859
|
token(node.keyword_loc),
|
1854
1860
|
visit(node.predicate),
|
1855
|
-
|
1861
|
+
if (do_keyword_loc = node.do_keyword_loc)
|
1862
|
+
token(do_keyword_loc)
|
1863
|
+
else
|
1864
|
+
srange_find(node.predicate.location.end_offset, (node.statements&.location || node.closing_loc).start_offset, ";")
|
1865
|
+
end,
|
1856
1866
|
visit(node.statements),
|
1857
1867
|
token(node.closing_loc)
|
1858
1868
|
)
|
@@ -1870,28 +1880,23 @@ module Prism
|
|
1870
1880
|
# ^^^^^
|
1871
1881
|
def visit_x_string_node(node)
|
1872
1882
|
if node.heredoc?
|
1873
|
-
visit_heredoc(node.to_interpolated) { |children, closing| builder.xstring_compose(token(node.opening_loc), children, closing) }
|
1874
|
-
|
1875
|
-
parts = if node.unescaped.lines.one?
|
1876
|
-
[builder.string_internal([node.unescaped, srange(node.content_loc)])]
|
1877
|
-
else
|
1878
|
-
start_offset = node.content_loc.start_offset
|
1879
|
-
|
1880
|
-
node.unescaped.lines.map do |line|
|
1881
|
-
end_offset = start_offset + line.length
|
1882
|
-
offsets = srange_offsets(start_offset, end_offset)
|
1883
|
-
start_offset = end_offset
|
1883
|
+
return visit_heredoc(node.to_interpolated) { |children, closing| builder.xstring_compose(token(node.opening_loc), children, closing) }
|
1884
|
+
end
|
1884
1885
|
|
1885
|
-
|
1886
|
-
|
1886
|
+
parts =
|
1887
|
+
if node.content == ""
|
1888
|
+
[]
|
1889
|
+
elsif node.content.include?("\n")
|
1890
|
+
string_nodes_from_line_continuations(node.unescaped, node.content, node.content_loc.start_offset, node.opening)
|
1891
|
+
else
|
1892
|
+
[builder.string_internal([node.unescaped, srange(node.content_loc)])]
|
1887
1893
|
end
|
1888
1894
|
|
1889
|
-
|
1890
|
-
|
1891
|
-
|
1892
|
-
|
1893
|
-
|
1894
|
-
end
|
1895
|
+
builder.xstring_compose(
|
1896
|
+
token(node.opening_loc),
|
1897
|
+
parts,
|
1898
|
+
token(node.closing_loc)
|
1899
|
+
)
|
1895
1900
|
end
|
1896
1901
|
|
1897
1902
|
# yield
|
@@ -1985,18 +1990,16 @@ module Prism
|
|
1985
1990
|
Range.new(source_buffer, offset_cache[start_offset], offset_cache[end_offset])
|
1986
1991
|
end
|
1987
1992
|
|
1988
|
-
# Constructs a new source range by finding the given
|
1989
|
-
# given start offset and end offset. If the needle is not found, it
|
1993
|
+
# Constructs a new source range by finding the given character between
|
1994
|
+
# the given start offset and end offset. If the needle is not found, it
|
1990
1995
|
# returns nil. Importantly it does not search past newlines or comments.
|
1991
1996
|
#
|
1992
1997
|
# Note that end_offset is allowed to be nil, in which case this will
|
1993
1998
|
# search until the end of the string.
|
1994
|
-
def srange_find(start_offset, end_offset,
|
1995
|
-
if (match = source_buffer.source.byteslice(start_offset...end_offset)
|
1996
|
-
|
1997
|
-
|
1998
|
-
|
1999
|
-
[token, Range.new(source_buffer, offset_cache[token_offset], offset_cache[token_offset + token.bytesize])]
|
1999
|
+
def srange_find(start_offset, end_offset, character)
|
2000
|
+
if (match = source_buffer.source.byteslice(start_offset...end_offset)[/\A\s*#{character}/])
|
2001
|
+
final_offset = start_offset + match.bytesize
|
2002
|
+
[character, Range.new(source_buffer, offset_cache[final_offset - character.bytesize], offset_cache[final_offset])]
|
2000
2003
|
end
|
2001
2004
|
end
|
2002
2005
|
|
@@ -2032,7 +2035,7 @@ module Prism
|
|
2032
2035
|
false
|
2033
2036
|
)
|
2034
2037
|
end,
|
2035
|
-
block.body
|
2038
|
+
visit(block.body),
|
2036
2039
|
token(block.closing_loc)
|
2037
2040
|
)
|
2038
2041
|
else
|
@@ -2040,13 +2043,6 @@ module Prism
|
|
2040
2043
|
end
|
2041
2044
|
end
|
2042
2045
|
|
2043
|
-
# The parser gem automatically converts \r\n to \n, meaning our offsets
|
2044
|
-
# need to be adjusted to always subtract 1 from the length.
|
2045
|
-
def chomped_bytesize(line)
|
2046
|
-
chomped = line.chomp
|
2047
|
-
chomped.bytesize + (chomped == line ? 0 : 1)
|
2048
|
-
end
|
2049
|
-
|
2050
2046
|
# Visit a heredoc that can be either a string or an xstring.
|
2051
2047
|
def visit_heredoc(node)
|
2052
2048
|
children = Array.new
|
@@ -2063,34 +2059,8 @@ module Prism
|
|
2063
2059
|
|
2064
2060
|
node.parts.each do |part|
|
2065
2061
|
pushing =
|
2066
|
-
if part.is_a?(StringNode) && part.
|
2067
|
-
unescaped
|
2068
|
-
escaped = part.content.lines
|
2069
|
-
|
2070
|
-
escaped_lengths = []
|
2071
|
-
normalized_lengths = []
|
2072
|
-
|
2073
|
-
if node.opening.end_with?("'")
|
2074
|
-
escaped.each do |line|
|
2075
|
-
escaped_lengths << line.bytesize
|
2076
|
-
normalized_lengths << chomped_bytesize(line)
|
2077
|
-
end
|
2078
|
-
else
|
2079
|
-
escaped
|
2080
|
-
.chunk_while { |before, after| before.match?(/(?<!\\)\\\r?\n$/) }
|
2081
|
-
.each do |lines|
|
2082
|
-
escaped_lengths << lines.sum(&:bytesize)
|
2083
|
-
normalized_lengths << lines.sum { |line| chomped_bytesize(line) }
|
2084
|
-
end
|
2085
|
-
end
|
2086
|
-
|
2087
|
-
start_offset = part.location.start_offset
|
2088
|
-
|
2089
|
-
unescaped.map.with_index do |unescaped_line, index|
|
2090
|
-
inner_part = builder.string_internal([unescaped_line, srange_offsets(start_offset, start_offset + normalized_lengths.fetch(index, 0))])
|
2091
|
-
start_offset += escaped_lengths.fetch(index, 0)
|
2092
|
-
inner_part
|
2093
|
-
end
|
2062
|
+
if part.is_a?(StringNode) && part.content.include?("\n")
|
2063
|
+
string_nodes_from_line_continuations(part.unescaped, part.content, part.location.start_offset, node.opening)
|
2094
2064
|
else
|
2095
2065
|
[visit(part)]
|
2096
2066
|
end
|
@@ -2104,7 +2074,7 @@ module Prism
|
|
2104
2074
|
location = appendee.loc
|
2105
2075
|
location = location.with_expression(location.expression.join(child.loc.expression))
|
2106
2076
|
|
2107
|
-
children[-1] = appendee.updated(:str, [appendee.children.first
|
2077
|
+
children[-1] = appendee.updated(:str, ["#{appendee.children.first}#{child.children.first}"], location: location)
|
2108
2078
|
else
|
2109
2079
|
children << child
|
2110
2080
|
end
|
@@ -2140,6 +2110,102 @@ module Prism
|
|
2140
2110
|
parser.pattern_variables.pop
|
2141
2111
|
end
|
2142
2112
|
end
|
2113
|
+
|
2114
|
+
# When the content of a string node is split across multiple lines, the
|
2115
|
+
# parser gem creates individual string nodes for each line the content is part of.
|
2116
|
+
def string_nodes_from_interpolation(node, opening)
|
2117
|
+
node.parts.flat_map do |part|
|
2118
|
+
if part.type == :string_node && part.content.include?("\n") && part.opening_loc.nil?
|
2119
|
+
string_nodes_from_line_continuations(part.unescaped, part.content, part.content_loc.start_offset, opening)
|
2120
|
+
else
|
2121
|
+
visit(part)
|
2122
|
+
end
|
2123
|
+
end
|
2124
|
+
end
|
2125
|
+
|
2126
|
+
# Create parser string nodes from a single prism node. The parser gem
|
2127
|
+
# "glues" strings together when a line continuation is encountered.
|
2128
|
+
def string_nodes_from_line_continuations(unescaped, escaped, start_offset, opening)
|
2129
|
+
unescaped = unescaped.lines
|
2130
|
+
escaped = escaped.lines
|
2131
|
+
percent_array = opening&.start_with?("%w", "%W", "%i", "%I")
|
2132
|
+
regex = opening == "/" || opening&.start_with?("%r")
|
2133
|
+
|
2134
|
+
# Non-interpolating strings
|
2135
|
+
if opening&.end_with?("'") || opening&.start_with?("%q", "%s", "%w", "%i")
|
2136
|
+
current_length = 0
|
2137
|
+
current_line = +""
|
2138
|
+
|
2139
|
+
escaped.filter_map.with_index do |escaped_line, index|
|
2140
|
+
unescaped_line = unescaped.fetch(index, "")
|
2141
|
+
current_length += escaped_line.bytesize
|
2142
|
+
current_line << unescaped_line
|
2143
|
+
|
2144
|
+
# Glue line continuations together. Only %w and %i arrays can contain these.
|
2145
|
+
if percent_array && escaped_line[/(\\)*\n$/, 1]&.length&.odd?
|
2146
|
+
next unless index == escaped.count - 1
|
2147
|
+
end
|
2148
|
+
s = builder.string_internal([current_line, srange_offsets(start_offset, start_offset + current_length)])
|
2149
|
+
start_offset += escaped_line.bytesize
|
2150
|
+
current_line = +""
|
2151
|
+
current_length = 0
|
2152
|
+
s
|
2153
|
+
end
|
2154
|
+
else
|
2155
|
+
escaped_lengths = []
|
2156
|
+
normalized_lengths = []
|
2157
|
+
# Keeps track of where an unescaped line should start a new token. An unescaped
|
2158
|
+
# \n would otherwise be indistinguishable from the actual newline at the end of
|
2159
|
+
# of the line. The parser gem only emits a new string node at "real" newlines,
|
2160
|
+
# line continuations don't start a new node as well.
|
2161
|
+
do_next_tokens = []
|
2162
|
+
|
2163
|
+
escaped
|
2164
|
+
.chunk_while { |before, after| before[/(\\*)\r?\n$/, 1]&.length&.odd? || false }
|
2165
|
+
.each do |lines|
|
2166
|
+
escaped_lengths << lines.sum(&:bytesize)
|
2167
|
+
|
2168
|
+
unescaped_lines_count =
|
2169
|
+
if regex
|
2170
|
+
0 # Will always be preserved as is
|
2171
|
+
else
|
2172
|
+
lines.sum do |line|
|
2173
|
+
count = line.scan(/(\\*)n/).count { |(backslashes)| backslashes&.length&.odd? }
|
2174
|
+
count -= 1 if !line.end_with?("\n") && count > 0
|
2175
|
+
count
|
2176
|
+
end
|
2177
|
+
end
|
2178
|
+
|
2179
|
+
extra = 1
|
2180
|
+
extra = lines.count if percent_array # Account for line continuations in percent arrays
|
2181
|
+
|
2182
|
+
normalized_lengths.concat(Array.new(unescaped_lines_count + extra, 0))
|
2183
|
+
normalized_lengths[-1] = lines.sum { |line| line.bytesize }
|
2184
|
+
do_next_tokens.concat(Array.new(unescaped_lines_count + extra, false))
|
2185
|
+
do_next_tokens[-1] = true
|
2186
|
+
end
|
2187
|
+
|
2188
|
+
current_line = +""
|
2189
|
+
current_normalized_length = 0
|
2190
|
+
|
2191
|
+
emitted_count = 0
|
2192
|
+
unescaped.filter_map.with_index do |unescaped_line, index|
|
2193
|
+
current_line << unescaped_line
|
2194
|
+
current_normalized_length += normalized_lengths.fetch(index, 0)
|
2195
|
+
|
2196
|
+
if do_next_tokens[index]
|
2197
|
+
inner_part = builder.string_internal([current_line, srange_offsets(start_offset, start_offset + current_normalized_length)])
|
2198
|
+
start_offset += escaped_lengths.fetch(emitted_count, 0)
|
2199
|
+
current_line = +""
|
2200
|
+
current_normalized_length = 0
|
2201
|
+
emitted_count += 1
|
2202
|
+
inner_part
|
2203
|
+
else
|
2204
|
+
nil
|
2205
|
+
end
|
2206
|
+
end
|
2207
|
+
end
|
2208
|
+
end
|
2143
2209
|
end
|
2144
2210
|
end
|
2145
2211
|
end
|