prism 1.3.0 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +46 -1
- data/Makefile +2 -1
- data/README.md +1 -0
- data/config.yml +273 -37
- data/docs/parser_translation.md +8 -23
- data/docs/releasing.md +1 -1
- data/docs/ripper_translation.md +1 -1
- data/docs/ruby_api.md +1 -1
- data/ext/prism/api_node.c +1816 -1303
- data/ext/prism/extension.c +244 -110
- data/ext/prism/extension.h +4 -4
- data/include/prism/ast.h +291 -49
- data/include/prism/defines.h +4 -1
- data/include/prism/diagnostic.h +4 -0
- data/include/prism/options.h +89 -3
- data/include/prism/regexp.h +2 -2
- data/include/prism/util/pm_buffer.h +18 -0
- data/include/prism/util/pm_integer.h +4 -0
- data/include/prism/util/pm_list.h +6 -0
- data/include/prism/util/pm_string.h +12 -2
- data/include/prism/version.h +2 -2
- data/include/prism.h +41 -16
- data/lib/prism/compiler.rb +456 -151
- data/lib/prism/desugar_compiler.rb +1 -0
- data/lib/prism/dispatcher.rb +16 -0
- data/lib/prism/dot_visitor.rb +21 -1
- data/lib/prism/dsl.rb +13 -2
- data/lib/prism/ffi.rb +62 -34
- data/lib/prism/inspect_visitor.rb +5 -1
- data/lib/prism/lex_compat.rb +1 -0
- data/lib/prism/mutation_compiler.rb +3 -0
- data/lib/prism/node.rb +554 -345
- data/lib/prism/node_ext.rb +4 -1
- data/lib/prism/pack.rb +2 -0
- data/lib/prism/parse_result/comments.rb +1 -0
- data/lib/prism/parse_result/errors.rb +1 -0
- data/lib/prism/parse_result/newlines.rb +2 -1
- data/lib/prism/parse_result.rb +53 -0
- data/lib/prism/pattern.rb +1 -0
- data/lib/prism/polyfill/append_as_bytes.rb +15 -0
- data/lib/prism/polyfill/scan_byte.rb +14 -0
- data/lib/prism/polyfill/warn.rb +42 -0
- data/lib/prism/reflection.rb +5 -2
- data/lib/prism/relocation.rb +1 -0
- data/lib/prism/serialize.rb +1275 -783
- data/lib/prism/string_query.rb +1 -0
- data/lib/prism/translation/parser/builder.rb +62 -0
- data/lib/prism/translation/parser/compiler.rb +230 -152
- data/lib/prism/translation/parser/lexer.rb +446 -64
- data/lib/prism/translation/parser.rb +64 -4
- data/lib/prism/translation/parser33.rb +1 -0
- data/lib/prism/translation/parser34.rb +1 -0
- data/lib/prism/translation/parser35.rb +13 -0
- data/lib/prism/translation/parser_current.rb +24 -0
- data/lib/prism/translation/ripper/sexp.rb +1 -0
- data/lib/prism/translation/ripper.rb +30 -4
- data/lib/prism/translation/ruby_parser.rb +291 -7
- data/lib/prism/translation.rb +3 -0
- data/lib/prism/visitor.rb +457 -152
- data/lib/prism.rb +5 -3
- data/prism.gemspec +9 -1
- data/rbi/prism/dsl.rbi +9 -6
- data/rbi/prism/node.rbi +43 -16
- data/rbi/prism/parse_result.rbi +17 -0
- data/rbi/prism/translation/parser35.rbi +6 -0
- data/rbi/prism.rbi +39 -36
- data/sig/prism/dispatcher.rbs +3 -0
- data/sig/prism/dsl.rbs +7 -5
- data/sig/prism/node.rbs +461 -37
- data/sig/prism/node_ext.rbs +84 -17
- data/sig/prism/parse_result/comments.rbs +38 -0
- data/sig/prism/parse_result.rbs +14 -0
- data/sig/prism/reflection.rbs +1 -1
- data/sig/prism/serialize.rbs +4 -2
- data/sig/prism.rbs +22 -1
- data/src/diagnostic.c +9 -3
- data/src/node.c +23 -0
- data/src/options.c +33 -2
- data/src/prettyprint.c +32 -0
- data/src/prism.c +620 -242
- data/src/serialize.c +8 -0
- data/src/token_type.c +36 -34
- data/src/util/pm_buffer.c +40 -0
- data/src/util/pm_constant_pool.c +6 -2
- data/src/util/pm_strncasecmp.c +13 -1
- metadata +11 -7
data/lib/prism/translation/parser/builder.rb
CHANGED
@@ -0,0 +1,62 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
# :markup: markdown
|
3
|
+
|
4
|
+
module Prism
|
5
|
+
module Translation
|
6
|
+
class Parser
|
7
|
+
# A builder that knows how to convert more modern Ruby syntax
|
8
|
+
# into whitequark/parser gem's syntax tree.
|
9
|
+
class Builder < ::Parser::Builders::Default
|
10
|
+
# It represents the `it` block argument, which is not yet implemented in the Parser gem.
|
11
|
+
def itarg
|
12
|
+
n(:itarg, [:it], nil)
|
13
|
+
end
|
14
|
+
|
15
|
+
# The following three lines have been added to support the `it` block parameter syntax in the source code below.
|
16
|
+
#
|
17
|
+
# if args.type == :itarg
|
18
|
+
# block_type = :itblock
|
19
|
+
# args = :it
|
20
|
+
#
|
21
|
+
# https://github.com/whitequark/parser/blob/v3.3.7.1/lib/parser/builders/default.rb#L1122-L1155
|
22
|
+
def block(method_call, begin_t, args, body, end_t)
|
23
|
+
_receiver, _selector, *call_args = *method_call
|
24
|
+
|
25
|
+
if method_call.type == :yield
|
26
|
+
diagnostic :error, :block_given_to_yield, nil, method_call.loc.keyword, [loc(begin_t)]
|
27
|
+
end
|
28
|
+
|
29
|
+
last_arg = call_args.last
|
30
|
+
if last_arg && (last_arg.type == :block_pass || last_arg.type == :forwarded_args)
|
31
|
+
diagnostic :error, :block_and_blockarg, nil, last_arg.loc.expression, [loc(begin_t)]
|
32
|
+
end
|
33
|
+
|
34
|
+
if args.type == :itarg
|
35
|
+
block_type = :itblock
|
36
|
+
args = :it
|
37
|
+
elsif args.type == :numargs
|
38
|
+
block_type = :numblock
|
39
|
+
args = args.children[0]
|
40
|
+
else
|
41
|
+
block_type = :block
|
42
|
+
end
|
43
|
+
|
44
|
+
if [:send, :csend, :index, :super, :zsuper, :lambda].include?(method_call.type)
|
45
|
+
n(block_type, [ method_call, args, body ],
|
46
|
+
block_map(method_call.loc.expression, begin_t, end_t))
|
47
|
+
else
|
48
|
+
# Code like "return foo 1 do end" is reduced in a weird sequence.
|
49
|
+
# Here, method_call is actually (return).
|
50
|
+
actual_send, = *method_call
|
51
|
+
block =
|
52
|
+
n(block_type, [ actual_send, args, body ],
|
53
|
+
block_map(actual_send.loc.expression, begin_t, end_t))
|
54
|
+
|
55
|
+
n(method_call.type, [ block ],
|
56
|
+
method_call.loc.with_expression(join_exprs(method_call, block)))
|
57
|
+
end
|
58
|
+
end
|
59
|
+
end
|
60
|
+
end
|
61
|
+
end
|
62
|
+
end
|
@@ -1,4 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
# :markup: markdown
|
2
3
|
|
3
4
|
module Prism
|
4
5
|
module Translation
|
@@ -74,7 +75,29 @@ module Prism
|
|
74
75
|
# []
|
75
76
|
# ^^
|
76
77
|
def visit_array_node(node)
|
77
|
-
|
78
|
+
if node.opening&.start_with?("%w", "%W", "%i", "%I")
|
79
|
+
elements = node.elements.flat_map do |element|
|
80
|
+
if element.is_a?(StringNode)
|
81
|
+
if element.content.include?("\n")
|
82
|
+
string_nodes_from_line_continuations(element.unescaped, element.content, element.content_loc.start_offset, node.opening)
|
83
|
+
else
|
84
|
+
[builder.string_internal([element.unescaped, srange(element.content_loc)])]
|
85
|
+
end
|
86
|
+
elsif element.is_a?(InterpolatedStringNode)
|
87
|
+
builder.string_compose(
|
88
|
+
token(element.opening_loc),
|
89
|
+
string_nodes_from_interpolation(element, node.opening),
|
90
|
+
token(element.closing_loc)
|
91
|
+
)
|
92
|
+
else
|
93
|
+
[visit(element)]
|
94
|
+
end
|
95
|
+
end
|
96
|
+
else
|
97
|
+
elements = visit_all(node.elements)
|
98
|
+
end
|
99
|
+
|
100
|
+
builder.array(token(node.opening_loc), elements, token(node.closing_loc))
|
78
101
|
end
|
79
102
|
|
80
103
|
# foo => [bar]
|
@@ -111,8 +134,8 @@ module Prism
|
|
111
134
|
def visit_assoc_node(node)
|
112
135
|
key = node.key
|
113
136
|
|
114
|
-
if
|
115
|
-
if
|
137
|
+
if node.value.is_a?(ImplicitNode)
|
138
|
+
if in_pattern
|
116
139
|
if key.is_a?(SymbolNode)
|
117
140
|
if key.opening.nil?
|
118
141
|
builder.match_hash_var([key.unescaped, srange(key.location)])
|
@@ -122,19 +145,18 @@ module Prism
|
|
122
145
|
else
|
123
146
|
builder.match_hash_var_from_str(token(key.opening_loc), visit_all(key.parts), token(key.closing_loc))
|
124
147
|
end
|
125
|
-
elsif key.opening.nil?
|
126
|
-
builder.pair_keyword([key.unescaped, srange(key.location)], visit(node.value))
|
127
148
|
else
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
[
|
149
|
+
value = node.value.value
|
150
|
+
|
151
|
+
implicit_value = if value.is_a?(CallNode)
|
152
|
+
builder.call_method(nil, nil, [value.name, srange(value.message_loc)])
|
153
|
+
elsif value.is_a?(ConstantReadNode)
|
154
|
+
builder.const([value.name, srange(key.value_loc)])
|
155
|
+
else
|
134
156
|
builder.ident([value.name, srange(key.value_loc)]).updated(:lvar)
|
135
|
-
|
136
|
-
|
137
|
-
builder.
|
157
|
+
end
|
158
|
+
|
159
|
+
builder.pair_keyword([key.unescaped, srange(key)], implicit_value)
|
138
160
|
end
|
139
161
|
elsif node.operator_loc
|
140
162
|
builder.pair(visit(key), token(node.operator_loc), visit(node.value))
|
@@ -181,7 +203,14 @@ module Prism
|
|
181
203
|
if (rescue_clause = node.rescue_clause)
|
182
204
|
begin
|
183
205
|
find_start_offset = (rescue_clause.reference&.location || rescue_clause.exceptions.last&.location || rescue_clause.keyword_loc).end_offset
|
184
|
-
find_end_offset = (
|
206
|
+
find_end_offset = (
|
207
|
+
rescue_clause.statements&.location&.start_offset ||
|
208
|
+
rescue_clause.subsequent&.location&.start_offset ||
|
209
|
+
node.else_clause&.location&.start_offset ||
|
210
|
+
node.ensure_clause&.location&.start_offset ||
|
211
|
+
node.end_keyword_loc&.start_offset ||
|
212
|
+
find_start_offset + 1
|
213
|
+
)
|
185
214
|
|
186
215
|
rescue_bodies << builder.rescue_body(
|
187
216
|
token(rescue_clause.keyword_loc),
|
@@ -664,13 +693,37 @@ module Prism
|
|
664
693
|
# defined?(a)
|
665
694
|
# ^^^^^^^^^^^
|
666
695
|
def visit_defined_node(node)
|
667
|
-
|
668
|
-
|
669
|
-
|
670
|
-
|
671
|
-
|
672
|
-
|
673
|
-
|
696
|
+
# Very weird circumstances here where something like:
|
697
|
+
#
|
698
|
+
# defined?
|
699
|
+
# (1)
|
700
|
+
#
|
701
|
+
# gets parsed in Ruby as having only the `1` expression but in parser
|
702
|
+
# it gets parsed as having a begin. In this case we need to synthesize
|
703
|
+
# that begin to match parser's behavior.
|
704
|
+
if node.lparen_loc && node.keyword_loc.join(node.lparen_loc).slice.include?("\n")
|
705
|
+
builder.keyword_cmd(
|
706
|
+
:defined?,
|
707
|
+
token(node.keyword_loc),
|
708
|
+
nil,
|
709
|
+
[
|
710
|
+
builder.begin(
|
711
|
+
token(node.lparen_loc),
|
712
|
+
visit(node.value),
|
713
|
+
token(node.rparen_loc)
|
714
|
+
)
|
715
|
+
],
|
716
|
+
nil
|
717
|
+
)
|
718
|
+
else
|
719
|
+
builder.keyword_cmd(
|
720
|
+
:defined?,
|
721
|
+
token(node.keyword_loc),
|
722
|
+
token(node.lparen_loc),
|
723
|
+
[visit(node.value)],
|
724
|
+
token(node.rparen_loc)
|
725
|
+
)
|
726
|
+
end
|
674
727
|
end
|
675
728
|
|
676
729
|
# if foo then bar else baz end
|
@@ -1000,7 +1053,7 @@ module Prism
|
|
1000
1053
|
builder.index_asgn(
|
1001
1054
|
visit(node.receiver),
|
1002
1055
|
token(node.opening_loc),
|
1003
|
-
visit_all(node.arguments
|
1056
|
+
visit_all(node.arguments&.arguments || []),
|
1004
1057
|
token(node.closing_loc),
|
1005
1058
|
)
|
1006
1059
|
end
|
@@ -1068,7 +1121,7 @@ module Prism
|
|
1068
1121
|
def visit_interpolated_regular_expression_node(node)
|
1069
1122
|
builder.regexp_compose(
|
1070
1123
|
token(node.opening_loc),
|
1071
|
-
|
1124
|
+
string_nodes_from_interpolation(node, node.opening),
|
1072
1125
|
[node.closing[0], srange_offsets(node.closing_loc.start_offset, node.closing_loc.start_offset + 1)],
|
1073
1126
|
builder.regexp_options([node.closing[1..], srange_offsets(node.closing_loc.start_offset + 1, node.closing_loc.end_offset)])
|
1074
1127
|
)
|
@@ -1085,29 +1138,9 @@ module Prism
|
|
1085
1138
|
return visit_heredoc(node) { |children, closing| builder.string_compose(token(node.opening_loc), children, closing) }
|
1086
1139
|
end
|
1087
1140
|
|
1088
|
-
parts = if node.parts.one? { |part| part.type == :string_node }
|
1089
|
-
node.parts.flat_map do |node|
|
1090
|
-
if node.type == :string_node && node.unescaped.lines.count >= 2
|
1091
|
-
start_offset = node.content_loc.start_offset
|
1092
|
-
|
1093
|
-
node.unescaped.lines.map do |line|
|
1094
|
-
end_offset = start_offset + line.length
|
1095
|
-
offsets = srange_offsets(start_offset, end_offset)
|
1096
|
-
start_offset = end_offset
|
1097
|
-
|
1098
|
-
builder.string_internal([line, offsets])
|
1099
|
-
end
|
1100
|
-
else
|
1101
|
-
visit(node)
|
1102
|
-
end
|
1103
|
-
end
|
1104
|
-
else
|
1105
|
-
visit_all(node.parts)
|
1106
|
-
end
|
1107
|
-
|
1108
1141
|
builder.string_compose(
|
1109
1142
|
token(node.opening_loc),
|
1110
|
-
|
1143
|
+
string_nodes_from_interpolation(node, node.opening),
|
1111
1144
|
token(node.closing_loc)
|
1112
1145
|
)
|
1113
1146
|
end
|
@@ -1117,7 +1150,7 @@ module Prism
|
|
1117
1150
|
def visit_interpolated_symbol_node(node)
|
1118
1151
|
builder.symbol_compose(
|
1119
1152
|
token(node.opening_loc),
|
1120
|
-
|
1153
|
+
string_nodes_from_interpolation(node, node.opening),
|
1121
1154
|
token(node.closing_loc)
|
1122
1155
|
)
|
1123
1156
|
end
|
@@ -1126,14 +1159,14 @@ module Prism
|
|
1126
1159
|
# ^^^^^^^^^^^^
|
1127
1160
|
def visit_interpolated_x_string_node(node)
|
1128
1161
|
if node.heredoc?
|
1129
|
-
visit_heredoc(node) { |children, closing| builder.xstring_compose(token(node.opening_loc), children, closing) }
|
1130
|
-
else
|
1131
|
-
builder.xstring_compose(
|
1132
|
-
token(node.opening_loc),
|
1133
|
-
visit_all(node.parts),
|
1134
|
-
token(node.closing_loc)
|
1135
|
-
)
|
1162
|
+
return visit_heredoc(node) { |children, closing| builder.xstring_compose(token(node.opening_loc), children, closing) }
|
1136
1163
|
end
|
1164
|
+
|
1165
|
+
builder.xstring_compose(
|
1166
|
+
token(node.opening_loc),
|
1167
|
+
string_nodes_from_interpolation(node, node.opening),
|
1168
|
+
token(node.closing_loc)
|
1169
|
+
)
|
1137
1170
|
end
|
1138
1171
|
|
1139
1172
|
# -> { it }
|
@@ -1145,7 +1178,17 @@ module Prism
|
|
1145
1178
|
# -> { it }
|
1146
1179
|
# ^^^^^^^^^
|
1147
1180
|
def visit_it_parameters_node(node)
|
1148
|
-
builder
|
1181
|
+
# FIXME: The builder _should_ always be a subclass of the prism builder.
|
1182
|
+
# Currently RuboCop passes in its own builder that always inherits from the
|
1183
|
+
# parser builder (which is lacking the `itarg` method). Once rubocop-ast
|
1184
|
+
# opts in to use the custom prism builder a warning can be emitted when
|
1185
|
+
# it is not the expected class, and eventually raise.
|
1186
|
+
# https://github.com/rubocop/rubocop-ast/pull/354
|
1187
|
+
if builder.is_a?(Translation::Parser::Builder)
|
1188
|
+
builder.itarg
|
1189
|
+
else
|
1190
|
+
builder.args(nil, [], nil, false)
|
1191
|
+
end
|
1149
1192
|
end
|
1150
1193
|
|
1151
1194
|
# foo(bar: baz)
|
@@ -1187,7 +1230,7 @@ module Prism
|
|
1187
1230
|
false
|
1188
1231
|
)
|
1189
1232
|
end,
|
1190
|
-
node.body
|
1233
|
+
visit(node.body),
|
1191
1234
|
[node.closing, srange(node.closing_loc)]
|
1192
1235
|
)
|
1193
1236
|
end
|
@@ -1311,7 +1354,7 @@ module Prism
|
|
1311
1354
|
def visit_multi_write_node(node)
|
1312
1355
|
elements = multi_target_elements(node)
|
1313
1356
|
|
1314
|
-
if elements.length == 1 && elements.first.is_a?(MultiTargetNode)
|
1357
|
+
if elements.length == 1 && elements.first.is_a?(MultiTargetNode) && !node.rest
|
1315
1358
|
elements = multi_target_elements(elements.first)
|
1316
1359
|
end
|
1317
1360
|
|
@@ -1439,7 +1482,8 @@ module Prism
|
|
1439
1482
|
# foo => ^(bar)
|
1440
1483
|
# ^^^^^^
|
1441
1484
|
def visit_pinned_expression_node(node)
|
1442
|
-
|
1485
|
+
parts = node.expression.accept(copy_compiler(in_pattern: false)) # Don't treat * and similar as match_rest
|
1486
|
+
expression = builder.begin(token(node.lparen_loc), parts, token(node.rparen_loc))
|
1443
1487
|
builder.pin(token(node.operator_loc), expression)
|
1444
1488
|
end
|
1445
1489
|
|
@@ -1511,15 +1555,13 @@ module Prism
|
|
1511
1555
|
# /foo/
|
1512
1556
|
# ^^^^^
|
1513
1557
|
def visit_regular_expression_node(node)
|
1514
|
-
content = node.content
|
1515
1558
|
parts =
|
1516
|
-
if content
|
1517
|
-
|
1518
|
-
|
1519
|
-
|
1520
|
-
end
|
1559
|
+
if node.content == ""
|
1560
|
+
[]
|
1561
|
+
elsif node.content.include?("\n")
|
1562
|
+
string_nodes_from_line_continuations(node.unescaped, node.content, node.content_loc.start_offset, node.opening)
|
1521
1563
|
else
|
1522
|
-
[builder.string_internal(
|
1564
|
+
[builder.string_internal([node.unescaped, srange(node.content_loc)])]
|
1523
1565
|
end
|
1524
1566
|
|
1525
1567
|
builder.regexp_compose(
|
@@ -1676,28 +1718,11 @@ module Prism
|
|
1676
1718
|
elsif node.opening&.start_with?("%") && node.unescaped.empty?
|
1677
1719
|
builder.string_compose(token(node.opening_loc), [], token(node.closing_loc))
|
1678
1720
|
else
|
1679
|
-
content_lines = node.content.lines
|
1680
|
-
unescaped_lines = node.unescaped.lines
|
1681
|
-
|
1682
1721
|
parts =
|
1683
|
-
if
|
1684
|
-
|
1685
|
-
elsif content_lines.length != unescaped_lines.length
|
1686
|
-
# This occurs when we have line continuations in the string. We
|
1687
|
-
# need to come back and fix this, but for now this stops the
|
1688
|
-
# code from breaking when we encounter it because of trying to
|
1689
|
-
# transpose arrays of different lengths.
|
1690
|
-
[builder.string_internal([node.unescaped, srange(node.content_loc)])]
|
1722
|
+
if node.content.include?("\n")
|
1723
|
+
string_nodes_from_line_continuations(node.unescaped, node.content, node.content_loc.start_offset, node.opening)
|
1691
1724
|
else
|
1692
|
-
|
1693
|
-
|
1694
|
-
[content_lines, unescaped_lines].transpose.map do |content_line, unescaped_line|
|
1695
|
-
end_offset = start_offset + content_line.length
|
1696
|
-
offsets = srange_offsets(start_offset, end_offset)
|
1697
|
-
start_offset = end_offset
|
1698
|
-
|
1699
|
-
builder.string_internal([unescaped_line, offsets])
|
1700
|
-
end
|
1725
|
+
[builder.string_internal([node.unescaped, srange(node.content_loc)])]
|
1701
1726
|
end
|
1702
1727
|
|
1703
1728
|
builder.string_compose(
|
@@ -1741,19 +1766,14 @@ module Prism
|
|
1741
1766
|
builder.symbol([node.unescaped, srange(node.location)])
|
1742
1767
|
end
|
1743
1768
|
else
|
1744
|
-
parts =
|
1745
|
-
|
1746
|
-
|
1747
|
-
|
1748
|
-
|
1749
|
-
|
1750
|
-
|
1751
|
-
offsets = srange_offsets(start_offset, end_offset)
|
1752
|
-
start_offset = end_offset
|
1753
|
-
|
1754
|
-
builder.string_internal([line, offsets])
|
1769
|
+
parts =
|
1770
|
+
if node.value == ""
|
1771
|
+
[]
|
1772
|
+
elsif node.value.include?("\n")
|
1773
|
+
string_nodes_from_line_continuations(node.unescaped, node.value, node.value_loc.start_offset, node.opening)
|
1774
|
+
else
|
1775
|
+
[builder.string_internal([node.unescaped, srange(node.value_loc)])]
|
1755
1776
|
end
|
1756
|
-
end
|
1757
1777
|
|
1758
1778
|
builder.symbol_compose(
|
1759
1779
|
token(node.opening_loc),
|
@@ -1882,28 +1902,23 @@ module Prism
|
|
1882
1902
|
# ^^^^^
|
1883
1903
|
def visit_x_string_node(node)
|
1884
1904
|
if node.heredoc?
|
1885
|
-
visit_heredoc(node.to_interpolated) { |children, closing| builder.xstring_compose(token(node.opening_loc), children, closing) }
|
1886
|
-
|
1887
|
-
parts = if node.unescaped.lines.one?
|
1888
|
-
[builder.string_internal([node.unescaped, srange(node.content_loc)])]
|
1889
|
-
else
|
1890
|
-
start_offset = node.content_loc.start_offset
|
1891
|
-
|
1892
|
-
node.unescaped.lines.map do |line|
|
1893
|
-
end_offset = start_offset + line.length
|
1894
|
-
offsets = srange_offsets(start_offset, end_offset)
|
1895
|
-
start_offset = end_offset
|
1905
|
+
return visit_heredoc(node.to_interpolated) { |children, closing| builder.xstring_compose(token(node.opening_loc), children, closing) }
|
1906
|
+
end
|
1896
1907
|
|
1897
|
-
|
1898
|
-
|
1908
|
+
parts =
|
1909
|
+
if node.content == ""
|
1910
|
+
[]
|
1911
|
+
elsif node.content.include?("\n")
|
1912
|
+
string_nodes_from_line_continuations(node.unescaped, node.content, node.content_loc.start_offset, node.opening)
|
1913
|
+
else
|
1914
|
+
[builder.string_internal([node.unescaped, srange(node.content_loc)])]
|
1899
1915
|
end
|
1900
1916
|
|
1901
|
-
|
1902
|
-
|
1903
|
-
|
1904
|
-
|
1905
|
-
|
1906
|
-
end
|
1917
|
+
builder.xstring_compose(
|
1918
|
+
token(node.opening_loc),
|
1919
|
+
parts,
|
1920
|
+
token(node.closing_loc)
|
1921
|
+
)
|
1907
1922
|
end
|
1908
1923
|
|
1909
1924
|
# yield
|
@@ -2042,7 +2057,7 @@ module Prism
|
|
2042
2057
|
false
|
2043
2058
|
)
|
2044
2059
|
end,
|
2045
|
-
block.body
|
2060
|
+
visit(block.body),
|
2046
2061
|
token(block.closing_loc)
|
2047
2062
|
)
|
2048
2063
|
else
|
@@ -2050,13 +2065,6 @@ module Prism
|
|
2050
2065
|
end
|
2051
2066
|
end
|
2052
2067
|
|
2053
|
-
# The parser gem automatically converts \r\n to \n, meaning our offsets
|
2054
|
-
# need to be adjusted to always subtract 1 from the length.
|
2055
|
-
def chomped_bytesize(line)
|
2056
|
-
chomped = line.chomp
|
2057
|
-
chomped.bytesize + (chomped == line ? 0 : 1)
|
2058
|
-
end
|
2059
|
-
|
2060
2068
|
# Visit a heredoc that can be either a string or an xstring.
|
2061
2069
|
def visit_heredoc(node)
|
2062
2070
|
children = Array.new
|
@@ -2073,34 +2081,8 @@ module Prism
|
|
2073
2081
|
|
2074
2082
|
node.parts.each do |part|
|
2075
2083
|
pushing =
|
2076
|
-
if part.is_a?(StringNode) && part.
|
2077
|
-
unescaped
|
2078
|
-
escaped = part.content.lines
|
2079
|
-
|
2080
|
-
escaped_lengths = []
|
2081
|
-
normalized_lengths = []
|
2082
|
-
|
2083
|
-
if node.opening.end_with?("'")
|
2084
|
-
escaped.each do |line|
|
2085
|
-
escaped_lengths << line.bytesize
|
2086
|
-
normalized_lengths << chomped_bytesize(line)
|
2087
|
-
end
|
2088
|
-
else
|
2089
|
-
escaped
|
2090
|
-
.chunk_while { |before, after| before.match?(/(?<!\\)\\\r?\n$/) }
|
2091
|
-
.each do |lines|
|
2092
|
-
escaped_lengths << lines.sum(&:bytesize)
|
2093
|
-
normalized_lengths << lines.sum { |line| chomped_bytesize(line) }
|
2094
|
-
end
|
2095
|
-
end
|
2096
|
-
|
2097
|
-
start_offset = part.location.start_offset
|
2098
|
-
|
2099
|
-
unescaped.map.with_index do |unescaped_line, index|
|
2100
|
-
inner_part = builder.string_internal([unescaped_line, srange_offsets(start_offset, start_offset + normalized_lengths.fetch(index, 0))])
|
2101
|
-
start_offset += escaped_lengths.fetch(index, 0)
|
2102
|
-
inner_part
|
2103
|
-
end
|
2084
|
+
if part.is_a?(StringNode) && part.content.include?("\n")
|
2085
|
+
string_nodes_from_line_continuations(part.unescaped, part.content, part.location.start_offset, node.opening)
|
2104
2086
|
else
|
2105
2087
|
[visit(part)]
|
2106
2088
|
end
|
@@ -2114,7 +2096,7 @@ module Prism
|
|
2114
2096
|
location = appendee.loc
|
2115
2097
|
location = location.with_expression(location.expression.join(child.loc.expression))
|
2116
2098
|
|
2117
|
-
children[-1] = appendee.updated(:str, [appendee.children.first
|
2099
|
+
children[-1] = appendee.updated(:str, ["#{appendee.children.first}#{child.children.first}"], location: location)
|
2118
2100
|
else
|
2119
2101
|
children << child
|
2120
2102
|
end
|
@@ -2150,6 +2132,102 @@ module Prism
|
|
2150
2132
|
parser.pattern_variables.pop
|
2151
2133
|
end
|
2152
2134
|
end
|
2135
|
+
|
2136
|
+
# When the content of a string node is split across multiple lines, the
|
2137
|
+
# parser gem creates individual string nodes for each line the content is part of.
|
2138
|
+
def string_nodes_from_interpolation(node, opening)
|
2139
|
+
node.parts.flat_map do |part|
|
2140
|
+
if part.type == :string_node && part.content.include?("\n") && part.opening_loc.nil?
|
2141
|
+
string_nodes_from_line_continuations(part.unescaped, part.content, part.content_loc.start_offset, opening)
|
2142
|
+
else
|
2143
|
+
visit(part)
|
2144
|
+
end
|
2145
|
+
end
|
2146
|
+
end
|
2147
|
+
|
2148
|
+
# Create parser string nodes from a single prism node. The parser gem
|
2149
|
+
# "glues" strings together when a line continuation is encountered.
|
2150
|
+
def string_nodes_from_line_continuations(unescaped, escaped, start_offset, opening)
|
2151
|
+
unescaped = unescaped.lines
|
2152
|
+
escaped = escaped.lines
|
2153
|
+
percent_array = opening&.start_with?("%w", "%W", "%i", "%I")
|
2154
|
+
regex = opening == "/" || opening&.start_with?("%r")
|
2155
|
+
|
2156
|
+
# Non-interpolating strings
|
2157
|
+
if opening&.end_with?("'") || opening&.start_with?("%q", "%s", "%w", "%i")
|
2158
|
+
current_length = 0
|
2159
|
+
current_line = +""
|
2160
|
+
|
2161
|
+
escaped.filter_map.with_index do |escaped_line, index|
|
2162
|
+
unescaped_line = unescaped.fetch(index, "")
|
2163
|
+
current_length += escaped_line.bytesize
|
2164
|
+
current_line << unescaped_line
|
2165
|
+
|
2166
|
+
# Glue line continuations together. Only %w and %i arrays can contain these.
|
2167
|
+
if percent_array && escaped_line[/(\\)*\n$/, 1]&.length&.odd?
|
2168
|
+
next unless index == escaped.count - 1
|
2169
|
+
end
|
2170
|
+
s = builder.string_internal([current_line, srange_offsets(start_offset, start_offset + current_length)])
|
2171
|
+
start_offset += escaped_line.bytesize
|
2172
|
+
current_line = +""
|
2173
|
+
current_length = 0
|
2174
|
+
s
|
2175
|
+
end
|
2176
|
+
else
|
2177
|
+
escaped_lengths = []
|
2178
|
+
normalized_lengths = []
|
2179
|
+
# Keeps track of where an unescaped line should start a new token. An unescaped
|
2180
|
+
# \n would otherwise be indistinguishable from the actual newline at the end of
|
2181
|
+
# the line. The parser gem only emits a new string node at "real" newlines,
|
2182
|
+
# line continuations don't start a new node as well.
|
2183
|
+
do_next_tokens = []
|
2184
|
+
|
2185
|
+
escaped
|
2186
|
+
.chunk_while { |before, after| before[/(\\*)\r?\n$/, 1]&.length&.odd? || false }
|
2187
|
+
.each do |lines|
|
2188
|
+
escaped_lengths << lines.sum(&:bytesize)
|
2189
|
+
|
2190
|
+
unescaped_lines_count =
|
2191
|
+
if regex
|
2192
|
+
0 # Will always be preserved as is
|
2193
|
+
else
|
2194
|
+
lines.sum do |line|
|
2195
|
+
count = line.scan(/(\\*)n/).count { |(backslashes)| backslashes&.length&.odd? }
|
2196
|
+
count -= 1 if !line.end_with?("\n") && count > 0
|
2197
|
+
count
|
2198
|
+
end
|
2199
|
+
end
|
2200
|
+
|
2201
|
+
extra = 1
|
2202
|
+
extra = lines.count if percent_array # Account for line continuations in percent arrays
|
2203
|
+
|
2204
|
+
normalized_lengths.concat(Array.new(unescaped_lines_count + extra, 0))
|
2205
|
+
normalized_lengths[-1] = lines.sum { |line| line.bytesize }
|
2206
|
+
do_next_tokens.concat(Array.new(unescaped_lines_count + extra, false))
|
2207
|
+
do_next_tokens[-1] = true
|
2208
|
+
end
|
2209
|
+
|
2210
|
+
current_line = +""
|
2211
|
+
current_normalized_length = 0
|
2212
|
+
|
2213
|
+
emitted_count = 0
|
2214
|
+
unescaped.filter_map.with_index do |unescaped_line, index|
|
2215
|
+
current_line << unescaped_line
|
2216
|
+
current_normalized_length += normalized_lengths.fetch(index, 0)
|
2217
|
+
|
2218
|
+
if do_next_tokens[index]
|
2219
|
+
inner_part = builder.string_internal([current_line, srange_offsets(start_offset, start_offset + current_normalized_length)])
|
2220
|
+
start_offset += escaped_lengths.fetch(emitted_count, 0)
|
2221
|
+
current_line = +""
|
2222
|
+
current_normalized_length = 0
|
2223
|
+
emitted_count += 1
|
2224
|
+
inner_part
|
2225
|
+
else
|
2226
|
+
nil
|
2227
|
+
end
|
2228
|
+
end
|
2229
|
+
end
|
2230
|
+
end
|
2153
2231
|
end
|
2154
2232
|
end
|
2155
2233
|
end
|