yarp 0.8.0 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +34 -1
- data/README.md +4 -3
- data/config.yml +332 -52
- data/docs/configuration.md +1 -0
- data/docs/ruby_api.md +2 -0
- data/docs/serialization.md +1 -1
- data/docs/testing.md +2 -2
- data/ext/yarp/api_node.c +703 -136
- data/ext/yarp/extension.c +73 -24
- data/ext/yarp/extension.h +2 -2
- data/include/yarp/ast.h +331 -137
- data/include/yarp/node.h +10 -0
- data/include/yarp/unescape.h +4 -2
- data/include/yarp/util/yp_newline_list.h +3 -0
- data/include/yarp/version.h +2 -2
- data/include/yarp.h +10 -0
- data/lib/yarp/desugar_visitor.rb +267 -0
- data/lib/yarp/ffi.rb +27 -1
- data/lib/yarp/lex_compat.rb +93 -25
- data/lib/yarp/mutation_visitor.rb +683 -0
- data/lib/yarp/node.rb +3042 -508
- data/lib/yarp/serialize.rb +198 -126
- data/lib/yarp.rb +48 -2
- data/src/node.c +421 -185
- data/src/prettyprint.c +262 -80
- data/src/serialize.c +410 -270
- data/src/token_type.c +2 -2
- data/src/unescape.c +69 -51
- data/src/util/yp_newline_list.c +10 -0
- data/src/yarp.c +1208 -458
- data/yarp.gemspec +3 -1
- metadata +4 -2
@@ -0,0 +1,267 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module YARP
|
4
|
+
class DesugarVisitor < MutationVisitor
|
5
|
+
# @@foo &&= bar
|
6
|
+
#
|
7
|
+
# becomes
|
8
|
+
#
|
9
|
+
# @@foo && @@foo = bar
|
10
|
+
def visit_class_variable_and_write_node(node)
|
11
|
+
AndNode.new(
|
12
|
+
ClassVariableReadNode.new(node.name_loc),
|
13
|
+
ClassVariableWriteNode.new(node.name_loc, node.value, node.operator_loc, node.location),
|
14
|
+
node.operator_loc,
|
15
|
+
node.location
|
16
|
+
)
|
17
|
+
end
|
18
|
+
|
19
|
+
# @@foo ||= bar
|
20
|
+
#
|
21
|
+
# becomes
|
22
|
+
#
|
23
|
+
# @@foo || @@foo = bar
|
24
|
+
def visit_class_variable_or_write_node(node)
|
25
|
+
OrNode.new(
|
26
|
+
ClassVariableReadNode.new(node.name_loc),
|
27
|
+
ClassVariableWriteNode.new(node.name_loc, node.value, node.operator_loc, node.location),
|
28
|
+
node.operator_loc,
|
29
|
+
node.location
|
30
|
+
)
|
31
|
+
end
|
32
|
+
|
33
|
+
# @@foo += bar
|
34
|
+
#
|
35
|
+
# becomes
|
36
|
+
#
|
37
|
+
# @@foo = @@foo + bar
|
38
|
+
def visit_class_variable_operator_write_node(node)
|
39
|
+
desugar_operator_write_node(node, ClassVariableWriteNode, ClassVariableReadNode)
|
40
|
+
end
|
41
|
+
|
42
|
+
# Foo &&= bar
|
43
|
+
#
|
44
|
+
# becomes
|
45
|
+
#
|
46
|
+
# Foo && Foo = bar
|
47
|
+
def visit_constant_and_write_node(node)
|
48
|
+
AndNode.new(
|
49
|
+
ConstantReadNode.new(node.name_loc),
|
50
|
+
ConstantWriteNode.new(node.name_loc, node.value, node.operator_loc, node.location),
|
51
|
+
node.operator_loc,
|
52
|
+
node.location
|
53
|
+
)
|
54
|
+
end
|
55
|
+
|
56
|
+
# Foo ||= bar
|
57
|
+
#
|
58
|
+
# becomes
|
59
|
+
#
|
60
|
+
# Foo || Foo = bar
|
61
|
+
def visit_constant_or_write_node(node)
|
62
|
+
OrNode.new(
|
63
|
+
ConstantReadNode.new(node.name_loc),
|
64
|
+
ConstantWriteNode.new(node.name_loc, node.value, node.operator_loc, node.location),
|
65
|
+
node.operator_loc,
|
66
|
+
node.location
|
67
|
+
)
|
68
|
+
end
|
69
|
+
|
70
|
+
# Foo += bar
|
71
|
+
#
|
72
|
+
# becomes
|
73
|
+
#
|
74
|
+
# Foo = Foo + bar
|
75
|
+
def visit_constant_operator_write_node(node)
|
76
|
+
desugar_operator_write_node(node, ConstantWriteNode, ConstantReadNode)
|
77
|
+
end
|
78
|
+
|
79
|
+
# Foo::Bar &&= baz
|
80
|
+
#
|
81
|
+
# becomes
|
82
|
+
#
|
83
|
+
# Foo::Bar && Foo::Bar = baz
|
84
|
+
def visit_constant_path_and_write_node(node)
|
85
|
+
AndNode.new(
|
86
|
+
node.target,
|
87
|
+
ConstantPathWriteNode.new(node.target, node.value, node.operator_loc, node.location),
|
88
|
+
node.operator_loc,
|
89
|
+
node.location
|
90
|
+
)
|
91
|
+
end
|
92
|
+
|
93
|
+
# Foo::Bar ||= baz
|
94
|
+
#
|
95
|
+
# becomes
|
96
|
+
#
|
97
|
+
# Foo::Bar || Foo::Bar = baz
|
98
|
+
def visit_constant_path_or_write_node(node)
|
99
|
+
OrNode.new(
|
100
|
+
node.target,
|
101
|
+
ConstantPathWriteNode.new(node.target, node.value, node.operator_loc, node.location),
|
102
|
+
node.operator_loc,
|
103
|
+
node.location
|
104
|
+
)
|
105
|
+
end
|
106
|
+
|
107
|
+
# Foo::Bar += baz
|
108
|
+
#
|
109
|
+
# becomes
|
110
|
+
#
|
111
|
+
# Foo::Bar = Foo::Bar + baz
|
112
|
+
def visit_constant_path_operator_write_node(node)
|
113
|
+
ConstantPathWriteNode.new(
|
114
|
+
node.target,
|
115
|
+
CallNode.new(
|
116
|
+
node.target,
|
117
|
+
nil,
|
118
|
+
node.operator_loc.copy(length: node.operator_loc.length - 1),
|
119
|
+
nil,
|
120
|
+
ArgumentsNode.new([node.value], node.value.location),
|
121
|
+
nil,
|
122
|
+
nil,
|
123
|
+
0,
|
124
|
+
node.operator_loc.slice.chomp("="),
|
125
|
+
node.location
|
126
|
+
),
|
127
|
+
node.operator_loc.copy(start_offset: node.operator_loc.end_offset - 1, length: 1),
|
128
|
+
node.location
|
129
|
+
)
|
130
|
+
end
|
131
|
+
|
132
|
+
# $foo &&= bar
|
133
|
+
#
|
134
|
+
# becomes
|
135
|
+
#
|
136
|
+
# $foo && $foo = bar
|
137
|
+
def visit_global_variable_and_write_node(node)
|
138
|
+
AndNode.new(
|
139
|
+
GlobalVariableReadNode.new(node.name_loc),
|
140
|
+
GlobalVariableWriteNode.new(node.name_loc, node.value, node.operator_loc, node.location),
|
141
|
+
node.operator_loc,
|
142
|
+
node.location
|
143
|
+
)
|
144
|
+
end
|
145
|
+
|
146
|
+
# $foo ||= bar
|
147
|
+
#
|
148
|
+
# becomes
|
149
|
+
#
|
150
|
+
# $foo || $foo = bar
|
151
|
+
def visit_global_variable_or_write_node(node)
|
152
|
+
OrNode.new(
|
153
|
+
GlobalVariableReadNode.new(node.name_loc),
|
154
|
+
GlobalVariableWriteNode.new(node.name_loc, node.value, node.operator_loc, node.location),
|
155
|
+
node.operator_loc,
|
156
|
+
node.location
|
157
|
+
)
|
158
|
+
end
|
159
|
+
|
160
|
+
# $foo += bar
|
161
|
+
#
|
162
|
+
# becomes
|
163
|
+
#
|
164
|
+
# $foo = $foo + bar
|
165
|
+
def visit_global_variable_operator_write_node(node)
|
166
|
+
desugar_operator_write_node(node, GlobalVariableWriteNode, GlobalVariableReadNode)
|
167
|
+
end
|
168
|
+
|
169
|
+
# @foo &&= bar
|
170
|
+
#
|
171
|
+
# becomes
|
172
|
+
#
|
173
|
+
# @foo && @foo = bar
|
174
|
+
def visit_instance_variable_and_write_node(node)
|
175
|
+
AndNode.new(
|
176
|
+
InstanceVariableReadNode.new(node.name_loc),
|
177
|
+
InstanceVariableWriteNode.new(node.name_loc, node.value, node.operator_loc, node.location),
|
178
|
+
node.operator_loc,
|
179
|
+
node.location
|
180
|
+
)
|
181
|
+
end
|
182
|
+
|
183
|
+
# @foo ||= bar
|
184
|
+
#
|
185
|
+
# becomes
|
186
|
+
#
|
187
|
+
# @foo || @foo = bar
|
188
|
+
def visit_instance_variable_or_write_node(node)
|
189
|
+
OrNode.new(
|
190
|
+
InstanceVariableReadNode.new(node.name_loc),
|
191
|
+
InstanceVariableWriteNode.new(node.name_loc, node.value, node.operator_loc, node.location),
|
192
|
+
node.operator_loc,
|
193
|
+
node.location
|
194
|
+
)
|
195
|
+
end
|
196
|
+
|
197
|
+
# @foo += bar
|
198
|
+
#
|
199
|
+
# becomes
|
200
|
+
#
|
201
|
+
# @foo = @foo + bar
|
202
|
+
def visit_instance_variable_operator_write_node(node)
|
203
|
+
desugar_operator_write_node(node, InstanceVariableWriteNode, InstanceVariableReadNode)
|
204
|
+
end
|
205
|
+
|
206
|
+
# foo &&= bar
|
207
|
+
#
|
208
|
+
# becomes
|
209
|
+
#
|
210
|
+
# foo && foo = bar
|
211
|
+
def visit_local_variable_and_write_node(node)
|
212
|
+
AndNode.new(
|
213
|
+
LocalVariableReadNode.new(node.constant_id, node.depth, node.name_loc),
|
214
|
+
LocalVariableWriteNode.new(node.constant_id, node.depth, node.name_loc, node.value, node.operator_loc, node.location),
|
215
|
+
node.operator_loc,
|
216
|
+
node.location
|
217
|
+
)
|
218
|
+
end
|
219
|
+
|
220
|
+
# foo ||= bar
|
221
|
+
#
|
222
|
+
# becomes
|
223
|
+
#
|
224
|
+
# foo || foo = bar
|
225
|
+
def visit_local_variable_or_write_node(node)
|
226
|
+
OrNode.new(
|
227
|
+
LocalVariableReadNode.new(node.constant_id, node.depth, node.name_loc),
|
228
|
+
LocalVariableWriteNode.new(node.constant_id, node.depth, node.name_loc, node.value, node.operator_loc, node.location),
|
229
|
+
node.operator_loc,
|
230
|
+
node.location
|
231
|
+
)
|
232
|
+
end
|
233
|
+
|
234
|
+
# foo += bar
|
235
|
+
#
|
236
|
+
# becomes
|
237
|
+
#
|
238
|
+
# foo = foo + bar
|
239
|
+
def visit_local_variable_operator_write_node(node)
|
240
|
+
desugar_operator_write_node(node, LocalVariableWriteNode, LocalVariableReadNode, arguments: [node.constant_id, node.depth])
|
241
|
+
end
|
242
|
+
|
243
|
+
private
|
244
|
+
|
245
|
+
# Desugar `x += y` to `x = x + y`
|
246
|
+
def desugar_operator_write_node(node, write_class, read_class, arguments: [])
|
247
|
+
write_class.new(
|
248
|
+
*arguments,
|
249
|
+
node.name_loc,
|
250
|
+
CallNode.new(
|
251
|
+
read_class.new(*arguments, node.name_loc),
|
252
|
+
nil,
|
253
|
+
node.operator_loc.copy(length: node.operator_loc.length - 1),
|
254
|
+
nil,
|
255
|
+
ArgumentsNode.new([node.value], node.value.location),
|
256
|
+
nil,
|
257
|
+
nil,
|
258
|
+
0,
|
259
|
+
node.operator_loc.slice.chomp("="),
|
260
|
+
node.location
|
261
|
+
),
|
262
|
+
node.operator_loc.copy(start_offset: node.operator_loc.end_offset - 1, length: 1),
|
263
|
+
node.location
|
264
|
+
)
|
265
|
+
end
|
266
|
+
end
|
267
|
+
end
|
data/lib/yarp/ffi.rb
CHANGED
@@ -70,7 +70,8 @@ module YARP
|
|
70
70
|
"yarp.h",
|
71
71
|
"yp_version",
|
72
72
|
"yp_parse_serialize",
|
73
|
-
"yp_lex_serialize"
|
73
|
+
"yp_lex_serialize",
|
74
|
+
"yp_parse_lex_serialize"
|
74
75
|
)
|
75
76
|
|
76
77
|
load_exported_functions_from(
|
@@ -223,4 +224,29 @@ module YARP
|
|
223
224
|
parse(string.read, filepath)
|
224
225
|
end
|
225
226
|
end
|
227
|
+
|
228
|
+
# Mirror the YARP.parse_lex API by using the serialization API.
|
229
|
+
def self.parse_lex(code, filepath = nil)
|
230
|
+
LibRubyParser::YPBuffer.with do |buffer|
|
231
|
+
metadata = [filepath.bytesize, filepath.b, 0].pack("LA*L") if filepath
|
232
|
+
LibRubyParser.yp_parse_lex_serialize(code, code.bytesize, buffer.pointer, metadata)
|
233
|
+
|
234
|
+
source = Source.new(code)
|
235
|
+
loader = Serialize::Loader.new(source, buffer.read)
|
236
|
+
|
237
|
+
tokens = loader.load_tokens
|
238
|
+
node, comments, errors, warnings = loader.load_nodes
|
239
|
+
|
240
|
+
tokens.each { |token,| token.value.force_encoding(loader.encoding) }
|
241
|
+
|
242
|
+
ParseResult.new([node, tokens], comments, errors, warnings, source)
|
243
|
+
end
|
244
|
+
end
|
245
|
+
|
246
|
+
# Mirror the YARP.parse_lex_file API by using the serialization API.
|
247
|
+
def self.parse_lex_file(filepath)
|
248
|
+
LibRubyParser::YPString.with(filepath) do |string|
|
249
|
+
parse_lex(string.read, filepath)
|
250
|
+
end
|
251
|
+
end
|
226
252
|
end
|
data/lib/yarp/lex_compat.rb
CHANGED
@@ -208,18 +208,9 @@ module YARP
|
|
208
208
|
end
|
209
209
|
end
|
210
210
|
|
211
|
-
#
|
212
|
-
#
|
213
|
-
|
214
|
-
class CommentToken < Token
|
215
|
-
def ==(other)
|
216
|
-
self[0...-1] == other[0...-1]
|
217
|
-
end
|
218
|
-
end
|
219
|
-
|
220
|
-
# Heredoc end tokens are emitted in an odd order, so we don't compare the
|
221
|
-
# state on them.
|
222
|
-
class HeredocEndToken < Token
|
211
|
+
# Tokens where state should be ignored
|
212
|
+
# used for :on_comment, :on_heredoc_end, :on_embexpr_end
|
213
|
+
class IgnoreStateToken < Token
|
223
214
|
def ==(other)
|
224
215
|
self[0...-1] == other[0...-1]
|
225
216
|
end
|
@@ -252,6 +243,23 @@ module YARP
|
|
252
243
|
end
|
253
244
|
end
|
254
245
|
|
246
|
+
# If we have an identifier that follows a method name like:
|
247
|
+
#
|
248
|
+
# def foo bar
|
249
|
+
#
|
250
|
+
# then Ripper will mark bar as END|LABEL if there is a local in a parent
|
251
|
+
# scope named bar because it hasn't pushed the local table yet. We do this
|
252
|
+
# more accurately, so we need to allow comparing against both END and
|
253
|
+
# END|LABEL.
|
254
|
+
class ParamToken < Token
|
255
|
+
def ==(other)
|
256
|
+
(self[0...-1] == other[0...-1]) && (
|
257
|
+
(other[3] == Ripper::EXPR_END) ||
|
258
|
+
(other[3] == Ripper::EXPR_END | Ripper::EXPR_LABEL)
|
259
|
+
)
|
260
|
+
end
|
261
|
+
end
|
262
|
+
|
255
263
|
# A heredoc in this case is a list of tokens that belong to the body of the
|
256
264
|
# heredoc that should be appended onto the list of tokens when the heredoc
|
257
265
|
# closes.
|
@@ -558,18 +566,45 @@ module YARP
|
|
558
566
|
result_value = result.value
|
559
567
|
previous_state = nil
|
560
568
|
|
561
|
-
#
|
562
|
-
#
|
563
|
-
#
|
564
|
-
|
565
|
-
|
566
|
-
bom = source.bytes[0..2] == [0xEF, 0xBB, 0xBF]
|
567
|
-
result_value[0][0].value.prepend("\xEF\xBB\xBF") if bom
|
569
|
+
# In previous versions of Ruby, Ripper wouldn't flush the bom before the
|
570
|
+
# first token, so we had to have a hack in place to account for that. This
|
571
|
+
# checks for that behavior.
|
572
|
+
bom_flushed = Ripper.lex("\xEF\xBB\xBF# test")[0][0][1] == 0
|
573
|
+
bom = source.byteslice(0..2) == "\xEF\xBB\xBF"
|
568
574
|
|
569
575
|
result_value.each_with_index do |(token, lex_state), index|
|
570
576
|
lineno = token.location.start_line
|
571
577
|
column = token.location.start_column
|
572
|
-
|
578
|
+
|
579
|
+
# If there's a UTF-8 byte-order mark at the start of the file, then for
|
580
|
+
# certain tokens ripper sets the first token back by 3 bytes. It also
|
581
|
+
# keeps the byte order mark in the first token's value. This is weird,
|
582
|
+
# and I don't want to mirror that in our parser. So instead, we'll match
|
583
|
+
# up the columns and values here.
|
584
|
+
if bom && lineno == 1
|
585
|
+
column -= 3
|
586
|
+
|
587
|
+
if index == 0 && column == 0 && !bom_flushed
|
588
|
+
flushed =
|
589
|
+
case token.type
|
590
|
+
when :BACK_REFERENCE, :INSTANCE_VARIABLE, :CLASS_VARIABLE,
|
591
|
+
:GLOBAL_VARIABLE, :NUMBERED_REFERENCE, :PERCENT_LOWER_I,
|
592
|
+
:PERCENT_LOWER_X, :PERCENT_LOWER_W, :PERCENT_UPPER_I,
|
593
|
+
:PERCENT_UPPER_W, :STRING_BEGIN
|
594
|
+
true
|
595
|
+
when :REGEXP_BEGIN, :SYMBOL_BEGIN
|
596
|
+
token.value.start_with?("%")
|
597
|
+
else
|
598
|
+
false
|
599
|
+
end
|
600
|
+
|
601
|
+
unless flushed
|
602
|
+
column -= 3
|
603
|
+
value = token.value
|
604
|
+
value.prepend(String.new("\xEF\xBB\xBF", encoding: value.encoding))
|
605
|
+
end
|
606
|
+
end
|
607
|
+
end
|
573
608
|
|
574
609
|
event = RIPPER.fetch(token.type)
|
575
610
|
value = token.value
|
@@ -580,13 +615,23 @@ module YARP
|
|
580
615
|
when :on___end__
|
581
616
|
EndContentToken.new([[lineno, column], event, value, lex_state])
|
582
617
|
when :on_comment
|
583
|
-
|
618
|
+
IgnoreStateToken.new([[lineno, column], event, value, lex_state])
|
584
619
|
when :on_heredoc_end
|
585
620
|
# Heredoc end tokens can be emitted in an odd order, so we don't
|
586
621
|
# want to bother comparing the state on them.
|
587
|
-
|
588
|
-
when :
|
589
|
-
if lex_state == Ripper::EXPR_END
|
622
|
+
IgnoreStateToken.new([[lineno, column], event, value, lex_state])
|
623
|
+
when :on_ident
|
624
|
+
if lex_state == Ripper::EXPR_END
|
625
|
+
# If we have an identifier that follows a method name like:
|
626
|
+
#
|
627
|
+
# def foo bar
|
628
|
+
#
|
629
|
+
# then Ripper will mark bar as END|LABEL if there is a local in a
|
630
|
+
# parent scope named bar because it hasn't pushed the local table
|
631
|
+
# yet. We do this more accurately, so we need to allow comparing
|
632
|
+
# against both END and END|LABEL.
|
633
|
+
ParamToken.new([[lineno, column], event, value, lex_state])
|
634
|
+
elsif lex_state == Ripper::EXPR_END | Ripper::EXPR_LABEL
|
590
635
|
# In the event that we're comparing identifiers, we're going to
|
591
636
|
# allow a little divergence. Ripper doesn't account for local
|
592
637
|
# variables introduced through named captures in regexes, and we
|
@@ -595,6 +640,8 @@ module YARP
|
|
595
640
|
else
|
596
641
|
Token.new([[lineno, column], event, value, lex_state])
|
597
642
|
end
|
643
|
+
when :on_embexpr_end
|
644
|
+
IgnoreStateToken.new([[lineno, column], event, value, lex_state])
|
598
645
|
when :on_ignored_nl
|
599
646
|
# Ignored newlines can occasionally have a LABEL state attached to
|
600
647
|
# them which doesn't actually impact anything. We don't mirror that
|
@@ -629,6 +676,26 @@ module YARP
|
|
629
676
|
previous_state
|
630
677
|
end
|
631
678
|
|
679
|
+
Token.new([[lineno, column], event, value, lex_state])
|
680
|
+
when :on_eof
|
681
|
+
previous_token = result_value[index - 1][0]
|
682
|
+
|
683
|
+
# If we're at the end of the file and the previous token was a
|
684
|
+
# comment and there is still whitespace after the comment, then
|
685
|
+
# Ripper will append an on_nl token (even though there isn't
|
686
|
+
# necessarily a newline). We mirror that here.
|
687
|
+
start_offset = previous_token.location.end_offset
|
688
|
+
end_offset = token.location.start_offset
|
689
|
+
|
690
|
+
if previous_token.type == :COMMENT && start_offset < end_offset
|
691
|
+
if bom
|
692
|
+
start_offset += 3
|
693
|
+
end_offset += 3
|
694
|
+
end
|
695
|
+
|
696
|
+
tokens << Token.new([[lineno, 0], :on_nl, source.byteslice(start_offset...end_offset), lex_state])
|
697
|
+
end
|
698
|
+
|
632
699
|
Token.new([[lineno, column], event, value, lex_state])
|
633
700
|
else
|
634
701
|
Token.new([[lineno, column], event, value, lex_state])
|
@@ -713,7 +780,8 @@ module YARP
|
|
713
780
|
end
|
714
781
|
end
|
715
782
|
|
716
|
-
|
783
|
+
# Drop the EOF token from the list
|
784
|
+
tokens = tokens[0...-1]
|
717
785
|
|
718
786
|
# We sort by location to compare against Ripper's output
|
719
787
|
tokens.sort_by!(&:location)
|