yarp 0.8.0 → 0.9.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +34 -1
- data/README.md +4 -3
- data/config.yml +332 -52
- data/docs/configuration.md +1 -0
- data/docs/ruby_api.md +2 -0
- data/docs/serialization.md +1 -1
- data/docs/testing.md +2 -2
- data/ext/yarp/api_node.c +703 -136
- data/ext/yarp/extension.c +73 -24
- data/ext/yarp/extension.h +2 -2
- data/include/yarp/ast.h +331 -137
- data/include/yarp/node.h +10 -0
- data/include/yarp/unescape.h +4 -2
- data/include/yarp/util/yp_newline_list.h +3 -0
- data/include/yarp/version.h +2 -2
- data/include/yarp.h +10 -0
- data/lib/yarp/desugar_visitor.rb +267 -0
- data/lib/yarp/ffi.rb +27 -1
- data/lib/yarp/lex_compat.rb +93 -25
- data/lib/yarp/mutation_visitor.rb +683 -0
- data/lib/yarp/node.rb +3042 -508
- data/lib/yarp/serialize.rb +198 -126
- data/lib/yarp.rb +48 -2
- data/src/node.c +421 -185
- data/src/prettyprint.c +262 -80
- data/src/serialize.c +410 -270
- data/src/token_type.c +2 -2
- data/src/unescape.c +69 -51
- data/src/util/yp_newline_list.c +10 -0
- data/src/yarp.c +1208 -458
- data/yarp.gemspec +3 -1
- metadata +4 -2
@@ -0,0 +1,267 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module YARP
  # A MutationVisitor subclass whose visit methods take a compound assignment
  # node (`&&=`, `||=`, or an operator write such as `+=`) and return the
  # equivalent tree built from plain read, write, and/or, and call nodes.
  class DesugarVisitor < MutationVisitor
    # @@foo &&= bar
    #
    # becomes
    #
    # @@foo && @@foo = bar
    def visit_class_variable_and_write_node(node)
      desugar_logical_write_node(node, AndNode, ClassVariableWriteNode, ClassVariableReadNode)
    end

    # @@foo ||= bar
    #
    # becomes
    #
    # @@foo || @@foo = bar
    def visit_class_variable_or_write_node(node)
      desugar_logical_write_node(node, OrNode, ClassVariableWriteNode, ClassVariableReadNode)
    end

    # @@foo += bar
    #
    # becomes
    #
    # @@foo = @@foo + bar
    def visit_class_variable_operator_write_node(node)
      desugar_operator_write_node(node, ClassVariableWriteNode, ClassVariableReadNode)
    end

    # Foo &&= bar
    #
    # becomes
    #
    # Foo && Foo = bar
    def visit_constant_and_write_node(node)
      desugar_logical_write_node(node, AndNode, ConstantWriteNode, ConstantReadNode)
    end

    # Foo ||= bar
    #
    # becomes
    #
    # Foo || Foo = bar
    def visit_constant_or_write_node(node)
      desugar_logical_write_node(node, OrNode, ConstantWriteNode, ConstantReadNode)
    end

    # Foo += bar
    #
    # becomes
    #
    # Foo = Foo + bar
    def visit_constant_operator_write_node(node)
      desugar_operator_write_node(node, ConstantWriteNode, ConstantReadNode)
    end

    # Foo::Bar &&= baz
    #
    # becomes
    #
    # Foo::Bar && Foo::Bar = baz
    def visit_constant_path_and_write_node(node)
      desugar_constant_path_logical_write_node(node, AndNode)
    end

    # Foo::Bar ||= baz
    #
    # becomes
    #
    # Foo::Bar || Foo::Bar = baz
    def visit_constant_path_or_write_node(node)
      desugar_constant_path_logical_write_node(node, OrNode)
    end

    # Foo::Bar += baz
    #
    # becomes
    #
    # Foo::Bar = Foo::Bar + baz
    def visit_constant_path_operator_write_node(node)
      ConstantPathWriteNode.new(
        node.target,
        desugar_operator_call_node(node, node.target),
        desugar_assignment_operator_loc(node),
        node.location
      )
    end

    # $foo &&= bar
    #
    # becomes
    #
    # $foo && $foo = bar
    def visit_global_variable_and_write_node(node)
      desugar_logical_write_node(node, AndNode, GlobalVariableWriteNode, GlobalVariableReadNode)
    end

    # $foo ||= bar
    #
    # becomes
    #
    # $foo || $foo = bar
    def visit_global_variable_or_write_node(node)
      desugar_logical_write_node(node, OrNode, GlobalVariableWriteNode, GlobalVariableReadNode)
    end

    # $foo += bar
    #
    # becomes
    #
    # $foo = $foo + bar
    def visit_global_variable_operator_write_node(node)
      desugar_operator_write_node(node, GlobalVariableWriteNode, GlobalVariableReadNode)
    end

    # @foo &&= bar
    #
    # becomes
    #
    # @foo && @foo = bar
    def visit_instance_variable_and_write_node(node)
      desugar_logical_write_node(node, AndNode, InstanceVariableWriteNode, InstanceVariableReadNode)
    end

    # @foo ||= bar
    #
    # becomes
    #
    # @foo || @foo = bar
    def visit_instance_variable_or_write_node(node)
      desugar_logical_write_node(node, OrNode, InstanceVariableWriteNode, InstanceVariableReadNode)
    end

    # @foo += bar
    #
    # becomes
    #
    # @foo = @foo + bar
    def visit_instance_variable_operator_write_node(node)
      desugar_operator_write_node(node, InstanceVariableWriteNode, InstanceVariableReadNode)
    end

    # foo &&= bar
    #
    # becomes
    #
    # foo && foo = bar
    def visit_local_variable_and_write_node(node)
      desugar_logical_write_node(
        node, AndNode, LocalVariableWriteNode, LocalVariableReadNode,
        arguments: [node.constant_id, node.depth]
      )
    end

    # foo ||= bar
    #
    # becomes
    #
    # foo || foo = bar
    def visit_local_variable_or_write_node(node)
      desugar_logical_write_node(
        node, OrNode, LocalVariableWriteNode, LocalVariableReadNode,
        arguments: [node.constant_id, node.depth]
      )
    end

    # foo += bar
    #
    # becomes
    #
    # foo = foo + bar
    def visit_local_variable_operator_write_node(node)
      desugar_operator_write_node(
        node, LocalVariableWriteNode, LocalVariableReadNode,
        arguments: [node.constant_id, node.depth]
      )
    end

    private

    # Desugar `x &&= y` to `x && x = y` (logical_class is AndNode) or
    # `x ||= y` to `x || x = y` (logical_class is OrNode).
    #
    # `arguments` are extra leading constructor arguments shared by the read
    # and the write node (used for local variables: constant id and depth).
    def desugar_logical_write_node(node, logical_class, write_class, read_class, arguments: [])
      logical_class.new(
        read_class.new(*arguments, node.name_loc),
        write_class.new(*arguments, node.name_loc, node.value, node.operator_loc, node.location),
        node.operator_loc,
        node.location
      )
    end

    # Constant-path variant of the logical desugaring: the existing target
    # node is reused both as the left-hand read and as the write target.
    def desugar_constant_path_logical_write_node(node, logical_class)
      logical_class.new(
        node.target,
        ConstantPathWriteNode.new(node.target, node.value, node.operator_loc, node.location),
        node.operator_loc,
        node.location
      )
    end

    # Desugar `x += y` to `x = x + y`
    def desugar_operator_write_node(node, write_class, read_class, arguments: [])
      write_class.new(
        *arguments,
        node.name_loc,
        desugar_operator_call_node(node, read_class.new(*arguments, node.name_loc)),
        desugar_assignment_operator_loc(node),
        node.location
      )
    end

    # Build the `receiver <op> value` call for an operator write. The call's
    # message location is the operator location shortened by one (dropping
    # the trailing `=`), and its name is the operator text without that `=`.
    # The positional nils and the 0 are CallNode fields this desugaring leaves
    # unset -- NOTE(review): confirm their meaning against CallNode#initialize.
    def desugar_operator_call_node(node, receiver)
      operator_loc = node.operator_loc

      CallNode.new(
        receiver,
        nil,
        operator_loc.copy(length: operator_loc.length - 1),
        nil,
        ArgumentsNode.new([node.value], node.value.location),
        nil,
        nil,
        0,
        operator_loc.slice.chomp("="),
        node.location
      )
    end

    # Location of the single character that ends the compound operator
    # (the `=` in `+=`), used as the operator location of the plain write.
    def desugar_assignment_operator_loc(node)
      operator_loc = node.operator_loc
      operator_loc.copy(start_offset: operator_loc.end_offset - 1, length: 1)
    end
  end
end
|
data/lib/yarp/ffi.rb
CHANGED
@@ -70,7 +70,8 @@ module YARP
|
|
70
70
|
"yarp.h",
|
71
71
|
"yp_version",
|
72
72
|
"yp_parse_serialize",
|
73
|
-
"yp_lex_serialize"
|
73
|
+
"yp_lex_serialize",
|
74
|
+
"yp_parse_lex_serialize"
|
74
75
|
)
|
75
76
|
|
76
77
|
load_exported_functions_from(
|
@@ -223,4 +224,29 @@ module YARP
|
|
223
224
|
parse(string.read, filepath)
|
224
225
|
end
|
225
226
|
end
|
227
|
+
|
228
|
+
# Mirror the YARP.parse_lex API by using the serialization API.
#
# Asks LibRubyParser.yp_parse_lex_serialize to write its output for `code`
# into a buffer, reads that buffer back through Serialize::Loader, and builds
# a ParseResult whose value is the pair [node, tokens].
def self.parse_lex(code, filepath = nil)
  LibRubyParser::YPBuffer.with do |buffer|
    # With a filepath, the metadata is packed ("LA*L") as the path's byte
    # length, the path bytes, and a trailing 0; without one it stays nil.
    metadata = filepath ? [filepath.bytesize, filepath.b, 0].pack("LA*L") : nil
    LibRubyParser.yp_parse_lex_serialize(code, code.bytesize, buffer.pointer, metadata)

    source = Source.new(code)
    loader = Serialize::Loader.new(source, buffer.read)

    # Keep this order: the tokens are loaded first, then the nodes.
    tokens = loader.load_tokens
    node, comments, errors, warnings = loader.load_nodes

    # Only the first element of each token entry carries the value to re-tag.
    tokens.each do |(token, _)|
      token.value.force_encoding(loader.encoding)
    end

    ParseResult.new([node, tokens], comments, errors, warnings, source)
  end
end
|
245
|
+
|
246
|
+
# Mirror the YARP.parse_lex_file API by using the serialization API.
#
# Opens `filepath` through LibRubyParser::YPString and hands what it reads,
# together with the path itself, to parse_lex.
def self.parse_lex_file(filepath)
  LibRubyParser::YPString.with(filepath) { |contents| parse_lex(contents.read, filepath) }
end
|
226
252
|
end
|
data/lib/yarp/lex_compat.rb
CHANGED
@@ -208,18 +208,9 @@ module YARP
|
|
208
208
|
end
|
209
209
|
end
|
210
210
|
|
211
|
-
#
|
212
|
-
#
|
213
|
-
|
214
|
-
class CommentToken < Token
|
215
|
-
def ==(other)
|
216
|
-
self[0...-1] == other[0...-1]
|
217
|
-
end
|
218
|
-
end
|
219
|
-
|
220
|
-
# Heredoc end tokens are emitted in an odd order, so we don't compare the
|
221
|
-
# state on them.
|
222
|
-
class HeredocEndToken < Token
|
211
|
+
# Tokens whose lex state is ignored in comparisons.
|
212
|
+
# Used for :on_comment, :on_heredoc_end, and :on_embexpr_end.
|
213
|
+
class IgnoreStateToken < Token
|
223
214
|
def ==(other)
|
224
215
|
self[0...-1] == other[0...-1]
|
225
216
|
end
|
@@ -252,6 +243,23 @@ module YARP
|
|
252
243
|
end
|
253
244
|
end
|
254
245
|
|
246
|
+
# Token for an identifier that follows a method name, as in:
#
#     def foo bar
#
# Ripper will mark bar as END|LABEL if there is a local in a parent scope
# named bar, because it hasn't pushed the local table yet. We do this more
# accurately, so equality accepts the other token's state being either END
# or END|LABEL, provided everything before the state matches.
class ParamToken < Token
  def ==(other)
    # Everything except the trailing state element must match exactly.
    return false unless self[0...-1] == other[0...-1]

    other[3] == Ripper::EXPR_END ||
      other[3] == (Ripper::EXPR_END | Ripper::EXPR_LABEL)
  end
end
|
262
|
+
|
255
263
|
# A heredoc in this case is a list of tokens that belong to the body of the
|
256
264
|
# heredoc that should be appended onto the list of tokens when the heredoc
|
257
265
|
# closes.
|
@@ -558,18 +566,45 @@ module YARP
|
|
558
566
|
result_value = result.value
|
559
567
|
previous_state = nil
|
560
568
|
|
561
|
-
#
|
562
|
-
#
|
563
|
-
#
|
564
|
-
|
565
|
-
|
566
|
-
bom = source.bytes[0..2] == [0xEF, 0xBB, 0xBF]
|
567
|
-
result_value[0][0].value.prepend("\xEF\xBB\xBF") if bom
|
569
|
+
# In previous versions of Ruby, Ripper wouldn't flush the bom before the
|
570
|
+
# first token, so we had to have a hack in place to account for that. This
|
571
|
+
# checks for that behavior.
|
572
|
+
bom_flushed = Ripper.lex("\xEF\xBB\xBF# test")[0][0][1] == 0
|
573
|
+
bom = source.byteslice(0..2) == "\xEF\xBB\xBF"
|
568
574
|
|
569
575
|
result_value.each_with_index do |(token, lex_state), index|
|
570
576
|
lineno = token.location.start_line
|
571
577
|
column = token.location.start_column
|
572
|
-
|
578
|
+
|
579
|
+
# If there's a UTF-8 byte-order mark at the start of the file, then for
|
580
|
+
# certain tokens ripper sets the first token back by 3 bytes. It also
|
581
|
+
# keeps the byte order mark in the first token's value. This is weird,
|
582
|
+
# and I don't want to mirror that in our parser. So instead, we'll match
|
583
|
+
# up the columns and values here.
|
584
|
+
if bom && lineno == 1
|
585
|
+
column -= 3
|
586
|
+
|
587
|
+
if index == 0 && column == 0 && !bom_flushed
|
588
|
+
flushed =
|
589
|
+
case token.type
|
590
|
+
when :BACK_REFERENCE, :INSTANCE_VARIABLE, :CLASS_VARIABLE,
|
591
|
+
:GLOBAL_VARIABLE, :NUMBERED_REFERENCE, :PERCENT_LOWER_I,
|
592
|
+
:PERCENT_LOWER_X, :PERCENT_LOWER_W, :PERCENT_UPPER_I,
|
593
|
+
:PERCENT_UPPER_W, :STRING_BEGIN
|
594
|
+
true
|
595
|
+
when :REGEXP_BEGIN, :SYMBOL_BEGIN
|
596
|
+
token.value.start_with?("%")
|
597
|
+
else
|
598
|
+
false
|
599
|
+
end
|
600
|
+
|
601
|
+
unless flushed
|
602
|
+
column -= 3
|
603
|
+
value = token.value
|
604
|
+
value.prepend(String.new("\xEF\xBB\xBF", encoding: value.encoding))
|
605
|
+
end
|
606
|
+
end
|
607
|
+
end
|
573
608
|
|
574
609
|
event = RIPPER.fetch(token.type)
|
575
610
|
value = token.value
|
@@ -580,13 +615,23 @@ module YARP
|
|
580
615
|
when :on___end__
|
581
616
|
EndContentToken.new([[lineno, column], event, value, lex_state])
|
582
617
|
when :on_comment
|
583
|
-
|
618
|
+
IgnoreStateToken.new([[lineno, column], event, value, lex_state])
|
584
619
|
when :on_heredoc_end
|
585
620
|
# Heredoc end tokens can be emitted in an odd order, so we don't
|
586
621
|
# want to bother comparing the state on them.
|
587
|
-
|
588
|
-
when :
|
589
|
-
if lex_state == Ripper::EXPR_END
|
622
|
+
IgnoreStateToken.new([[lineno, column], event, value, lex_state])
|
623
|
+
when :on_ident
|
624
|
+
if lex_state == Ripper::EXPR_END
|
625
|
+
# If we have an identifier that follows a method name like:
|
626
|
+
#
|
627
|
+
# def foo bar
|
628
|
+
#
|
629
|
+
# then Ripper will mark bar as END|LABEL if there is a local in a
|
630
|
+
# parent scope named bar because it hasn't pushed the local table
|
631
|
+
# yet. We do this more accurately, so we need to allow comparing
|
632
|
+
# against both END and END|LABEL.
|
633
|
+
ParamToken.new([[lineno, column], event, value, lex_state])
|
634
|
+
elsif lex_state == Ripper::EXPR_END | Ripper::EXPR_LABEL
|
590
635
|
# In the event that we're comparing identifiers, we're going to
|
591
636
|
# allow a little divergence. Ripper doesn't account for local
|
592
637
|
# variables introduced through named captures in regexes, and we
|
@@ -595,6 +640,8 @@ module YARP
|
|
595
640
|
else
|
596
641
|
Token.new([[lineno, column], event, value, lex_state])
|
597
642
|
end
|
643
|
+
when :on_embexpr_end
|
644
|
+
IgnoreStateToken.new([[lineno, column], event, value, lex_state])
|
598
645
|
when :on_ignored_nl
|
599
646
|
# Ignored newlines can occasionally have a LABEL state attached to
|
600
647
|
# them which doesn't actually impact anything. We don't mirror that
|
@@ -629,6 +676,26 @@ module YARP
|
|
629
676
|
previous_state
|
630
677
|
end
|
631
678
|
|
679
|
+
Token.new([[lineno, column], event, value, lex_state])
|
680
|
+
when :on_eof
|
681
|
+
previous_token = result_value[index - 1][0]
|
682
|
+
|
683
|
+
# If we're at the end of the file and the previous token was a
|
684
|
+
# comment and there is still whitespace after the comment, then
|
685
|
+
# Ripper will append an on_nl token (even though there isn't
|
686
|
+
# necessarily a newline). We mirror that here.
|
687
|
+
start_offset = previous_token.location.end_offset
|
688
|
+
end_offset = token.location.start_offset
|
689
|
+
|
690
|
+
if previous_token.type == :COMMENT && start_offset < end_offset
|
691
|
+
if bom
|
692
|
+
start_offset += 3
|
693
|
+
end_offset += 3
|
694
|
+
end
|
695
|
+
|
696
|
+
tokens << Token.new([[lineno, 0], :on_nl, source.byteslice(start_offset...end_offset), lex_state])
|
697
|
+
end
|
698
|
+
|
632
699
|
Token.new([[lineno, column], event, value, lex_state])
|
633
700
|
else
|
634
701
|
Token.new([[lineno, column], event, value, lex_state])
|
@@ -713,7 +780,8 @@ module YARP
|
|
713
780
|
end
|
714
781
|
end
|
715
782
|
|
716
|
-
|
783
|
+
# Drop the EOF token from the list
|
784
|
+
tokens = tokens[0...-1]
|
717
785
|
|
718
786
|
# We sort by location to compare against Ripper's output
|
719
787
|
tokens.sort_by!(&:location)
|