yarp 0.10.0 → 0.11.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +25 -1
- data/CONTRIBUTING.md +7 -0
- data/config.yml +154 -43
- data/docs/configuration.md +0 -1
- data/docs/mapping.md +91 -91
- data/docs/serialization.md +23 -20
- data/ext/yarp/api_node.c +1074 -391
- data/ext/yarp/extension.c +1 -1
- data/ext/yarp/extension.h +2 -2
- data/include/yarp/ast.h +501 -301
- data/include/yarp/diagnostic.h +198 -1
- data/include/yarp/node.h +0 -4
- data/include/yarp/util/yp_char.h +1 -1
- data/include/yarp/util/yp_constant_pool.h +11 -4
- data/include/yarp/version.h +2 -2
- data/lib/yarp/desugar_visitor.rb +19 -19
- data/lib/yarp/mutation_visitor.rb +22 -12
- data/lib/yarp/node.rb +2883 -293
- data/lib/yarp/parse_result/comments.rb +172 -0
- data/lib/yarp/parse_result/newlines.rb +60 -0
- data/lib/yarp/pattern.rb +239 -0
- data/lib/yarp/serialize.rb +152 -129
- data/lib/yarp.rb +104 -44
- data/src/diagnostic.c +254 -2
- data/src/node.c +901 -868
- data/src/prettyprint.c +380 -186
- data/src/serialize.c +325 -170
- data/src/unescape.c +20 -20
- data/src/util/yp_char.c +2 -7
- data/src/util/yp_constant_pool.c +41 -8
- data/src/util/yp_newline_list.c +5 -1
- data/src/util/yp_string_list.c +4 -1
- data/src/yarp.c +946 -818
- data/yarp.gemspec +4 -1
- metadata +6 -3
@@ -0,0 +1,172 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module YARP
|
4
|
+
class ParseResult
|
5
|
+
# When we've parsed the source, we have both the syntax tree and the list of
|
6
|
+
# comments that we found in the source. This class is responsible for
|
7
|
+
# walking the tree and finding the nearest location to attach each comment.
|
8
|
+
#
|
9
|
+
# It does this by first finding the nearest locations to each comment.
|
10
|
+
# Locations can either come from nodes directly or from location fields on
|
11
|
+
# nodes. For example, a `ClassNode` has an overall location encompassing the
|
12
|
+
# entire class, but it also has a location for the `class` keyword.
|
13
|
+
#
|
14
|
+
# Once the nearest locations are found, it determines which one to attach
|
15
|
+
# to. If it's a trailing comment (a comment on the same line as other source
|
16
|
+
# code), it will favor attaching to the nearest location that occurs before
|
17
|
+
# the comment. Otherwise it will favor attaching to the nearest location
|
18
|
+
# that is after the comment.
|
19
|
+
class Comments
|
20
|
+
# A comment-attachment target that wraps a whole node. Its offsets are read
# from the node's own location, and attached comments are appended to that
# location's comment list.
class NodeTarget
  attr_reader :node

  def initialize(node)
    @node = node
  end

  # Offset at which the wrapped node's location starts.
  def start_offset
    node.location.start_offset
  end

  # Offset at which the wrapped node's location ends.
  def end_offset
    node.location.end_offset
  end

  # Whether the comment's span lies entirely within this node's span (both
  # boundaries inclusive).
  def encloses?(comment)
    span = comment.location
    span.start_offset >= start_offset && span.end_offset <= end_offset
  end

  # Appends the comment to the wrapped node's location.
  def <<(comment)
    node.location.comments << comment
  end
end
|
46
|
+
|
47
|
+
# A comment-attachment target that wraps a bare location taken from a field
# on a node (for example, the `end` token of a ClassNode) rather than a node
# itself.
class LocationTarget
  attr_reader :location

  def initialize(location)
    @location = location
  end

  # Offset at which the wrapped location starts.
  def start_offset
    location.start_offset
  end

  # Offset at which the wrapped location ends.
  def end_offset
    location.end_offset
  end

  # A location target never reports that it encloses a comment; the answer is
  # false regardless of the comment given.
  def encloses?(comment)
    false
  end

  # Appends the comment to the wrapped location.
  def <<(comment)
    location.comments << comment
  end
end
|
72
|
+
|
73
|
+
attr_reader :parse_result
|
74
|
+
|
75
|
+
# Keeps hold of the parse result; attach! reads its comments and its value
# (the root of the tree) from it.
def initialize(parse_result)
  @parse_result = parse_result
end
|
78
|
+
|
79
|
+
# Attaches every comment in the parse result to a target.
#
# For each comment the nearest preceding, enclosing and following targets
# are looked up. A trailing comment prefers the preceding target, any other
# comment prefers the following one; the remaining candidates are tried in
# turn, and a target for the root node is the last resort.
def attach!
  parse_result.comments.each do |comment|
    before, around, after = nearest_targets(parse_result.value, comment)

    # Candidates in order of preference; the first non-nil one wins.
    ordered = comment.trailing? ? [before, after, around] : [after, before, around]
    destination = ordered.find(&:itself) || NodeTarget.new(parse_result.value)

    destination << comment
  end
end
|
93
|
+
|
94
|
+
private
|
95
|
+
|
96
|
+
# Responsible for finding the nearest targets to the given comment within
# the context of the given encapsulating node.
#
# node    - the node whose comment_targets are searched.
# comment - the comment to place; only its location's start_offset and
#           end_offset are read here.
#
# Returns a three-element array [preceding, enclosing, following]. The
# enclosing entry is always a NodeTarget for the innermost node that was
# searched; preceding and following are nil when no target lies on that
# side of the comment.
#
# Raises RuntimeError if the comment partially overlaps a target.
def nearest_targets(node, comment)
  comment_start = comment.location.start_offset
  comment_end = comment.location.end_offset

  # Collect the candidate targets. A statements node is replaced by one
  # target for each entry in its body.
  targets = []
  node.comment_targets.each do |value|
    case value
    when StatementsNode
      value.body.each { |statement| targets << NodeTarget.new(statement) }
    when Node
      targets << NodeTarget.new(value)
    when Location
      targets << LocationTarget.new(value)
    end
  end

  targets.sort_by!(&:start_offset)
  preceding = nil
  following = nil

  left = 0
  right = targets.length

  # This is a custom binary search that finds the nearest nodes to the
  # given comment. When it finds a node that completely encapsulates the
  # comment, it recurses downward into the tree.
  while left < right
    middle = (left + right) / 2
    target = targets[middle]

    # The comment is completely contained by this target. Abandon the
    # binary search at this level.
    return nearest_targets(target.node, comment) if target.encloses?(comment)

    if target.end_offset <= comment_start
      # This target falls completely before the comment. Because we will
      # never consider this target or any targets before it again, this
      # target must be the closest preceding target we have encountered so
      # far.
      preceding = target
      left = middle + 1
    elsif comment_end <= target.start_offset
      # This target falls completely after the comment. Because we will
      # never consider this target or any targets after it again, this
      # target must be the closest following target we have encountered so
      # far.
      following = target
      right = middle
    else
      # This should only happen if there is a bug in this parser.
      raise "Comment location overlaps with a target location"
    end
  end

  [preceding, NodeTarget.new(node), following]
end
|
163
|
+
end
|
164
|
+
|
165
|
+
private_constant :Comments
|
166
|
+
|
167
|
+
# Attach the list of comments to their respective locations in the tree by
# handing this parse result to a Comments instance and running it.
def attach_comments!
  attacher = Comments.new(self)
  attacher.attach!
end
|
171
|
+
end
|
172
|
+
end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module YARP
|
4
|
+
class ParseResult
|
5
|
+
# The :line tracepoint event gets fired whenever the Ruby VM encounters an
|
6
|
+
# expression on a new line. The types of expressions that can trigger this
|
7
|
+
# event are:
|
8
|
+
#
|
9
|
+
# * if statements
|
10
|
+
# * unless statements
|
11
|
+
# * nodes that are children of statements lists
|
12
|
+
#
|
13
|
+
# In order to keep track of the newlines, we have a list of offsets that
|
14
|
+
# come back from the parser. We assign these offsets to the first nodes that
|
15
|
+
# we find in the tree that are on those lines.
|
16
|
+
#
|
17
|
+
# Note that the logic in this file should be kept in sync with the Java
|
18
|
+
# MarkNewlinesVisitor, since that visitor is responsible for marking the
|
19
|
+
# newlines for JRuby/TruffleRuby.
|
20
|
+
class Newlines < Visitor
  # newline_marked is the array that gets handed to set_newline_flag on the
  # nodes this visitor marks.
  def initialize(newline_marked)
    @newline_marked = newline_marked
  end

  # While the contents of a block are visited, a fresh all-false array of the
  # same size is swapped in. The previous array is put back afterwards, even
  # if visiting raises.
  def visit_block_node(node)
    saved_flags = @newline_marked
    @newline_marked = Array.new(saved_flags.size, false)
    super(node)
  ensure
    @newline_marked = saved_flags
  end

  # Lambdas are handled exactly like blocks.
  alias_method :visit_lambda_node, :visit_block_node

  # The if node itself is flagged before its children are visited.
  def visit_if_node(node)
    node.set_newline_flag(@newline_marked)
    super(node)
  end

  # Unless nodes are handled exactly like if nodes.
  alias_method :visit_unless_node, :visit_if_node

  # Every direct child of a statements list is flagged, then the list is
  # visited as usual.
  def visit_statements_node(node)
    node.body.each { |statement| statement.set_newline_flag(@newline_marked) }
    super(node)
  end
end
|
52
|
+
|
53
|
+
private_constant :Newlines
|
54
|
+
|
55
|
+
# Walk the tree and mark nodes that are on a new line. The visitor starts
# with an all-false array holding one more entry than the source has
# offsets.
def mark_newlines!
  initial_flags = Array.new(1 + source.offsets.size, false)
  value.accept(Newlines.new(initial_flags))
end
|
59
|
+
end
|
60
|
+
end
|
data/lib/yarp/pattern.rb
ADDED
@@ -0,0 +1,239 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module YARP
|
4
|
+
# A pattern is an object that wraps a Ruby pattern matching expression. The
|
5
|
+
# expression would normally be passed to an `in` clause within a `case`
|
6
|
+
# expression or a rightward assignment expression. For example, in the
|
7
|
+
# following snippet:
|
8
|
+
#
|
9
|
+
# case node
|
10
|
+
# in ConstantPathNode[ConstantReadNode[name: :YARP], ConstantReadNode[name: :Pattern]]
|
11
|
+
# end
|
12
|
+
#
|
13
|
+
# the pattern is the `ConstantPathNode[...]` expression.
|
14
|
+
#
|
15
|
+
# The pattern gets compiled into an object that responds to #call by running
|
16
|
+
# the #compile method. This method itself will run back through YARP to
|
17
|
+
# parse the expression into a tree, then walk the tree to generate the
|
18
|
+
# necessary callable objects. For example, if you wanted to compile the
|
19
|
+
# expression above into a callable, you would:
|
20
|
+
#
|
21
|
+
# callable = YARP::Pattern.new("ConstantPathNode[ConstantReadNode[name: :YARP], ConstantReadNode[name: :Pattern]]").compile
|
22
|
+
# callable.call(node)
|
23
|
+
#
|
24
|
+
# The callable object returned by #compile is guaranteed to respond to #call
|
25
|
+
# with a single argument, which is the node to match against. It also is
|
26
|
+
# guaranteed to respond to #===, which means it itself can be used in a `case`
|
27
|
+
# expression, as in:
|
28
|
+
#
|
29
|
+
# case node
|
30
|
+
# when callable
|
31
|
+
# end
|
32
|
+
#
|
33
|
+
# If the query given to the initializer cannot be compiled into a valid
|
34
|
+
# matcher (either because of a syntax error or because it is using syntax we
|
35
|
+
# do not yet support) then a YARP::Pattern::CompilationError will be
|
36
|
+
# raised.
|
37
|
+
class Pattern
|
38
|
+
# Raised when the query given to a pattern is either invalid Ruby syntax or
# is using syntax that we don't yet support.
class CompilationError < StandardError
  # repr is a printable description of whatever could not be compiled; it is
  # embedded verbatim, on its own paragraph, in the error message.
  def initialize(repr)
    super(<<~ERROR)
      YARP was unable to compile the pattern you provided into a usable
      expression. It failed to understand the node represented by:

      #{repr}

      Note that not all syntax supported by Ruby's pattern matching syntax
      is also supported by YARP's patterns. If you're using some syntax
      that you believe should be supported, please open an issue on
      GitHub at https://github.com/ruby/yarp/issues/new.
    ERROR
  end
end
|
55
|
+
|
56
|
+
attr_reader :query
|
57
|
+
|
58
|
+
# Stores the query. Nothing is compiled yet: the compiled matcher slot starts
# out as nil and is filled in the first time #scan needs it.
def initialize(query)
  @query, @compiled = query, nil
end
|
62
|
+
|
63
|
+
# Compiles the query into a callable matcher.
#
# The query is wrapped in a `case nil ... in <query> ... end` expression and
# parsed; the pattern of the last `in` clause is then compiled node by node.
#
# Raises CompilationError when the wrapped query does not parse cleanly, or
# when it uses a node that the compile_* methods do not handle.
def compile
  result = YARP.parse("case nil\nin #{query}\nend")

  # The parser still returns a tree when the source has syntax errors, so
  # the reported errors have to be checked explicitly; otherwise a partial
  # tree would be compiled as if it were what the caller asked for.
  raise CompilationError, query unless result.errors.empty?

  compile_node(result.value.statements.body.last.conditions.last.pattern)
end
|
67
|
+
|
68
|
+
# Walks the tree breadth-first starting at root and yields every node for
# which the compiled pattern returns a truthy value. The pattern is compiled
# on first use and then reused.
#
# Without a block, returns an enumerator over the same sequence of nodes.
def scan(root)
  return enum_for(:scan, root) unless block_given?

  matcher = (@compiled ||= compile)
  pending = [root]

  # Nodes are taken from the front and their children appended to the back;
  # nil children are dropped before they are queued.
  while (current = pending.shift)
    yield current if matcher.call(current)
    pending.concat(current.child_nodes.compact)
  end
end
|
79
|
+
|
80
|
+
private
|
81
|
+
|
82
|
+
# Builds a lambda that calls left and, only if that result is truthy, calls
# right with the same argument. The lambda's result is whatever `&&` gives
# for the two results.
def combine_and(left, right)
  lambda do |candidate|
    first = left.call(candidate)
    first && right.call(candidate)
  end
end
|
87
|
+
|
88
|
+
# Builds a lambda that calls left and, only if that result is falsy, calls
# right with the same argument. The lambda's result is whatever `||` gives
# for the two results.
def combine_or(left, right)
  lambda do |candidate|
    first = left.call(candidate)
    first || right.call(candidate)
  end
end
|
93
|
+
|
94
|
+
# Raise a CompilationError because the given node is not supported. The
# node's #inspect output is what the error is built from.
def compile_error(node)
  description = node.inspect
  raise CompilationError, description
end
|
98
|
+
|
99
|
+
# in [foo, bar, baz]
#
# Compiles an array pattern into a matcher. Only fixed-length patterns are
# supported: a pattern with a rest element or with elements after one is
# rejected with a compile error.
#
# The returned lambda matches a value when the optional constant matcher
# accepts it and its #deconstruct result has exactly as many elements as the
# pattern, each accepted by the matcher compiled for that position. A value
# that does not respond to #deconstruct simply does not match.
def compile_array_pattern_node(node)
  compile_error(node) if !node.rest.nil? || node.posts.any?

  constant = node.constant
  compiled_constant = compile_node(constant) if constant

  preprocessed = node.requireds.map { |required| compile_node(required) }

  compiled_requireds = ->(other) do
    # Without this guard a nil or scalar value reaching a constant-less
    # array pattern would raise NoMethodError instead of failing to match.
    next false unless other.respond_to?(:deconstruct)

    deconstructed = other.deconstruct

    deconstructed.length == preprocessed.length &&
      preprocessed
        .zip(deconstructed)
        .all? { |(matcher, value)| matcher.call(value) }
  end

  if compiled_constant
    combine_and(compiled_constant, compiled_requireds)
  else
    compiled_requireds
  end
end
|
123
|
+
|
124
|
+
# in foo | bar
#
# Compiles both sides of the alternation (left first) and joins the two
# matchers with combine_or.
def compile_alternation_pattern_node(node)
  left_matcher = compile_node(node.left)
  right_matcher = compile_node(node.right)

  combine_or(left_matcher, right_matcher)
end
|
128
|
+
|
129
|
+
# in YARP::ConstantReadNode
#
# A constant path is only accepted when its parent is a plain constant read
# whose source text is exactly "YARP"; in that case the child is compiled.
# Every other constant path is a compile error.
def compile_constant_path_node(node)
  parent = node.parent
  yarp_scoped = parent.is_a?(ConstantReadNode) && parent.slice == "YARP"
  return compile_error(node) unless yarp_scoped

  compile_node(node.child)
end
|
139
|
+
|
140
|
+
# in ConstantReadNode
# in String
#
# Resolves the constant named by the node's source text, looking at
# constants defined directly on YARP first and directly on Object second,
# and returns a lambda that tests values against it with ===. A name found
# in neither place is a compile error.
def compile_constant_read_node(node)
  name = node.slice

  namespace = [YARP, Object].find { |scope| scope.const_defined?(name, false) }
  return compile_error(node) unless namespace

  resolved = namespace.const_get(name)
  ->(other) { resolved === other }
end
|
157
|
+
|
158
|
+
# in InstanceVariableReadNode[name: Symbol]
# in { name: Symbol }
#
# Compiles a hash pattern into a matcher. A pattern with a keyword rest is
# rejected with a compile error.
#
# The returned lambda matches a value when the optional constant matcher
# accepts it and, for every key in the pattern, the value's
# #deconstruct_keys result contains that key with a value accepted by the
# matcher compiled for it. A value that does not respond to
# #deconstruct_keys simply does not match.
def compile_hash_pattern_node(node)
  compile_error(node) unless node.kwrest.nil?
  compiled_constant = compile_node(node.constant) if node.constant

  # Maps each pattern key (as a symbol) to the matcher for its value.
  preprocessed =
    node.assocs.to_h do |assoc|
      [assoc.key.unescaped.to_sym, compile_node(assoc.value)]
    end

  compiled_keywords = ->(other) do
    # Without this guard a nil or scalar value reaching a constant-less
    # hash pattern would raise NoMethodError instead of failing to match.
    next false unless other.respond_to?(:deconstruct_keys)

    deconstructed = other.deconstruct_keys(preprocessed.keys)

    preprocessed.all? do |keyword, matcher|
      deconstructed.key?(keyword) && matcher.call(deconstructed[keyword])
    end
  end

  if compiled_constant
    combine_and(compiled_constant, compiled_keywords)
  else
    compiled_keywords
  end
end
|
183
|
+
|
184
|
+
# in nil
#
# The node carries no information that is needed here; the matcher just
# asks the value whether it is nil.
def compile_nil_node(node)
  lambda { |attribute| attribute.nil? }
end
|
188
|
+
|
189
|
+
# in /foo/
#
# Builds a Regexp from the node's unescaped source and returns a lambda that
# tests values against it with ===.
#
# The flags are whatever follows the first character of the closing
# delimiter. The i, m and x flags are translated into the matching Regexp
# option bits; any other flag character is ignored. The flags are
# deliberately not passed to Regexp.new as a String: older Rubies treat any
# truthy non-Integer option as "ignore case", and newer ones raise on flag
# characters other than i, m and x.
def compile_regular_expression_node(node)
  flag_bits = {
    "i" => Regexp::IGNORECASE,
    "m" => Regexp::MULTILINE,
    "x" => Regexp::EXTENDED
  }

  options =
    node.closing[1..].each_char.reduce(0) do |bits, flag|
      bits | flag_bits.fetch(flag, 0)
    end

  regexp = Regexp.new(node.unescaped, options)

  ->(attribute) { regexp === attribute }
end
|
195
|
+
|
196
|
+
# in ""
# in "foo"
#
# Returns a lambda that compares values against the node's unescaped string
# content with ===.
def compile_string_node(node)
  expected = node.unescaped

  lambda { |attribute| expected === attribute }
end
|
203
|
+
|
204
|
+
# in :+
# in :foo
#
# Returns a lambda that compares values against the symbol named by the
# node's unescaped content with ===.
def compile_symbol_node(node)
  expected = node.unescaped.to_sym

  lambda { |attribute| expected === attribute }
end
|
211
|
+
|
212
|
+
# Compile any kind of node by dispatching on its class to the matching
# compile_* method. A node of any class not listed here is a compile error.
def compile_node(node)
  case node
  when AlternationPatternNode then compile_alternation_pattern_node(node)
  when ArrayPatternNode       then compile_array_pattern_node(node)
  when ConstantPathNode       then compile_constant_path_node(node)
  when ConstantReadNode       then compile_constant_read_node(node)
  when HashPatternNode        then compile_hash_pattern_node(node)
  when NilNode                then compile_nil_node(node)
  when RegularExpressionNode  then compile_regular_expression_node(node)
  when StringNode             then compile_string_node(node)
  when SymbolNode             then compile_symbol_node(node)
  else compile_error(node)
  end
end
|
238
|
+
end
|
239
|
+
end
|