rubocop-ast 0.0.2 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +2 -2
- data/lib/rubocop/ast.rb +3 -6
- data/lib/rubocop/ast/node_pattern.rb +883 -0
- data/lib/rubocop/ast/processed_source.rb +203 -0
- data/lib/rubocop/ast/token.rb +116 -0
- data/lib/rubocop/ast/version.rb +1 -1
- metadata +5 -6
- data/lib/rubocop/error.rb +0 -34
- data/lib/rubocop/node_pattern.rb +0 -881
- data/lib/rubocop/processed_source.rb +0 -211
- data/lib/rubocop/token.rb +0 -114
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b0c196be6c58699fcb84209860508c96e69e8122adb8e14d6641134009802712
|
4
|
+
data.tar.gz: a2d50c781f612349b4aa7e24a4ea4008c359ce766a6f5e0f7e925ac78d57dbb7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2d263c6a0ab978cb1b81dde253ce752c2b64af9e94c59ac6a6a364aef3cec37c03028e4d7c00086c11b8b46abae9373edad636528782f84ad914ea527b644ec0
|
7
|
+
data.tar.gz: edeefb4a74f678920907e130ea6a7fea86b4c67cf9a9bbaefa21ef231738c0de541d61de5d9c234384e46b3243a74ffaac84288b2c68c49e3178cb39d2d96b37
|
data/README.md
CHANGED
@@ -5,7 +5,7 @@
|
|
5
5
|
|
6
6
|
Contains the classes needed by [RuboCop](https://github.com/rubocop-hq/rubocop) to deal with Ruby's AST, in particular:
|
7
7
|
* `RuboCop::AST::Node`
|
8
|
-
* `RuboCop::NodePattern` ([doc](manual/node_pattern.md))
|
8
|
+
* `RuboCop::AST::NodePattern` ([doc](manual/node_pattern.md))
|
9
9
|
|
10
10
|
This gem may be used independently from the main RuboCop gem.
|
11
11
|
|
@@ -25,7 +25,7 @@ gem 'rubocop-ast'
|
|
25
25
|
|
26
26
|
## Usage
|
27
27
|
|
28
|
-
Refer to the documentation of `RuboCop::AST::Node` and [`RuboCop::NodePattern`](manual/node_pattern.md)
|
28
|
+
Refer to the documentation of `RuboCop::AST::Node` and [`RuboCop::AST::NodePattern`](manual/node_pattern.md)
|
29
29
|
|
30
30
|
## Contributing
|
31
31
|
|
data/lib/rubocop/ast.rb
CHANGED
@@ -3,9 +3,7 @@
|
|
3
3
|
require 'parser'
|
4
4
|
require 'forwardable'
|
5
5
|
|
6
|
-
require_relative 'error'
|
7
|
-
require_relative 'node_pattern'
|
8
|
-
|
6
|
+
require_relative 'ast/node_pattern'
|
9
7
|
require_relative 'ast/sexp'
|
10
8
|
require_relative 'ast/node'
|
11
9
|
require_relative 'ast/node/mixin/method_identifier_predicates'
|
@@ -56,8 +54,7 @@ require_relative 'ast/node/when_node'
|
|
56
54
|
require_relative 'ast/node/while_node'
|
57
55
|
require_relative 'ast/node/yield_node'
|
58
56
|
require_relative 'ast/builder'
|
57
|
+
require_relative 'ast/processed_source'
|
58
|
+
require_relative 'ast/token'
|
59
59
|
require_relative 'ast/traversal'
|
60
60
|
require_relative 'ast/version'
|
61
|
-
|
62
|
-
require_relative 'token'
|
63
|
-
require_relative 'processed_source'
|
@@ -0,0 +1,883 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'delegate'
|
4
|
+
require 'erb'
|
5
|
+
|
6
|
+
# rubocop:disable Metrics/ClassLength, Metrics/CyclomaticComplexity
|
7
|
+
module RuboCop
|
8
|
+
module AST
|
9
|
+
# This class performs a pattern-matching operation on an AST node.
|
10
|
+
#
|
11
|
+
# Initialize a new `NodePattern` with `NodePattern.new(pattern_string)`, then
|
12
|
+
# pass an AST node to `NodePattern#match`. Alternatively, use one of the class
|
13
|
+
# macros in `NodePattern::Macros` to define your own pattern-matching method.
|
14
|
+
#
|
15
|
+
# If the match fails, `nil` will be returned. If the match succeeds, the
|
16
|
+
# return value depends on whether a block was provided to `#match`, and
|
17
|
+
# whether the pattern contained any "captures" (values which are extracted
|
18
|
+
# from a matching AST.)
|
19
|
+
#
|
20
|
+
# - With block: #match yields the captures (if any) and passes the return
|
21
|
+
# value of the block through.
|
22
|
+
# - With no block, but one capture: the capture is returned.
|
23
|
+
# - With no block, but multiple captures: captures are returned as an array.
|
24
|
+
# - With no block and no captures: #match returns `true`.
|
25
|
+
#
|
26
|
+
# ## Pattern string format examples
|
27
|
+
#
|
28
|
+
# ':sym' # matches a literal symbol
|
29
|
+
# '1' # matches a literal integer
|
30
|
+
# 'nil' # matches a literal nil
|
31
|
+
# 'send' # matches (send ...)
|
32
|
+
# '(send)' # matches (send)
|
33
|
+
# '(send ...)' # matches (send ...)
|
34
|
+
# '(op-asgn)' # node types with hyphenated names also work
|
35
|
+
# '{send class}' # matches (send ...) or (class ...)
|
36
|
+
# '({send class})' # matches (send) or (class)
|
37
|
+
# '(send const)' # matches (send (const ...))
|
38
|
+
# '(send _ :new)' # matches (send <anything> :new)
|
39
|
+
# '(send $_ :new)' # as above, but whatever matches the $_ is captured
|
40
|
+
# '(send $_ $_)' # you can use as many captures as you want
|
41
|
+
# '(send !const ...)' # ! negates the next part of the pattern
|
42
|
+
# '$(send const ...)' # arbitrary matching can be performed on a capture
|
43
|
+
# '(send _recv _msg)' # wildcards can be named (for readability)
|
44
|
+
# '(send ... :new)' # you can match against the last children
|
45
|
+
# '(array <str sym>)' # you can match children in any order. This
|
46
|
+
# # would match `['x', :y]` as well as `[:y, 'x']`
|
47
|
+
# '(_ <str sym ...>)' # will match if arguments have at least a `str` and
|
48
|
+
# # a `sym` node, but can have more.
|
49
|
+
# '(array <$str $_>)' # captures are in the order of the pattern,
|
50
|
+
# # irrespective of the actual order of the children
|
51
|
+
# '(array int*)' # will match an array of 0 or more integers
|
52
|
+
# '(array int ?)' # will match 0 or 1 integer.
|
53
|
+
# # Note: Space needed to distinguish from int?
|
54
|
+
# '(array int+)' # will match an array of 1 or more integers
|
55
|
+
# '(array (int $_)+)' # as above and will capture the numbers in an array
|
56
|
+
# '(send $...)' # capture all the children as an array
|
57
|
+
# '(send $... int)' # capture all children but the last as an array
|
58
|
+
# '(send _x :+ _x)' # unification is performed on named wildcards
|
59
|
+
# # (like Prolog variables...)
|
60
|
+
# # (#== is used to see if values unify)
|
61
|
+
# '(int odd?)' # words which end with a ? are predicate methods,
|
62
|
+
# # and are called on the target to see if it matches
|
63
|
+
# # any Ruby method which the matched object supports
|
64
|
+
# # can be used
|
65
|
+
# # if a truthy value is returned, the match succeeds
|
66
|
+
# '(int [!1 !2])' # [] contains multiple patterns, ALL of which must
|
67
|
+
# # match in that position
|
68
|
+
# # in other words, while {} is pattern union (logical
|
69
|
+
# # OR), [] is intersection (logical AND)
|
70
|
+
# '(send %1 _)' # % stands for a parameter which must be supplied to
|
71
|
+
# # #match at matching time
|
72
|
+
# # it will be compared to the corresponding value in
|
73
|
+
# # the AST using #==
|
74
|
+
# # a bare '%' is the same as '%1'
|
75
|
+
# # the number of extra parameters passed to #match
|
76
|
+
# # must equal the highest % value in the pattern
|
77
|
+
# # for consistency, %0 is the 'root node' which is
|
78
|
+
# # passed as the 1st argument to #match, where the
|
79
|
+
# # matching process starts
|
80
|
+
# '^^send' # each ^ ascends one level in the AST
|
81
|
+
# # so this matches against the grandparent node
|
82
|
+
# '`send' # descends any number of levels in the AST
|
83
|
+
# # so this matches against any descendant node
|
84
|
+
# '#method' # we call this a 'funcall'; it calls a method in the
|
85
|
+
# # context where a pattern-matching method is defined
|
86
|
+
# # if that returns a truthy value, the match succeeds
|
87
|
+
# 'equal?(%1)' # predicates can be given 1 or more extra args
|
88
|
+
# '#method(%0, 1)' # funcalls can also be given 1 or more extra args
|
89
|
+
#
|
90
|
+
# You can nest arbitrarily deep:
|
91
|
+
#
|
92
|
+
# # matches node parsed from 'Const = Class.new' or 'Const = Module.new':
|
93
|
+
# '(casgn nil? :Const (send (const nil? {:Class :Module}) :new))'
|
94
|
+
# # matches a node parsed from an 'if', with a '==' comparison,
|
95
|
+
# # and no 'else' branch:
|
96
|
+
# '(if (send _ :== _) _ nil?)'
|
97
|
+
#
|
98
|
+
# Note that patterns like 'send' are implemented by calling `#send_type?` on
|
99
|
+
# the node being matched, 'const' by `#const_type?`, 'int' by `#int_type?`,
|
100
|
+
# and so on. Therefore, if you add methods which are named like
|
101
|
+
# `#prefix_type?` to the AST node class, then 'prefix' will become usable as
|
102
|
+
# a pattern.
|
103
|
+
#
|
104
|
+
# Also note that if you need a "guard clause" to protect against possible nils
|
105
|
+
# in a certain place in the AST, you can do it like this: `[!nil <pattern>]`
|
106
|
+
#
|
107
|
+
# The compiler code is very simple; don't be afraid to read through it!
|
108
|
+
class NodePattern
|
109
|
+
# @private
|
110
|
+
Invalid = Class.new(StandardError)
|
111
|
+
|
112
|
+
# @private
|
113
|
+
# Builds Ruby code which implements a pattern
|
114
|
+
class Compiler
|
115
|
+
SYMBOL = %r{:(?:[\w+@*/?!<>=~|%^-]+|\[\]=?)}.freeze
|
116
|
+
IDENTIFIER = /[a-zA-Z_][a-zA-Z0-9_-]*/.freeze
|
117
|
+
META = Regexp.union(
|
118
|
+
%w"( ) { } [ ] $< < > $... $ ! ^ ` ... + * ?"
|
119
|
+
).freeze
|
120
|
+
NUMBER = /-?\d+(?:\.\d+)?/.freeze
|
121
|
+
STRING = /".+?"/.freeze
|
122
|
+
METHOD_NAME = /\#?#{IDENTIFIER}[\!\?]?\(?/.freeze
|
123
|
+
PARAM_NUMBER = /%\d*/.freeze
|
124
|
+
|
125
|
+
SEPARATORS = /[\s]+/.freeze
|
126
|
+
TOKENS = Regexp.union(META, PARAM_NUMBER, NUMBER,
|
127
|
+
METHOD_NAME, SYMBOL, STRING)
|
128
|
+
|
129
|
+
TOKEN = /\G(?:#{SEPARATORS}|#{TOKENS}|.)/.freeze
|
130
|
+
|
131
|
+
NODE = /\A#{IDENTIFIER}\Z/.freeze
|
132
|
+
PREDICATE = /\A#{IDENTIFIER}\?\(?\Z/.freeze
|
133
|
+
WILDCARD = /\A_(?:#{IDENTIFIER})?\Z/.freeze
|
134
|
+
|
135
|
+
FUNCALL = /\A\##{METHOD_NAME}/.freeze
|
136
|
+
LITERAL = /\A(?:#{SYMBOL}|#{NUMBER}|#{STRING})\Z/.freeze
|
137
|
+
PARAM = /\A#{PARAM_NUMBER}\Z/.freeze
|
138
|
+
CLOSING = /\A(?:\)|\}|\])\Z/.freeze
|
139
|
+
|
140
|
+
REST = '...'
|
141
|
+
CAPTURED_REST = '$...'
|
142
|
+
|
143
|
+
attr_reader :match_code, :tokens, :captures
|
144
|
+
|
145
|
+
SEQ_HEAD_INDEX = -1
|
146
|
+
|
147
|
+
# Placeholders while compiling, see with_..._context methods
|
148
|
+
CUR_PLACEHOLDER = '@@@cur'
|
149
|
+
CUR_NODE = "#{CUR_PLACEHOLDER} node@@@"
|
150
|
+
CUR_ELEMENT = "#{CUR_PLACEHOLDER} element@@@"
|
151
|
+
SEQ_HEAD_GUARD = '@@@seq guard head@@@'
|
152
|
+
|
153
|
+
line = __LINE__
|
154
|
+
ANY_ORDER_TEMPLATE = ERB.new <<~RUBY.gsub("-%>\n", '%>')
|
155
|
+
<% if capture_rest %>(<%= capture_rest %> = []) && <% end -%>
|
156
|
+
<% if capture_all %>(<%= capture_all %> = <% end -%>
|
157
|
+
<%= CUR_NODE %>.children[<%= range %>]<% if capture_all %>)<% end -%>
|
158
|
+
.each_with_object({}) { |<%= child %>, <%= matched %>|
|
159
|
+
case
|
160
|
+
<% patterns.each_with_index do |pattern, i| -%>
|
161
|
+
when !<%= matched %>[<%= i %>] && <%=
|
162
|
+
with_context(pattern, child, use_temp_node: false)
|
163
|
+
%> then <%= matched %>[<%= i %>] = true
|
164
|
+
<% end -%>
|
165
|
+
<% if !rest %> else break({})
|
166
|
+
<% elsif capture_rest %> else <%= capture_rest %> << <%= child %>
|
167
|
+
<% end -%>
|
168
|
+
end
|
169
|
+
}.size == <%= patterns.size -%>
|
170
|
+
RUBY
|
171
|
+
ANY_ORDER_TEMPLATE.location = [__FILE__, line + 1]
|
172
|
+
|
173
|
+
line = __LINE__
|
174
|
+
REPEATED_TEMPLATE = ERB.new <<~RUBY.gsub("-%>\n", '%>')
|
175
|
+
<% if captured %>(<%= accumulate %> = Array.new) && <% end %>
|
176
|
+
<%= CUR_NODE %>.children[<%= range %>].all? do |<%= child %>|
|
177
|
+
<%= with_context(expr, child, use_temp_node: false) %><% if captured %>&&
|
178
|
+
<%= accumulate %>.push(<%= captured %>)<% end %>
|
179
|
+
end <% if captured %>&&
|
180
|
+
(<%= captured %> = if <%= accumulate %>.empty?
|
181
|
+
<%= captured %>.map{[]} # Transpose hack won't work for empty case
|
182
|
+
else
|
183
|
+
<%= accumulate %>.transpose
|
184
|
+
end) <% end -%>
|
185
|
+
RUBY
|
186
|
+
REPEATED_TEMPLATE.location = [__FILE__, line + 1]
|
187
|
+
|
188
|
+
def initialize(str, node_var = 'node0')
|
189
|
+
@string = str
|
190
|
+
@root = node_var
|
191
|
+
|
192
|
+
@temps = 0 # avoid name clashes between temp variables
|
193
|
+
@captures = 0 # number of captures seen
|
194
|
+
@unify = {} # named wildcard -> temp variable
|
195
|
+
@params = 0 # highest % (param) number seen
|
196
|
+
run(node_var)
|
197
|
+
end
|
198
|
+
|
199
|
+
def run(node_var)
|
200
|
+
@tokens = Compiler.tokens(@string)
|
201
|
+
|
202
|
+
@match_code = with_context(compile_expr, node_var, use_temp_node: false)
|
203
|
+
@match_code.prepend("(captures = Array.new(#{@captures})) && ") \
|
204
|
+
if @captures.positive?
|
205
|
+
|
206
|
+
fail_due_to('unbalanced pattern') unless tokens.empty?
|
207
|
+
end
|
208
|
+
|
209
|
+
# rubocop:disable Metrics/MethodLength, Metrics/AbcSize
|
210
|
+
def compile_expr(token = tokens.shift)
|
211
|
+
# read a single pattern-matching expression from the token stream,
|
212
|
+
# return Ruby code which performs the corresponding matching operation
|
213
|
+
#
|
214
|
+
# the 'pattern-matching' expression may be a composite which
|
215
|
+
# contains an arbitrary number of sub-expressions, but that composite
|
216
|
+
# must all have precedence higher or equal to that of `&&`
|
217
|
+
#
|
218
|
+
# Expressions may use placeholders like:
|
219
|
+
# CUR_NODE: Ruby code that evaluates to an AST node
|
220
|
+
# CUR_ELEMENT: Either the node or the type if in first element of
|
221
|
+
# a sequence (aka seq_head, e.g. "(seq_head first_node_arg ...")
|
222
|
+
case token
|
223
|
+
when '(' then compile_seq
|
224
|
+
when '{' then compile_union
|
225
|
+
when '[' then compile_intersect
|
226
|
+
when '!' then compile_negation
|
227
|
+
when '$' then compile_capture
|
228
|
+
when '^' then compile_ascend
|
229
|
+
when '`' then compile_descend
|
230
|
+
when WILDCARD then compile_wildcard(token[1..-1])
|
231
|
+
when FUNCALL then compile_funcall(token)
|
232
|
+
when LITERAL then compile_literal(token)
|
233
|
+
when PREDICATE then compile_predicate(token)
|
234
|
+
when NODE then compile_nodetype(token)
|
235
|
+
when PARAM then compile_param(token[1..-1])
|
236
|
+
when CLOSING then fail_due_to("#{token} in invalid position")
|
237
|
+
when nil then fail_due_to('pattern ended prematurely')
|
238
|
+
else fail_due_to("invalid token #{token.inspect}")
|
239
|
+
end
|
240
|
+
end
|
241
|
+
# rubocop:enable Metrics/MethodLength, Metrics/AbcSize
|
242
|
+
|
243
|
+
def tokens_until(stop, what)
|
244
|
+
return to_enum __method__, stop, what unless block_given?
|
245
|
+
|
246
|
+
fail_due_to("empty #{what}") if tokens.first == stop && what
|
247
|
+
yield until tokens.first == stop
|
248
|
+
tokens.shift
|
249
|
+
end
|
250
|
+
|
251
|
+
def compile_seq
|
252
|
+
terms = tokens_until(')', 'sequence').map { variadic_seq_term }
|
253
|
+
Sequence.new(self, *terms).compile
|
254
|
+
end
|
255
|
+
|
256
|
+
def compile_guard_clause
|
257
|
+
"#{CUR_NODE}.is_a?(RuboCop::AST::Node)"
|
258
|
+
end
|
259
|
+
|
260
|
+
def variadic_seq_term
|
261
|
+
token = tokens.shift
|
262
|
+
case token
|
263
|
+
when CAPTURED_REST then compile_captured_ellipsis
|
264
|
+
when REST then compile_ellipsis
|
265
|
+
when '$<' then compile_any_order(next_capture)
|
266
|
+
when '<' then compile_any_order
|
267
|
+
else compile_repeated_expr(token)
|
268
|
+
end
|
269
|
+
end
|
270
|
+
|
271
|
+
def compile_repeated_expr(token)
|
272
|
+
before = @captures
|
273
|
+
expr = compile_expr(token)
|
274
|
+
min, max = parse_repetition_token
|
275
|
+
return [1, expr] if min.nil?
|
276
|
+
|
277
|
+
if @captures != before
|
278
|
+
captured = "captures[#{before}...#{@captures}]"
|
279
|
+
accumulate = next_temp_variable(:accumulate)
|
280
|
+
end
|
281
|
+
arity = min..max || Float::INFINITY
|
282
|
+
|
283
|
+
[arity, repeated_generator(expr, captured, accumulate)]
|
284
|
+
end
|
285
|
+
|
286
|
+
def repeated_generator(expr, captured, accumulate)
|
287
|
+
with_temp_variables do |child|
|
288
|
+
lambda do |range|
|
289
|
+
fail_due_to 'repeated pattern at beginning of sequence' if range.begin == SEQ_HEAD_INDEX
|
290
|
+
REPEATED_TEMPLATE.result(binding)
|
291
|
+
end
|
292
|
+
end
|
293
|
+
end
|
294
|
+
|
295
|
+
def parse_repetition_token
|
296
|
+
case tokens.first
|
297
|
+
when '*' then min = 0
|
298
|
+
when '+' then min = 1
|
299
|
+
when '?' then min = 0
|
300
|
+
max = 1
|
301
|
+
else return
|
302
|
+
end
|
303
|
+
tokens.shift
|
304
|
+
[min, max]
|
305
|
+
end
|
306
|
+
|
307
|
+
# @private
|
308
|
+
# Builds Ruby code for a sequence
|
309
|
+
# (head *first_terms variadic_term *last_terms)
|
310
|
+
class Sequence < SimpleDelegator
|
311
|
+
def initialize(compiler, *arity_term_list)
|
312
|
+
@arities, @terms = arity_term_list.transpose
|
313
|
+
|
314
|
+
super(compiler)
|
315
|
+
@variadic_index = @arities.find_index { |a| a.is_a?(Range) }
|
316
|
+
fail_due_to 'multiple variable patterns in same sequence' \
|
317
|
+
if @variadic_index && !@arities.one? { |a| a.is_a?(Range) }
|
318
|
+
end
|
319
|
+
|
320
|
+
def compile
|
321
|
+
[
|
322
|
+
compile_guard_clause,
|
323
|
+
compile_child_nb_guard,
|
324
|
+
compile_seq_head,
|
325
|
+
*compile_first_terms,
|
326
|
+
compile_variadic_term,
|
327
|
+
*compile_last_terms
|
328
|
+
].compact.join(" &&\n") << SEQ_HEAD_GUARD
|
329
|
+
end
|
330
|
+
|
331
|
+
private
|
332
|
+
|
333
|
+
def first_terms_arity
|
334
|
+
first_terms_range { |r| @arities[r].inject(0, :+) } || 0
|
335
|
+
end
|
336
|
+
|
337
|
+
def last_terms_arity
|
338
|
+
last_terms_range { |r| @arities[r].inject(0, :+) } || 0
|
339
|
+
end
|
340
|
+
|
341
|
+
def variadic_term_min_arity
|
342
|
+
@variadic_index ? @arities[@variadic_index].begin : 0
|
343
|
+
end
|
344
|
+
|
345
|
+
def first_terms_range
|
346
|
+
yield 1..(@variadic_index || @terms.size) - 1 if seq_head?
|
347
|
+
end
|
348
|
+
|
349
|
+
def last_terms_range
|
350
|
+
yield @variadic_index + 1...@terms.size if @variadic_index
|
351
|
+
end
|
352
|
+
|
353
|
+
def seq_head?
|
354
|
+
@variadic_index != 0
|
355
|
+
end
|
356
|
+
|
357
|
+
def compile_child_nb_guard
|
358
|
+
fixed = first_terms_arity + last_terms_arity
|
359
|
+
min = fixed + variadic_term_min_arity
|
360
|
+
op = if @variadic_index
|
361
|
+
max_variadic = @arities[@variadic_index].end
|
362
|
+
if max_variadic != Float::INFINITY
|
363
|
+
range = min..fixed + max_variadic
|
364
|
+
return "(#{range}).cover?(#{CUR_NODE}.children.size)"
|
365
|
+
end
|
366
|
+
'>='
|
367
|
+
else
|
368
|
+
'=='
|
369
|
+
end
|
370
|
+
"#{CUR_NODE}.children.size #{op} #{min}"
|
371
|
+
end
|
372
|
+
|
373
|
+
def term(index, range)
|
374
|
+
t = @terms[index]
|
375
|
+
if t.respond_to? :call
|
376
|
+
t.call(range)
|
377
|
+
else
|
378
|
+
with_child_context(t, range.begin)
|
379
|
+
end
|
380
|
+
end
|
381
|
+
|
382
|
+
def compile_seq_head
|
383
|
+
return unless seq_head?
|
384
|
+
|
385
|
+
fail_due_to 'sequences cannot start with <' \
|
386
|
+
if @terms[0].respond_to? :call
|
387
|
+
|
388
|
+
with_seq_head_context(@terms[0])
|
389
|
+
end
|
390
|
+
|
391
|
+
def compile_first_terms
|
392
|
+
first_terms_range { |range| compile_terms(range, 0) }
|
393
|
+
end
|
394
|
+
|
395
|
+
def compile_last_terms
|
396
|
+
last_terms_range { |r| compile_terms(r, -last_terms_arity) }
|
397
|
+
end
|
398
|
+
|
399
|
+
def compile_terms(index_range, start)
|
400
|
+
index_range.map do |i|
|
401
|
+
current = start
|
402
|
+
start += @arities.fetch(i)
|
403
|
+
term(i, current..start - 1)
|
404
|
+
end
|
405
|
+
end
|
406
|
+
|
407
|
+
def compile_variadic_term
|
408
|
+
variadic_arity { |arity| term(@variadic_index, arity) }
|
409
|
+
end
|
410
|
+
|
411
|
+
def variadic_arity
|
412
|
+
return unless @variadic_index
|
413
|
+
|
414
|
+
first = @variadic_index.positive? ? first_terms_arity : SEQ_HEAD_INDEX
|
415
|
+
yield first..-last_terms_arity - 1
|
416
|
+
end
|
417
|
+
end
|
418
|
+
private_constant :Sequence
|
419
|
+
|
420
|
+
def compile_captured_ellipsis
|
421
|
+
capture = next_capture
|
422
|
+
block = lambda { |range|
|
423
|
+
# Consider ($...) like (_ $...):
|
424
|
+
range = 0..range.end if range.begin == SEQ_HEAD_INDEX
|
425
|
+
"(#{capture} = #{CUR_NODE}.children[#{range}])"
|
426
|
+
}
|
427
|
+
[0..Float::INFINITY, block]
|
428
|
+
end
|
429
|
+
|
430
|
+
def compile_ellipsis
|
431
|
+
[0..Float::INFINITY, 'true']
|
432
|
+
end
|
433
|
+
|
434
|
+
# rubocop:disable Metrics/AbcSize
|
435
|
+
# rubocop:disable Metrics/MethodLength
|
436
|
+
def compile_any_order(capture_all = nil)
|
437
|
+
rest = capture_rest = nil
|
438
|
+
patterns = []
|
439
|
+
with_temp_variables do |child, matched|
|
440
|
+
tokens_until('>', 'any child') do
|
441
|
+
fail_due_to 'ellipsis must be at the end of <>' if rest
|
442
|
+
token = tokens.shift
|
443
|
+
case token
|
444
|
+
when CAPTURED_REST then rest = capture_rest = next_capture
|
445
|
+
when REST then rest = true
|
446
|
+
else patterns << compile_expr(token)
|
447
|
+
end
|
448
|
+
end
|
449
|
+
[rest ? patterns.size..Float::INFINITY : patterns.size,
|
450
|
+
->(range) { ANY_ORDER_TEMPLATE.result(binding) }]
|
451
|
+
end
|
452
|
+
end
|
453
|
+
# rubocop:enable Metrics/MethodLength
|
454
|
+
# rubocop:enable Metrics/AbcSize
|
455
|
+
|
456
|
+
def insure_same_captures(enum, what)
|
457
|
+
return to_enum __method__, enum, what unless block_given?
|
458
|
+
|
459
|
+
captures_before = captures_after = nil
|
460
|
+
enum.each do
|
461
|
+
captures_before ||= @captures
|
462
|
+
@captures = captures_before
|
463
|
+
yield
|
464
|
+
captures_after ||= @captures
|
465
|
+
fail_due_to("each #{what} must have same # of captures") if captures_after != @captures
|
466
|
+
end
|
467
|
+
end
|
468
|
+
|
469
|
+
def access_unify(name)
|
470
|
+
var = @unify[name]
|
471
|
+
|
472
|
+
if var == :forbidden_unification
|
473
|
+
fail_due_to "Wildcard #{name} was first seen in a subset of a" \
|
474
|
+
" union and can't be used outside that union"
|
475
|
+
end
|
476
|
+
var
|
477
|
+
end
|
478
|
+
|
479
|
+
def forbid_unification(*names)
|
480
|
+
names.each do |name|
|
481
|
+
@unify[name] = :forbidden_unification
|
482
|
+
end
|
483
|
+
end
|
484
|
+
|
485
|
+
# rubocop:disable Metrics/MethodLength, Metrics/AbcSize
|
486
|
+
def unify_in_union(enum)
|
487
|
+
# We need to reset @unify before each branch is processed.
|
488
|
+
# Moreover we need to keep track of newly encountered wildcards.
|
489
|
+
# Var `new_unify_intersection` will hold those that are encountered
|
490
|
+
# in all branches; these are not a problem.
|
491
|
+
# Var `partial_unify` will hold those encountered in only a subset
|
492
|
+
# of the branches; these can't be used outside of the union.
|
493
|
+
|
494
|
+
return to_enum __method__, enum unless block_given?
|
495
|
+
|
496
|
+
new_unify_intersection = nil
|
497
|
+
partial_unify = []
|
498
|
+
unify_before = @unify.dup
|
499
|
+
|
500
|
+
result = enum.each do |e|
|
501
|
+
@unify = unify_before.dup if new_unify_intersection
|
502
|
+
yield e
|
503
|
+
new_unify = @unify.keys - unify_before.keys
|
504
|
+
if new_unify_intersection.nil?
|
505
|
+
# First iteration
|
506
|
+
new_unify_intersection = new_unify
|
507
|
+
else
|
508
|
+
union = new_unify_intersection | new_unify
|
509
|
+
new_unify_intersection &= new_unify
|
510
|
+
partial_unify |= union - new_unify_intersection
|
511
|
+
end
|
512
|
+
end
|
513
|
+
|
514
|
+
# At this point, all members of `new_unify_intersection` can be used
|
515
|
+
# for unification outside of the union, but partial_unify may not
|
516
|
+
|
517
|
+
forbid_unification(*partial_unify)
|
518
|
+
|
519
|
+
result
|
520
|
+
end
|
521
|
+
# rubocop:enable Metrics/MethodLength, Metrics/AbcSize
|
522
|
+
|
523
|
+
def compile_union
|
524
|
+
# we need to ensure that each branch of the {} contains the same
|
525
|
+
# number of captures (since only one branch of the {} can actually
|
526
|
+
# match, the same variables are used to hold the captures for each
|
527
|
+
# branch)
|
528
|
+
enum = tokens_until('}', 'union')
|
529
|
+
enum = unify_in_union(enum)
|
530
|
+
terms = insure_same_captures(enum, 'branch of {}')
|
531
|
+
.map { compile_expr }
|
532
|
+
|
533
|
+
"(#{terms.join(' || ')})"
|
534
|
+
end
|
535
|
+
|
536
|
+
def compile_intersect
|
537
|
+
tokens_until(']', 'intersection')
|
538
|
+
.map { compile_expr }
|
539
|
+
.join(' && ')
|
540
|
+
end
|
541
|
+
|
542
|
+
def compile_capture
|
543
|
+
"(#{next_capture} = #{CUR_ELEMENT}; #{compile_expr})"
|
544
|
+
end
|
545
|
+
|
546
|
+
def compile_negation
|
547
|
+
"!(#{compile_expr})"
|
548
|
+
end
|
549
|
+
|
550
|
+
def compile_ascend
|
551
|
+
with_context("#{CUR_NODE} && #{compile_expr}", "#{CUR_NODE}.parent")
|
552
|
+
end
|
553
|
+
|
554
|
+
def compile_descend
|
555
|
+
with_temp_variables do |descendant|
|
556
|
+
pattern = with_context(compile_expr, descendant,
|
557
|
+
use_temp_node: false)
|
558
|
+
[
|
559
|
+
"RuboCop::AST::NodePattern.descend(#{CUR_ELEMENT}).",
|
560
|
+
"any? do |#{descendant}|",
|
561
|
+
" #{pattern}",
|
562
|
+
'end'
|
563
|
+
].join("\n")
|
564
|
+
end
|
565
|
+
end
|
566
|
+
|
567
|
+
def compile_wildcard(name)
|
568
|
+
if name.empty?
|
569
|
+
'true'
|
570
|
+
elsif @unify.key?(name)
|
571
|
+
# we have already seen a wildcard with this name before
|
572
|
+
# so the value it matched the first time will already be stored
|
573
|
+
# in a temp. check if this value matches the one stored in the temp
|
574
|
+
"#{CUR_ELEMENT} == #{access_unify(name)}"
|
575
|
+
else
|
576
|
+
n = @unify[name] = "unify_#{name.gsub('-', '__')}"
|
577
|
+
# double assign to avoid "assigned but unused variable"
|
578
|
+
"(#{n} = #{CUR_ELEMENT}; " \
|
579
|
+
"#{n} = #{n}; true)"
|
580
|
+
end
|
581
|
+
end
|
582
|
+
|
583
|
+
def compile_literal(literal)
|
584
|
+
"#{CUR_ELEMENT} == #{literal}"
|
585
|
+
end
|
586
|
+
|
587
|
+
def compile_predicate(predicate)
|
588
|
+
if predicate.end_with?('(') # is there an arglist?
|
589
|
+
args = compile_args(tokens)
|
590
|
+
predicate = predicate[0..-2] # drop the trailing (
|
591
|
+
"#{CUR_ELEMENT}.#{predicate}(#{args.join(',')})"
|
592
|
+
else
|
593
|
+
"#{CUR_ELEMENT}.#{predicate}"
|
594
|
+
end
|
595
|
+
end
|
596
|
+
|
597
|
+
def compile_funcall(method)
|
598
|
+
# call a method in the context which this pattern-matching
|
599
|
+
# code is used in. pass target value as an argument
|
600
|
+
method = method[1..-1] # drop the leading #
|
601
|
+
if method.end_with?('(') # is there an arglist?
|
602
|
+
args = compile_args(tokens)
|
603
|
+
method = method[0..-2] # drop the trailing (
|
604
|
+
"#{method}(#{CUR_ELEMENT},#{args.join(',')})"
|
605
|
+
else
|
606
|
+
"#{method}(#{CUR_ELEMENT})"
|
607
|
+
end
|
608
|
+
end
|
609
|
+
|
610
|
+
def compile_nodetype(type)
|
611
|
+
"#{compile_guard_clause} && #{CUR_NODE}.#{type.tr('-', '_')}_type?"
|
612
|
+
end
|
613
|
+
|
614
|
+
def compile_param(number)
|
615
|
+
"#{CUR_ELEMENT} == #{get_param(number)}"
|
616
|
+
end
|
617
|
+
|
618
|
+
def compile_args(tokens)
|
619
|
+
index = tokens.find_index { |token| token == ')' }
|
620
|
+
|
621
|
+
tokens.slice!(0..index).each_with_object([]) do |token, args|
|
622
|
+
next if [')', ','].include?(token)
|
623
|
+
|
624
|
+
args << compile_arg(token)
|
625
|
+
end
|
626
|
+
end
|
627
|
+
|
628
|
+
def compile_arg(token)
|
629
|
+
case token
|
630
|
+
when WILDCARD then
|
631
|
+
name = token[1..-1]
|
632
|
+
access_unify(name) || fail_due_to('invalid in arglist: ' + token)
|
633
|
+
when LITERAL then token
|
634
|
+
when PARAM then get_param(token[1..-1])
|
635
|
+
when CLOSING then fail_due_to("#{token} in invalid position")
|
636
|
+
when nil then fail_due_to('pattern ended prematurely')
|
637
|
+
else fail_due_to("invalid token in arglist: #{token.inspect}")
|
638
|
+
end
|
639
|
+
end
|
640
|
+
|
641
|
+
def next_capture
|
642
|
+
index = @captures
|
643
|
+
@captures += 1
|
644
|
+
"captures[#{index}]"
|
645
|
+
end
|
646
|
+
|
647
|
+
def get_param(number)
|
648
|
+
number = number.empty? ? 1 : Integer(number)
|
649
|
+
@params = number if number > @params
|
650
|
+
number.zero? ? @root : "param#{number}"
|
651
|
+
end
|
652
|
+
|
653
|
+
def emit_yield_capture(when_no_capture = '')
|
654
|
+
yield_val = if @captures.zero?
|
655
|
+
when_no_capture
|
656
|
+
elsif @captures == 1
|
657
|
+
'captures[0]' # Circumvent https://github.com/jruby/jruby/issues/5710
|
658
|
+
else
|
659
|
+
'*captures'
|
660
|
+
end
|
661
|
+
"yield(#{yield_val})"
|
662
|
+
end
|
663
|
+
|
664
|
+
def emit_retval
|
665
|
+
if @captures.zero?
|
666
|
+
'true'
|
667
|
+
elsif @captures == 1
|
668
|
+
'captures[0]'
|
669
|
+
else
|
670
|
+
'captures'
|
671
|
+
end
|
672
|
+
end
|
673
|
+
|
674
|
+
def emit_param_list
|
675
|
+
(1..@params).map { |n| "param#{n}" }.join(',')
|
676
|
+
end
|
677
|
+
|
678
|
+
def emit_trailing_params
|
679
|
+
params = emit_param_list
|
680
|
+
params.empty? ? '' : ",#{params}"
|
681
|
+
end
|
682
|
+
|
683
|
+
def emit_method_code
|
684
|
+
<<~RUBY
|
685
|
+
return unless #{@match_code}
|
686
|
+
block_given? ? #{emit_yield_capture} : (return #{emit_retval})
|
687
|
+
RUBY
|
688
|
+
end
|
689
|
+
|
690
|
+
def fail_due_to(message)
|
691
|
+
raise Invalid, "Couldn't compile due to #{message}. Pattern: #{@string}"
|
692
|
+
end
|
693
|
+
|
694
|
+
def with_temp_node(cur_node)
|
695
|
+
with_temp_variables do |node|
|
696
|
+
yield "(#{node} = #{cur_node})", node
|
697
|
+
end
|
698
|
+
.gsub("\n", "\n ") # Nicer indent for debugging
|
699
|
+
end
|
700
|
+
|
701
|
+
def with_temp_variables(&block)
|
702
|
+
names = block.parameters.map { |_, name| next_temp_variable(name) }
|
703
|
+
yield(*names)
|
704
|
+
end
|
705
|
+
|
706
|
+
def next_temp_variable(name)
|
707
|
+
"#{name}#{next_temp_value}"
|
708
|
+
end
|
709
|
+
|
710
|
+
def next_temp_value
|
711
|
+
@temps += 1
|
712
|
+
end
|
713
|
+
|
714
|
+
def auto_use_temp_node?(code)
|
715
|
+
code.scan(CUR_PLACEHOLDER).count > 1
|
716
|
+
end
|
717
|
+
|
718
|
+
# with_<...>_context methods are used whenever the context,
|
719
|
+
# i.e. the current node or the current element can be determined.
|
720
|
+
|
721
|
+
def with_child_context(code, child_index)
|
722
|
+
with_context(code, "#{CUR_NODE}.children[#{child_index}]")
|
723
|
+
end
|
724
|
+
|
725
|
+
def with_context(code, cur_node,
|
726
|
+
use_temp_node: auto_use_temp_node?(code))
|
727
|
+
if use_temp_node
|
728
|
+
with_temp_node(cur_node) do |init, temp_var|
|
729
|
+
substitute_cur_node(code, temp_var, first_cur_node: init)
|
730
|
+
end
|
731
|
+
else
|
732
|
+
substitute_cur_node(code, cur_node)
|
733
|
+
end
|
734
|
+
end
|
735
|
+
|
736
|
+
def with_seq_head_context(code)
|
737
|
+
fail_due_to('parentheses at sequence head') if code.include?(SEQ_HEAD_GUARD)
|
738
|
+
|
739
|
+
code.gsub CUR_ELEMENT, "#{CUR_NODE}.type"
|
740
|
+
end
|
741
|
+
|
742
|
+
def substitute_cur_node(code, cur_node, first_cur_node: cur_node)
|
743
|
+
iter = 0
|
744
|
+
code
|
745
|
+
.gsub(CUR_ELEMENT, CUR_NODE)
|
746
|
+
.gsub(CUR_NODE) do
|
747
|
+
iter += 1
|
748
|
+
iter == 1 ? first_cur_node : cur_node
|
749
|
+
end
|
750
|
+
.gsub(SEQ_HEAD_GUARD, '')
|
751
|
+
end
|
752
|
+
|
753
|
+
def self.tokens(pattern)
|
754
|
+
pattern.scan(TOKEN).reject { |token| token =~ /\A#{SEPARATORS}\Z/ }
|
755
|
+
end
|
756
|
+
end
|
757
|
+
private_constant :Compiler
|
758
|
+
|
759
|
+
# Helpers for defining methods based on a pattern string
|
760
|
+
module Macros
|
761
|
+
# Define a method which applies a pattern to an AST node
|
762
|
+
#
|
763
|
+
# The new method will return nil if the node does not match
|
764
|
+
# If the node matches, and a block is provided, the new method will
|
765
|
+
# yield to the block (passing any captures as block arguments).
|
766
|
+
# If the node matches, and no block is provided, the new method will
|
767
|
+
# return the captures, or `true` if there were none.
|
768
|
+
def def_node_matcher(method_name, pattern_str)
|
769
|
+
compiler = Compiler.new(pattern_str, 'node')
|
770
|
+
src = "def #{method_name}(node = self" \
|
771
|
+
"#{compiler.emit_trailing_params});" \
|
772
|
+
"#{compiler.emit_method_code};end"
|
773
|
+
|
774
|
+
location = caller_locations(1, 1).first
|
775
|
+
class_eval(src, location.path, location.lineno)
|
776
|
+
end
|
777
|
+
|
778
|
+
# Define a method which recurses over the descendants of an AST node,
|
779
|
+
# checking whether any of them match the provided pattern
|
780
|
+
#
|
781
|
+
# If the method name ends with '?', the new method will return `true`
|
782
|
+
# as soon as it finds a descendant which matches. Otherwise, it will
|
783
|
+
# yield all descendants which match.
|
784
|
+
def def_node_search(method_name, pattern_str)
  compiler = Compiler.new(pattern_str, 'node')

  # Take the file and line of the macro call from the structured backtrace
  # location instead of splitting the `caller` string on ':'. Splitting
  # mis-parses any path that itself contains a colon (e.g. a Windows drive
  # letter), which would hand a bogus filename and line 0 to `class_eval`.
  location = caller_locations(1, 1).first
  called_from = [location.path, location.lineno]

  # A trailing '?' selects the "stop at the first match" variant;
  # any other name selects the "yield every match" variant.
  if method_name.to_s.end_with?('?')
    node_search_first(method_name, compiler, called_from)
  else
    node_search_all(method_name, compiler, called_from)
  end
end
|
794
|
+
|
795
|
+
def node_search_first(method_name, compiler, called_from)
|
796
|
+
node_search(method_name, compiler, 'return true', '', called_from)
|
797
|
+
end
|
798
|
+
|
799
|
+
def node_search_all(method_name, compiler, called_from)
|
800
|
+
yield_code = compiler.emit_yield_capture('node')
|
801
|
+
prelude = "return enum_for(:#{method_name}, node0" \
|
802
|
+
"#{compiler.emit_trailing_params}) unless block_given?"
|
803
|
+
|
804
|
+
node_search(method_name, compiler, yield_code, prelude, called_from)
|
805
|
+
end
|
806
|
+
|
807
|
+
def node_search(method_name, compiler, on_match, prelude, called_from)
|
808
|
+
src = node_search_body(method_name, compiler.emit_trailing_params,
|
809
|
+
prelude, compiler.match_code, on_match)
|
810
|
+
filename, lineno = *called_from
|
811
|
+
class_eval(src, filename, lineno.to_i)
|
812
|
+
end
|
813
|
+
|
814
|
+
def node_search_body(method_name, trailing_params, prelude, match_code,
|
815
|
+
on_match)
|
816
|
+
<<~RUBY
|
817
|
+
def #{method_name}(node0#{trailing_params})
|
818
|
+
#{prelude}
|
819
|
+
node0.each_node do |node|
|
820
|
+
if #{match_code}
|
821
|
+
#{on_match}
|
822
|
+
end
|
823
|
+
end
|
824
|
+
nil
|
825
|
+
end
|
826
|
+
RUBY
|
827
|
+
end
|
828
|
+
end
|
829
|
+
|
830
|
+
attr_reader :pattern
|
831
|
+
|
832
|
+
def initialize(str)
|
833
|
+
@pattern = str
|
834
|
+
compiler = Compiler.new(str)
|
835
|
+
src = "def match(node0#{compiler.emit_trailing_params});" \
|
836
|
+
"#{compiler.emit_method_code}end"
|
837
|
+
instance_eval(src, __FILE__, __LINE__ + 1)
|
838
|
+
end
|
839
|
+
|
840
|
+
# Fallback used when the per-instance (singleton) `match` is missing.
#
# `initialize` defines `match` as a singleton method on each instance, so
# this class-level definition is only reached when that singleton method
# has not been defined, either because we've been dup'ed or serialized
# through YAML. It recompiles the pattern, which defines the singleton
# method on this instance, and then re-dispatches the call to it.
#
# @param args [Array] the node to match, followed by any extra pattern
#   parameters
# @yield forwarded to the compiled matcher
# @return whatever the compiled `match` returns
def match(*args, &block)
  initialize(pattern)
  # Forward the block as well: the compiled matcher branches on
  # `block_given?`, so dropping it here would silently return the captures
  # instead of yielding them.
  match(*args, &block)
end
|
846
|
+
|
847
|
+
def marshal_load(pattern)
|
848
|
+
initialize pattern
|
849
|
+
end
|
850
|
+
|
851
|
+
def marshal_dump
|
852
|
+
pattern
|
853
|
+
end
|
854
|
+
|
855
|
+
def ==(other)
|
856
|
+
other.is_a?(NodePattern) &&
|
857
|
+
Compiler.tokens(other.pattern) == Compiler.tokens(pattern)
|
858
|
+
end
|
859
|
+
alias eql? ==
|
860
|
+
|
861
|
+
def to_s
|
862
|
+
"#<#{self.class} #{pattern}>"
|
863
|
+
end
|
864
|
+
|
865
|
+
# Yields its argument and any descendants, depth-first.
|
866
|
+
#
|
867
|
+
def self.descend(element, &block)
|
868
|
+
return to_enum(__method__, element) unless block_given?
|
869
|
+
|
870
|
+
yield element
|
871
|
+
|
872
|
+
if element.is_a?(::RuboCop::AST::Node)
|
873
|
+
element.children.each do |child|
|
874
|
+
descend(child, &block)
|
875
|
+
end
|
876
|
+
end
|
877
|
+
|
878
|
+
nil
|
879
|
+
end
|
880
|
+
end
|
881
|
+
end
|
882
|
+
end
|
883
|
+
# rubocop:enable Metrics/ClassLength, Metrics/CyclomaticComplexity
|