rubocop-ast 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +2 -2
- data/lib/rubocop/ast.rb +3 -6
- data/lib/rubocop/ast/node_pattern.rb +883 -0
- data/lib/rubocop/ast/processed_source.rb +203 -0
- data/lib/rubocop/ast/token.rb +116 -0
- data/lib/rubocop/ast/version.rb +1 -1
- metadata +5 -6
- data/lib/rubocop/error.rb +0 -34
- data/lib/rubocop/node_pattern.rb +0 -881
- data/lib/rubocop/processed_source.rb +0 -211
- data/lib/rubocop/token.rb +0 -114
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b0c196be6c58699fcb84209860508c96e69e8122adb8e14d6641134009802712
|
4
|
+
data.tar.gz: a2d50c781f612349b4aa7e24a4ea4008c359ce766a6f5e0f7e925ac78d57dbb7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2d263c6a0ab978cb1b81dde253ce752c2b64af9e94c59ac6a6a364aef3cec37c03028e4d7c00086c11b8b46abae9373edad636528782f84ad914ea527b644ec0
|
7
|
+
data.tar.gz: edeefb4a74f678920907e130ea6a7fea86b4c67cf9a9bbaefa21ef231738c0de541d61de5d9c234384e46b3243a74ffaac84288b2c68c49e3178cb39d2d96b37
|
data/README.md
CHANGED
@@ -5,7 +5,7 @@
|
|
5
5
|
|
6
6
|
Contains the classes needed by [RuboCop](https://github.com/rubocop-hq/rubocop) to deal with Ruby's AST, in particular:
|
7
7
|
* `RuboCop::AST::Node`
|
8
|
-
* `RuboCop::NodePattern` ([doc](manual/node_pattern.md))
|
8
|
+
* `RuboCop::AST::NodePattern` ([doc](manual/node_pattern.md))
|
9
9
|
|
10
10
|
This gem may be used independently from the main RuboCop gem.
|
11
11
|
|
@@ -25,7 +25,7 @@ gem 'rubocop-ast'
|
|
25
25
|
|
26
26
|
## Usage
|
27
27
|
|
28
|
-
Refer to the documentation of `RuboCop::AST::Node` and [`RuboCop::NodePattern`](manual/node_pattern.md)
|
28
|
+
Refer to the documentation of `RuboCop::AST::Node` and [`RuboCop::AST::NodePattern`](manual/node_pattern.md)
|
29
29
|
|
30
30
|
## Contributing
|
31
31
|
|
data/lib/rubocop/ast.rb
CHANGED
@@ -3,9 +3,7 @@
|
|
3
3
|
require 'parser'
|
4
4
|
require 'forwardable'
|
5
5
|
|
6
|
-
require_relative '
|
7
|
-
require_relative 'node_pattern'
|
8
|
-
|
6
|
+
require_relative 'ast/node_pattern'
|
9
7
|
require_relative 'ast/sexp'
|
10
8
|
require_relative 'ast/node'
|
11
9
|
require_relative 'ast/node/mixin/method_identifier_predicates'
|
@@ -56,8 +54,7 @@ require_relative 'ast/node/when_node'
|
|
56
54
|
require_relative 'ast/node/while_node'
|
57
55
|
require_relative 'ast/node/yield_node'
|
58
56
|
require_relative 'ast/builder'
|
57
|
+
require_relative 'ast/processed_source'
|
58
|
+
require_relative 'ast/token'
|
59
59
|
require_relative 'ast/traversal'
|
60
60
|
require_relative 'ast/version'
|
61
|
-
|
62
|
-
require_relative 'token'
|
63
|
-
require_relative 'processed_source'
|
@@ -0,0 +1,883 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'delegate'
|
4
|
+
require 'erb'
|
5
|
+
|
6
|
+
# rubocop:disable Metrics/ClassLength, Metrics/CyclomaticComplexity
|
7
|
+
module RuboCop
|
8
|
+
module AST
|
9
|
+
# This class performs a pattern-matching operation on an AST node.
|
10
|
+
#
|
11
|
+
# Initialize a new `NodePattern` with `NodePattern.new(pattern_string)`, then
|
12
|
+
# pass an AST node to `NodePattern#match`. Alternatively, use one of the class
|
13
|
+
# macros in `NodePattern::Macros` to define your own pattern-matching method.
|
14
|
+
#
|
15
|
+
# If the match fails, `nil` will be returned. If the match succeeds, the
|
16
|
+
# return value depends on whether a block was provided to `#match`, and
|
17
|
+
# whether the pattern contained any "captures" (values which are extracted
|
18
|
+
# from a matching AST).
|
19
|
+
#
|
20
|
+
# - With block: #match yields the captures (if any) and passes the return
|
21
|
+
# value of the block through.
|
22
|
+
# - With no block, but one capture: the capture is returned.
|
23
|
+
# - With no block, but multiple captures: captures are returned as an array.
|
24
|
+
# - With no block and no captures: #match returns `true`.
|
25
|
+
#
|
26
|
+
# ## Pattern string format examples
|
27
|
+
#
|
28
|
+
# ':sym' # matches a literal symbol
|
29
|
+
# '1' # matches a literal integer
|
30
|
+
# 'nil' # matches a literal nil
|
31
|
+
# 'send' # matches (send ...)
|
32
|
+
# '(send)' # matches (send)
|
33
|
+
# '(send ...)' # matches (send ...)
|
34
|
+
# '(op-asgn)' # node types with hyphenated names also work
|
35
|
+
# '{send class}' # matches (send ...) or (class ...)
|
36
|
+
# '({send class})' # matches (send) or (class)
|
37
|
+
# '(send const)' # matches (send (const ...))
|
38
|
+
# '(send _ :new)' # matches (send <anything> :new)
|
39
|
+
# '(send $_ :new)' # as above, but whatever matches the $_ is captured
|
40
|
+
# '(send $_ $_)' # you can use as many captures as you want
|
41
|
+
# '(send !const ...)' # ! negates the next part of the pattern
|
42
|
+
# '$(send const ...)' # arbitrary matching can be performed on a capture
|
43
|
+
# '(send _recv _msg)' # wildcards can be named (for readability)
|
44
|
+
# '(send ... :new)' # you can match against the last children
|
45
|
+
# '(array <str sym>)' # you can match children in any order. This
|
46
|
+
# # would match `['x', :y]` as well as `[:y, 'x']`
|
47
|
+
# '(_ <str sym ...>)' # will match if arguments have at least a `str` and
|
48
|
+
# # a `sym` node, but can have more.
|
49
|
+
# '(array <$str $_>)' # captures are in the order of the pattern,
|
50
|
+
# # irrespective of the actual order of the children
|
51
|
+
# '(array int*)' # will match an array of 0 or more integers
|
52
|
+
# '(array int ?)' # will match 0 or 1 integer.
|
53
|
+
# # Note: Space needed to distinguish from int?
|
54
|
+
# '(array int+)' # will match an array of 1 or more integers
|
55
|
+
# '(array (int $_)+)' # as above and will capture the numbers in an array
|
56
|
+
# '(send $...)' # capture all the children as an array
|
57
|
+
# '(send $... int)' # capture all children but the last as an array
|
58
|
+
# '(send _x :+ _x)' # unification is performed on named wildcards
|
59
|
+
# # (like Prolog variables...)
|
60
|
+
# # (#== is used to see if values unify)
|
61
|
+
# '(int odd?)' # words which end with a ? are predicate methods,
|
62
|
+
# # which are called on the target to see if it matches
|
63
|
+
# # any Ruby method which the matched object supports
|
64
|
+
# # can be used
|
65
|
+
# # if a truthy value is returned, the match succeeds
|
66
|
+
# '(int [!1 !2])' # [] contains multiple patterns, ALL of which must
|
67
|
+
# # match in that position
|
68
|
+
# # in other words, while {} is pattern union (logical
|
69
|
+
# # OR), [] is intersection (logical AND)
|
70
|
+
# '(send %1 _)' # % stands for a parameter which must be supplied to
|
71
|
+
# # #match at matching time
|
72
|
+
# # it will be compared to the corresponding value in
|
73
|
+
# # the AST using #==
|
74
|
+
# # a bare '%' is the same as '%1'
|
75
|
+
# # the number of extra parameters passed to #match
|
76
|
+
# # must equal the highest % value in the pattern
|
77
|
+
# # for consistency, %0 is the 'root node' which is
|
78
|
+
# # passed as the 1st argument to #match, where the
|
79
|
+
# # matching process starts
|
80
|
+
# '^^send' # each ^ ascends one level in the AST
|
81
|
+
# # so this matches against the grandparent node
|
82
|
+
# '`send' # descends any number of levels in the AST
|
83
|
+
# # so this matches against any descendant node
|
84
|
+
# '#method' # we call this a 'funcall'; it calls a method in the
|
85
|
+
# # context where a pattern-matching method is defined
|
86
|
+
# # if that returns a truthy value, the match succeeds
|
87
|
+
# 'equal?(%1)' # predicates can be given 1 or more extra args
|
88
|
+
# '#method(%0, 1)' # funcalls can also be given 1 or more extra args
|
89
|
+
#
|
90
|
+
# You can nest arbitrarily deep:
|
91
|
+
#
|
92
|
+
# # matches node parsed from 'Const = Class.new' or 'Const = Module.new':
|
93
|
+
# '(casgn nil? :Const (send (const nil? {:Class :Module}) :new))'
|
94
|
+
# # matches a node parsed from an 'if', with a '==' comparison,
|
95
|
+
# # and no 'else' branch:
|
96
|
+
# '(if (send _ :== _) _ nil?)'
|
97
|
+
#
|
98
|
+
# Note that patterns like 'send' are implemented by calling `#send_type?` on
|
99
|
+
# the node being matched, 'const' by `#const_type?`, 'int' by `#int_type?`,
|
100
|
+
# and so on. Therefore, if you add methods which are named like
|
101
|
+
# `#prefix_type?` to the AST node class, then 'prefix' will become usable as
|
102
|
+
# a pattern.
|
103
|
+
#
|
104
|
+
# Also note that if you need a "guard clause" to protect against possible nils
|
105
|
+
# in a certain place in the AST, you can do it like this: `[!nil <pattern>]`
|
106
|
+
#
|
107
|
+
# The compiler code is very simple; don't be afraid to read through it!
|
108
|
+
class NodePattern
|
109
|
+
# @private
|
110
|
+
Invalid = Class.new(StandardError)
|
111
|
+
|
112
|
+
# @private
|
113
|
+
# Builds Ruby code which implements a pattern
|
114
|
+
class Compiler
|
115
|
+
SYMBOL = %r{:(?:[\w+@*/?!<>=~|%^-]+|\[\]=?)}.freeze
|
116
|
+
IDENTIFIER = /[a-zA-Z_][a-zA-Z0-9_-]*/.freeze
|
117
|
+
META = Regexp.union(
|
118
|
+
%w"( ) { } [ ] $< < > $... $ ! ^ ` ... + * ?"
|
119
|
+
).freeze
|
120
|
+
NUMBER = /-?\d+(?:\.\d+)?/.freeze
|
121
|
+
STRING = /".+?"/.freeze
|
122
|
+
METHOD_NAME = /\#?#{IDENTIFIER}[\!\?]?\(?/.freeze
|
123
|
+
PARAM_NUMBER = /%\d*/.freeze
|
124
|
+
|
125
|
+
SEPARATORS = /[\s]+/.freeze
|
126
|
+
TOKENS = Regexp.union(META, PARAM_NUMBER, NUMBER,
|
127
|
+
METHOD_NAME, SYMBOL, STRING)
|
128
|
+
|
129
|
+
TOKEN = /\G(?:#{SEPARATORS}|#{TOKENS}|.)/.freeze
|
130
|
+
|
131
|
+
NODE = /\A#{IDENTIFIER}\Z/.freeze
|
132
|
+
PREDICATE = /\A#{IDENTIFIER}\?\(?\Z/.freeze
|
133
|
+
WILDCARD = /\A_(?:#{IDENTIFIER})?\Z/.freeze
|
134
|
+
|
135
|
+
FUNCALL = /\A\##{METHOD_NAME}/.freeze
|
136
|
+
LITERAL = /\A(?:#{SYMBOL}|#{NUMBER}|#{STRING})\Z/.freeze
|
137
|
+
PARAM = /\A#{PARAM_NUMBER}\Z/.freeze
|
138
|
+
CLOSING = /\A(?:\)|\}|\])\Z/.freeze
|
139
|
+
|
140
|
+
REST = '...'
|
141
|
+
CAPTURED_REST = '$...'
|
142
|
+
|
143
|
+
attr_reader :match_code, :tokens, :captures
|
144
|
+
|
145
|
+
SEQ_HEAD_INDEX = -1
|
146
|
+
|
147
|
+
# Placeholders while compiling, see with_..._context methods
|
148
|
+
CUR_PLACEHOLDER = '@@@cur'
|
149
|
+
CUR_NODE = "#{CUR_PLACEHOLDER} node@@@"
|
150
|
+
CUR_ELEMENT = "#{CUR_PLACEHOLDER} element@@@"
|
151
|
+
SEQ_HEAD_GUARD = '@@@seq guard head@@@'
|
152
|
+
|
153
|
+
line = __LINE__
|
154
|
+
ANY_ORDER_TEMPLATE = ERB.new <<~RUBY.gsub("-%>\n", '%>')
|
155
|
+
<% if capture_rest %>(<%= capture_rest %> = []) && <% end -%>
|
156
|
+
<% if capture_all %>(<%= capture_all %> = <% end -%>
|
157
|
+
<%= CUR_NODE %>.children[<%= range %>]<% if capture_all %>)<% end -%>
|
158
|
+
.each_with_object({}) { |<%= child %>, <%= matched %>|
|
159
|
+
case
|
160
|
+
<% patterns.each_with_index do |pattern, i| -%>
|
161
|
+
when !<%= matched %>[<%= i %>] && <%=
|
162
|
+
with_context(pattern, child, use_temp_node: false)
|
163
|
+
%> then <%= matched %>[<%= i %>] = true
|
164
|
+
<% end -%>
|
165
|
+
<% if !rest %> else break({})
|
166
|
+
<% elsif capture_rest %> else <%= capture_rest %> << <%= child %>
|
167
|
+
<% end -%>
|
168
|
+
end
|
169
|
+
}.size == <%= patterns.size -%>
|
170
|
+
RUBY
|
171
|
+
ANY_ORDER_TEMPLATE.location = [__FILE__, line + 1]
|
172
|
+
|
173
|
+
line = __LINE__
|
174
|
+
REPEATED_TEMPLATE = ERB.new <<~RUBY.gsub("-%>\n", '%>')
|
175
|
+
<% if captured %>(<%= accumulate %> = Array.new) && <% end %>
|
176
|
+
<%= CUR_NODE %>.children[<%= range %>].all? do |<%= child %>|
|
177
|
+
<%= with_context(expr, child, use_temp_node: false) %><% if captured %>&&
|
178
|
+
<%= accumulate %>.push(<%= captured %>)<% end %>
|
179
|
+
end <% if captured %>&&
|
180
|
+
(<%= captured %> = if <%= accumulate %>.empty?
|
181
|
+
<%= captured %>.map{[]} # Transpose hack won't work for empty case
|
182
|
+
else
|
183
|
+
<%= accumulate %>.transpose
|
184
|
+
end) <% end -%>
|
185
|
+
RUBY
|
186
|
+
REPEATED_TEMPLATE.location = [__FILE__, line + 1]
|
187
|
+
|
188
|
+
def initialize(str, node_var = 'node0')
|
189
|
+
@string = str
|
190
|
+
@root = node_var
|
191
|
+
|
192
|
+
@temps = 0 # avoid name clashes between temp variables
|
193
|
+
@captures = 0 # number of captures seen
|
194
|
+
@unify = {} # named wildcard -> temp variable
|
195
|
+
@params = 0 # highest % (param) number seen
|
196
|
+
run(node_var)
|
197
|
+
end
|
198
|
+
|
199
|
+
def run(node_var)
|
200
|
+
@tokens = Compiler.tokens(@string)
|
201
|
+
|
202
|
+
@match_code = with_context(compile_expr, node_var, use_temp_node: false)
|
203
|
+
@match_code.prepend("(captures = Array.new(#{@captures})) && ") \
|
204
|
+
if @captures.positive?
|
205
|
+
|
206
|
+
fail_due_to('unbalanced pattern') unless tokens.empty?
|
207
|
+
end
|
208
|
+
|
209
|
+
# rubocop:disable Metrics/MethodLength, Metrics/AbcSize
|
210
|
+
def compile_expr(token = tokens.shift)
|
211
|
+
# read a single pattern-matching expression from the token stream,
|
212
|
+
# return Ruby code which performs the corresponding matching operation
|
213
|
+
#
|
214
|
+
# the 'pattern-matching' expression may be a composite which
|
215
|
+
# contains an arbitrary number of sub-expressions, but that composite
|
216
|
+
# must all have precedence higher than or equal to that of `&&`
|
217
|
+
#
|
218
|
+
# Expressions may use placeholders like:
|
219
|
+
# CUR_NODE: Ruby code that evaluates to an AST node
|
220
|
+
# CUR_ELEMENT: Either the node or the type if in first element of
|
221
|
+
# a sequence (aka seq_head, e.g. "(seq_head first_node_arg ...")
|
222
|
+
case token
|
223
|
+
when '(' then compile_seq
|
224
|
+
when '{' then compile_union
|
225
|
+
when '[' then compile_intersect
|
226
|
+
when '!' then compile_negation
|
227
|
+
when '$' then compile_capture
|
228
|
+
when '^' then compile_ascend
|
229
|
+
when '`' then compile_descend
|
230
|
+
when WILDCARD then compile_wildcard(token[1..-1])
|
231
|
+
when FUNCALL then compile_funcall(token)
|
232
|
+
when LITERAL then compile_literal(token)
|
233
|
+
when PREDICATE then compile_predicate(token)
|
234
|
+
when NODE then compile_nodetype(token)
|
235
|
+
when PARAM then compile_param(token[1..-1])
|
236
|
+
when CLOSING then fail_due_to("#{token} in invalid position")
|
237
|
+
when nil then fail_due_to('pattern ended prematurely')
|
238
|
+
else fail_due_to("invalid token #{token.inspect}")
|
239
|
+
end
|
240
|
+
end
|
241
|
+
# rubocop:enable Metrics/MethodLength, Metrics/AbcSize
|
242
|
+
|
243
|
+
def tokens_until(stop, what)
|
244
|
+
return to_enum __method__, stop, what unless block_given?
|
245
|
+
|
246
|
+
fail_due_to("empty #{what}") if tokens.first == stop && what
|
247
|
+
yield until tokens.first == stop
|
248
|
+
tokens.shift
|
249
|
+
end
|
250
|
+
|
251
|
+
def compile_seq
|
252
|
+
terms = tokens_until(')', 'sequence').map { variadic_seq_term }
|
253
|
+
Sequence.new(self, *terms).compile
|
254
|
+
end
|
255
|
+
|
256
|
+
def compile_guard_clause
|
257
|
+
"#{CUR_NODE}.is_a?(RuboCop::AST::Node)"
|
258
|
+
end
|
259
|
+
|
260
|
+
def variadic_seq_term
|
261
|
+
token = tokens.shift
|
262
|
+
case token
|
263
|
+
when CAPTURED_REST then compile_captured_ellipsis
|
264
|
+
when REST then compile_ellipsis
|
265
|
+
when '$<' then compile_any_order(next_capture)
|
266
|
+
when '<' then compile_any_order
|
267
|
+
else compile_repeated_expr(token)
|
268
|
+
end
|
269
|
+
end
|
270
|
+
|
271
|
+
def compile_repeated_expr(token)
|
272
|
+
before = @captures
|
273
|
+
expr = compile_expr(token)
|
274
|
+
min, max = parse_repetition_token
|
275
|
+
return [1, expr] if min.nil?
|
276
|
+
|
277
|
+
if @captures != before
|
278
|
+
captured = "captures[#{before}...#{@captures}]"
|
279
|
+
accumulate = next_temp_variable(:accumulate)
|
280
|
+
end
|
281
|
+
arity = min..max || Float::INFINITY
|
282
|
+
|
283
|
+
[arity, repeated_generator(expr, captured, accumulate)]
|
284
|
+
end
|
285
|
+
|
286
|
+
def repeated_generator(expr, captured, accumulate)
|
287
|
+
with_temp_variables do |child|
|
288
|
+
lambda do |range|
|
289
|
+
fail_due_to 'repeated pattern at beginning of sequence' if range.begin == SEQ_HEAD_INDEX
|
290
|
+
REPEATED_TEMPLATE.result(binding)
|
291
|
+
end
|
292
|
+
end
|
293
|
+
end
|
294
|
+
|
295
|
+
def parse_repetition_token
|
296
|
+
case tokens.first
|
297
|
+
when '*' then min = 0
|
298
|
+
when '+' then min = 1
|
299
|
+
when '?' then min = 0
|
300
|
+
max = 1
|
301
|
+
else return
|
302
|
+
end
|
303
|
+
tokens.shift
|
304
|
+
[min, max]
|
305
|
+
end
|
306
|
+
|
307
|
+
# @private
|
308
|
+
# Builds Ruby code for a sequence
|
309
|
+
# (head *first_terms variadic_term *last_terms)
|
310
|
+
class Sequence < SimpleDelegator
|
311
|
+
def initialize(compiler, *arity_term_list)
|
312
|
+
@arities, @terms = arity_term_list.transpose
|
313
|
+
|
314
|
+
super(compiler)
|
315
|
+
@variadic_index = @arities.find_index { |a| a.is_a?(Range) }
|
316
|
+
fail_due_to 'multiple variable patterns in same sequence' \
|
317
|
+
if @variadic_index && !@arities.one? { |a| a.is_a?(Range) }
|
318
|
+
end
|
319
|
+
|
320
|
+
def compile
|
321
|
+
[
|
322
|
+
compile_guard_clause,
|
323
|
+
compile_child_nb_guard,
|
324
|
+
compile_seq_head,
|
325
|
+
*compile_first_terms,
|
326
|
+
compile_variadic_term,
|
327
|
+
*compile_last_terms
|
328
|
+
].compact.join(" &&\n") << SEQ_HEAD_GUARD
|
329
|
+
end
|
330
|
+
|
331
|
+
private
|
332
|
+
|
333
|
+
def first_terms_arity
|
334
|
+
first_terms_range { |r| @arities[r].inject(0, :+) } || 0
|
335
|
+
end
|
336
|
+
|
337
|
+
def last_terms_arity
|
338
|
+
last_terms_range { |r| @arities[r].inject(0, :+) } || 0
|
339
|
+
end
|
340
|
+
|
341
|
+
def variadic_term_min_arity
|
342
|
+
@variadic_index ? @arities[@variadic_index].begin : 0
|
343
|
+
end
|
344
|
+
|
345
|
+
def first_terms_range
|
346
|
+
yield 1..(@variadic_index || @terms.size) - 1 if seq_head?
|
347
|
+
end
|
348
|
+
|
349
|
+
def last_terms_range
|
350
|
+
yield @variadic_index + 1...@terms.size if @variadic_index
|
351
|
+
end
|
352
|
+
|
353
|
+
def seq_head?
|
354
|
+
@variadic_index != 0
|
355
|
+
end
|
356
|
+
|
357
|
+
def compile_child_nb_guard
|
358
|
+
fixed = first_terms_arity + last_terms_arity
|
359
|
+
min = fixed + variadic_term_min_arity
|
360
|
+
op = if @variadic_index
|
361
|
+
max_variadic = @arities[@variadic_index].end
|
362
|
+
if max_variadic != Float::INFINITY
|
363
|
+
range = min..fixed + max_variadic
|
364
|
+
return "(#{range}).cover?(#{CUR_NODE}.children.size)"
|
365
|
+
end
|
366
|
+
'>='
|
367
|
+
else
|
368
|
+
'=='
|
369
|
+
end
|
370
|
+
"#{CUR_NODE}.children.size #{op} #{min}"
|
371
|
+
end
|
372
|
+
|
373
|
+
def term(index, range)
|
374
|
+
t = @terms[index]
|
375
|
+
if t.respond_to? :call
|
376
|
+
t.call(range)
|
377
|
+
else
|
378
|
+
with_child_context(t, range.begin)
|
379
|
+
end
|
380
|
+
end
|
381
|
+
|
382
|
+
def compile_seq_head
|
383
|
+
return unless seq_head?
|
384
|
+
|
385
|
+
fail_due_to 'sequences cannot start with <' \
|
386
|
+
if @terms[0].respond_to? :call
|
387
|
+
|
388
|
+
with_seq_head_context(@terms[0])
|
389
|
+
end
|
390
|
+
|
391
|
+
def compile_first_terms
|
392
|
+
first_terms_range { |range| compile_terms(range, 0) }
|
393
|
+
end
|
394
|
+
|
395
|
+
def compile_last_terms
|
396
|
+
last_terms_range { |r| compile_terms(r, -last_terms_arity) }
|
397
|
+
end
|
398
|
+
|
399
|
+
def compile_terms(index_range, start)
|
400
|
+
index_range.map do |i|
|
401
|
+
current = start
|
402
|
+
start += @arities.fetch(i)
|
403
|
+
term(i, current..start - 1)
|
404
|
+
end
|
405
|
+
end
|
406
|
+
|
407
|
+
def compile_variadic_term
|
408
|
+
variadic_arity { |arity| term(@variadic_index, arity) }
|
409
|
+
end
|
410
|
+
|
411
|
+
def variadic_arity
|
412
|
+
return unless @variadic_index
|
413
|
+
|
414
|
+
first = @variadic_index.positive? ? first_terms_arity : SEQ_HEAD_INDEX
|
415
|
+
yield first..-last_terms_arity - 1
|
416
|
+
end
|
417
|
+
end
|
418
|
+
private_constant :Sequence
|
419
|
+
|
420
|
+
def compile_captured_ellipsis
|
421
|
+
capture = next_capture
|
422
|
+
block = lambda { |range|
|
423
|
+
# Consider ($...) like (_ $...):
|
424
|
+
range = 0..range.end if range.begin == SEQ_HEAD_INDEX
|
425
|
+
"(#{capture} = #{CUR_NODE}.children[#{range}])"
|
426
|
+
}
|
427
|
+
[0..Float::INFINITY, block]
|
428
|
+
end
|
429
|
+
|
430
|
+
def compile_ellipsis
|
431
|
+
[0..Float::INFINITY, 'true']
|
432
|
+
end
|
433
|
+
|
434
|
+
# rubocop:disable Metrics/AbcSize
|
435
|
+
# rubocop:disable Metrics/MethodLength
|
436
|
+
def compile_any_order(capture_all = nil)
|
437
|
+
rest = capture_rest = nil
|
438
|
+
patterns = []
|
439
|
+
with_temp_variables do |child, matched|
|
440
|
+
tokens_until('>', 'any child') do
|
441
|
+
fail_due_to 'ellipsis must be at the end of <>' if rest
|
442
|
+
token = tokens.shift
|
443
|
+
case token
|
444
|
+
when CAPTURED_REST then rest = capture_rest = next_capture
|
445
|
+
when REST then rest = true
|
446
|
+
else patterns << compile_expr(token)
|
447
|
+
end
|
448
|
+
end
|
449
|
+
[rest ? patterns.size..Float::INFINITY : patterns.size,
|
450
|
+
->(range) { ANY_ORDER_TEMPLATE.result(binding) }]
|
451
|
+
end
|
452
|
+
end
|
453
|
+
# rubocop:enable Metrics/MethodLength
|
454
|
+
# rubocop:enable Metrics/AbcSize
|
455
|
+
|
456
|
+
def insure_same_captures(enum, what)
|
457
|
+
return to_enum __method__, enum, what unless block_given?
|
458
|
+
|
459
|
+
captures_before = captures_after = nil
|
460
|
+
enum.each do
|
461
|
+
captures_before ||= @captures
|
462
|
+
@captures = captures_before
|
463
|
+
yield
|
464
|
+
captures_after ||= @captures
|
465
|
+
fail_due_to("each #{what} must have same # of captures") if captures_after != @captures
|
466
|
+
end
|
467
|
+
end
|
468
|
+
|
469
|
+
def access_unify(name)
|
470
|
+
var = @unify[name]
|
471
|
+
|
472
|
+
if var == :forbidden_unification
|
473
|
+
fail_due_to "Wildcard #{name} was first seen in a subset of a" \
|
474
|
+
" union and can't be used outside that union"
|
475
|
+
end
|
476
|
+
var
|
477
|
+
end
|
478
|
+
|
479
|
+
def forbid_unification(*names)
|
480
|
+
names.each do |name|
|
481
|
+
@unify[name] = :forbidden_unification
|
482
|
+
end
|
483
|
+
end
|
484
|
+
|
485
|
+
# rubocop:disable Metrics/MethodLength, Metrics/AbcSize
|
486
|
+
def unify_in_union(enum)
|
487
|
+
# We need to reset @unify before each branch is processed.
|
488
|
+
# Moreover we need to keep track of newly encountered wildcards.
|
489
|
+
# Var `new_unify_intersection` will hold those that are encountered
|
490
|
+
# in all branches; these are not a problem.
|
491
|
+
# Var `partial_unify` will hold those encountered in only a subset
|
492
|
+
# of the branches; these can't be used outside of the union.
|
493
|
+
|
494
|
+
return to_enum __method__, enum unless block_given?
|
495
|
+
|
496
|
+
new_unify_intersection = nil
|
497
|
+
partial_unify = []
|
498
|
+
unify_before = @unify.dup
|
499
|
+
|
500
|
+
result = enum.each do |e|
|
501
|
+
@unify = unify_before.dup if new_unify_intersection
|
502
|
+
yield e
|
503
|
+
new_unify = @unify.keys - unify_before.keys
|
504
|
+
if new_unify_intersection.nil?
|
505
|
+
# First iteration
|
506
|
+
new_unify_intersection = new_unify
|
507
|
+
else
|
508
|
+
union = new_unify_intersection | new_unify
|
509
|
+
new_unify_intersection &= new_unify
|
510
|
+
partial_unify |= union - new_unify_intersection
|
511
|
+
end
|
512
|
+
end
|
513
|
+
|
514
|
+
# At this point, all members of `new_unify_intersection` can be used
|
515
|
+
# for unification outside of the union, but partial_unify may not
|
516
|
+
|
517
|
+
forbid_unification(*partial_unify)
|
518
|
+
|
519
|
+
result
|
520
|
+
end
|
521
|
+
# rubocop:enable Metrics/MethodLength, Metrics/AbcSize
|
522
|
+
|
523
|
+
def compile_union
|
524
|
+
# we need to ensure that each branch of the {} contains the same
|
525
|
+
# number of captures (since only one branch of the {} can actually
|
526
|
+
# match, the same variables are used to hold the captures for each
|
527
|
+
# branch)
|
528
|
+
enum = tokens_until('}', 'union')
|
529
|
+
enum = unify_in_union(enum)
|
530
|
+
terms = insure_same_captures(enum, 'branch of {}')
|
531
|
+
.map { compile_expr }
|
532
|
+
|
533
|
+
"(#{terms.join(' || ')})"
|
534
|
+
end
|
535
|
+
|
536
|
+
def compile_intersect
|
537
|
+
tokens_until(']', 'intersection')
|
538
|
+
.map { compile_expr }
|
539
|
+
.join(' && ')
|
540
|
+
end
|
541
|
+
|
542
|
+
def compile_capture
|
543
|
+
"(#{next_capture} = #{CUR_ELEMENT}; #{compile_expr})"
|
544
|
+
end
|
545
|
+
|
546
|
+
def compile_negation
|
547
|
+
"!(#{compile_expr})"
|
548
|
+
end
|
549
|
+
|
550
|
+
def compile_ascend
|
551
|
+
with_context("#{CUR_NODE} && #{compile_expr}", "#{CUR_NODE}.parent")
|
552
|
+
end
|
553
|
+
|
554
|
+
def compile_descend
|
555
|
+
with_temp_variables do |descendant|
|
556
|
+
pattern = with_context(compile_expr, descendant,
|
557
|
+
use_temp_node: false)
|
558
|
+
[
|
559
|
+
"RuboCop::AST::NodePattern.descend(#{CUR_ELEMENT}).",
|
560
|
+
"any? do |#{descendant}|",
|
561
|
+
" #{pattern}",
|
562
|
+
'end'
|
563
|
+
].join("\n")
|
564
|
+
end
|
565
|
+
end
|
566
|
+
|
567
|
+
def compile_wildcard(name)
|
568
|
+
if name.empty?
|
569
|
+
'true'
|
570
|
+
elsif @unify.key?(name)
|
571
|
+
# we have already seen a wildcard with this name before
|
572
|
+
# so the value it matched the first time will already be stored
|
573
|
+
# in a temp. check if this value matches the one stored in the temp
|
574
|
+
"#{CUR_ELEMENT} == #{access_unify(name)}"
|
575
|
+
else
|
576
|
+
n = @unify[name] = "unify_#{name.gsub('-', '__')}"
|
577
|
+
# double assign to avoid "assigned but unused variable"
|
578
|
+
"(#{n} = #{CUR_ELEMENT}; " \
|
579
|
+
"#{n} = #{n}; true)"
|
580
|
+
end
|
581
|
+
end
|
582
|
+
|
583
|
+
def compile_literal(literal)
|
584
|
+
"#{CUR_ELEMENT} == #{literal}"
|
585
|
+
end
|
586
|
+
|
587
|
+
def compile_predicate(predicate)
|
588
|
+
if predicate.end_with?('(') # is there an arglist?
|
589
|
+
args = compile_args(tokens)
|
590
|
+
predicate = predicate[0..-2] # drop the trailing (
|
591
|
+
"#{CUR_ELEMENT}.#{predicate}(#{args.join(',')})"
|
592
|
+
else
|
593
|
+
"#{CUR_ELEMENT}.#{predicate}"
|
594
|
+
end
|
595
|
+
end
|
596
|
+
|
597
|
+
def compile_funcall(method)
|
598
|
+
# call a method in the context which this pattern-matching
|
599
|
+
# code is used in. pass target value as an argument
|
600
|
+
method = method[1..-1] # drop the leading #
|
601
|
+
if method.end_with?('(') # is there an arglist?
|
602
|
+
args = compile_args(tokens)
|
603
|
+
method = method[0..-2] # drop the trailing (
|
604
|
+
"#{method}(#{CUR_ELEMENT},#{args.join(',')})"
|
605
|
+
else
|
606
|
+
"#{method}(#{CUR_ELEMENT})"
|
607
|
+
end
|
608
|
+
end
|
609
|
+
|
610
|
+
def compile_nodetype(type)
|
611
|
+
"#{compile_guard_clause} && #{CUR_NODE}.#{type.tr('-', '_')}_type?"
|
612
|
+
end
|
613
|
+
|
614
|
+
def compile_param(number)
|
615
|
+
"#{CUR_ELEMENT} == #{get_param(number)}"
|
616
|
+
end
|
617
|
+
|
618
|
+
def compile_args(tokens)
|
619
|
+
index = tokens.find_index { |token| token == ')' }
|
620
|
+
|
621
|
+
tokens.slice!(0..index).each_with_object([]) do |token, args|
|
622
|
+
next if [')', ','].include?(token)
|
623
|
+
|
624
|
+
args << compile_arg(token)
|
625
|
+
end
|
626
|
+
end
|
627
|
+
|
628
|
+
def compile_arg(token)
|
629
|
+
case token
|
630
|
+
when WILDCARD then
|
631
|
+
name = token[1..-1]
|
632
|
+
access_unify(name) || fail_due_to('invalid in arglist: ' + token)
|
633
|
+
when LITERAL then token
|
634
|
+
when PARAM then get_param(token[1..-1])
|
635
|
+
when CLOSING then fail_due_to("#{token} in invalid position")
|
636
|
+
when nil then fail_due_to('pattern ended prematurely')
|
637
|
+
else fail_due_to("invalid token in arglist: #{token.inspect}")
|
638
|
+
end
|
639
|
+
end
|
640
|
+
|
641
|
+
def next_capture
|
642
|
+
index = @captures
|
643
|
+
@captures += 1
|
644
|
+
"captures[#{index}]"
|
645
|
+
end
|
646
|
+
|
647
|
+
def get_param(number)
|
648
|
+
number = number.empty? ? 1 : Integer(number)
|
649
|
+
@params = number if number > @params
|
650
|
+
number.zero? ? @root : "param#{number}"
|
651
|
+
end
|
652
|
+
|
653
|
+
def emit_yield_capture(when_no_capture = '')
|
654
|
+
yield_val = if @captures.zero?
|
655
|
+
when_no_capture
|
656
|
+
elsif @captures == 1
|
657
|
+
'captures[0]' # Circumvent https://github.com/jruby/jruby/issues/5710
|
658
|
+
else
|
659
|
+
'*captures'
|
660
|
+
end
|
661
|
+
"yield(#{yield_val})"
|
662
|
+
end
|
663
|
+
|
664
|
+
def emit_retval
|
665
|
+
if @captures.zero?
|
666
|
+
'true'
|
667
|
+
elsif @captures == 1
|
668
|
+
'captures[0]'
|
669
|
+
else
|
670
|
+
'captures'
|
671
|
+
end
|
672
|
+
end
|
673
|
+
|
674
|
+
def emit_param_list
|
675
|
+
(1..@params).map { |n| "param#{n}" }.join(',')
|
676
|
+
end
|
677
|
+
|
678
|
+
def emit_trailing_params
|
679
|
+
params = emit_param_list
|
680
|
+
params.empty? ? '' : ",#{params}"
|
681
|
+
end
|
682
|
+
|
683
|
+
def emit_method_code
|
684
|
+
<<~RUBY
|
685
|
+
return unless #{@match_code}
|
686
|
+
block_given? ? #{emit_yield_capture} : (return #{emit_retval})
|
687
|
+
RUBY
|
688
|
+
end
|
689
|
+
|
690
|
+
def fail_due_to(message)
|
691
|
+
raise Invalid, "Couldn't compile due to #{message}. Pattern: #{@string}"
|
692
|
+
end
|
693
|
+
|
694
|
+
def with_temp_node(cur_node)
|
695
|
+
with_temp_variables do |node|
|
696
|
+
yield "(#{node} = #{cur_node})", node
|
697
|
+
end
|
698
|
+
.gsub("\n", "\n ") # Nicer indent for debugging
|
699
|
+
end
|
700
|
+
|
701
|
+
def with_temp_variables(&block)
|
702
|
+
names = block.parameters.map { |_, name| next_temp_variable(name) }
|
703
|
+
yield(*names)
|
704
|
+
end
|
705
|
+
|
706
|
+
def next_temp_variable(name)
|
707
|
+
"#{name}#{next_temp_value}"
|
708
|
+
end
|
709
|
+
|
710
|
+
def next_temp_value
|
711
|
+
@temps += 1
|
712
|
+
end
|
713
|
+
|
714
|
+
def auto_use_temp_node?(code)
|
715
|
+
code.scan(CUR_PLACEHOLDER).count > 1
|
716
|
+
end
|
717
|
+
|
718
|
+
# with_<...>_context methods are used whenever the context,
|
719
|
+
# i.e., the current node or the current element can be determined.
|
720
|
+
|
721
|
+
def with_child_context(code, child_index)
|
722
|
+
with_context(code, "#{CUR_NODE}.children[#{child_index}]")
|
723
|
+
end
|
724
|
+
|
725
|
+
def with_context(code, cur_node,
|
726
|
+
use_temp_node: auto_use_temp_node?(code))
|
727
|
+
if use_temp_node
|
728
|
+
with_temp_node(cur_node) do |init, temp_var|
|
729
|
+
substitute_cur_node(code, temp_var, first_cur_node: init)
|
730
|
+
end
|
731
|
+
else
|
732
|
+
substitute_cur_node(code, cur_node)
|
733
|
+
end
|
734
|
+
end
|
735
|
+
|
736
|
+
def with_seq_head_context(code)
|
737
|
+
fail_due_to('parentheses at sequence head') if code.include?(SEQ_HEAD_GUARD)
|
738
|
+
|
739
|
+
code.gsub CUR_ELEMENT, "#{CUR_NODE}.type"
|
740
|
+
end
|
741
|
+
|
742
|
+
def substitute_cur_node(code, cur_node, first_cur_node: cur_node)
|
743
|
+
iter = 0
|
744
|
+
code
|
745
|
+
.gsub(CUR_ELEMENT, CUR_NODE)
|
746
|
+
.gsub(CUR_NODE) do
|
747
|
+
iter += 1
|
748
|
+
iter == 1 ? first_cur_node : cur_node
|
749
|
+
end
|
750
|
+
.gsub(SEQ_HEAD_GUARD, '')
|
751
|
+
end
|
752
|
+
|
753
|
+
def self.tokens(pattern)
|
754
|
+
pattern.scan(TOKEN).reject { |token| token =~ /\A#{SEPARATORS}\Z/ }
|
755
|
+
end
|
756
|
+
end
|
757
|
+
private_constant :Compiler
|
758
|
+
|
759
|
+
# Helpers for defining methods based on a pattern string
|
760
|
+
module Macros
|
761
|
+
# Define a method which applies a pattern to an AST node
|
762
|
+
#
|
763
|
+
# The new method will return nil if the node does not match.
|
764
|
+
# If the node matches, and a block is provided, the new method will
|
765
|
+
# yield to the block (passing any captures as block arguments).
|
766
|
+
# If the node matches, and no block is provided, the new method will
|
767
|
+
# return the captures, or `true` if there were none.
|
768
|
+
def def_node_matcher(method_name, pattern_str)
|
769
|
+
compiler = Compiler.new(pattern_str, 'node')
|
770
|
+
src = "def #{method_name}(node = self" \
|
771
|
+
"#{compiler.emit_trailing_params});" \
|
772
|
+
"#{compiler.emit_method_code};end"
|
773
|
+
|
774
|
+
location = caller_locations(1, 1).first
|
775
|
+
class_eval(src, location.path, location.lineno)
|
776
|
+
end
|
777
|
+
|
778
|
+
# Define a method which recurses over the descendants of an AST node,
|
779
|
+
# checking whether any of them match the provided pattern
|
780
|
+
#
|
781
|
+
# If the method name ends with '?', the new method will return `true`
|
782
|
+
# as soon as it finds a descendant which matches. Otherwise, it will
|
783
|
+
# yield all descendants which match.
|
784
|
+
def def_node_search(method_name, pattern_str)
|
785
|
+
compiler = Compiler.new(pattern_str, 'node')
|
786
|
+
called_from = caller(1..1).first.split(':')
|
787
|
+
|
788
|
+
if method_name.to_s.end_with?('?')
|
789
|
+
node_search_first(method_name, compiler, called_from)
|
790
|
+
else
|
791
|
+
node_search_all(method_name, compiler, called_from)
|
792
|
+
end
|
793
|
+
end
|
794
|
+
|
795
|
+
def node_search_first(method_name, compiler, called_from)
|
796
|
+
node_search(method_name, compiler, 'return true', '', called_from)
|
797
|
+
end
|
798
|
+
|
799
|
+
def node_search_all(method_name, compiler, called_from)
|
800
|
+
yield_code = compiler.emit_yield_capture('node')
|
801
|
+
prelude = "return enum_for(:#{method_name}, node0" \
|
802
|
+
"#{compiler.emit_trailing_params}) unless block_given?"
|
803
|
+
|
804
|
+
node_search(method_name, compiler, yield_code, prelude, called_from)
|
805
|
+
end
|
806
|
+
|
807
|
+
def node_search(method_name, compiler, on_match, prelude, called_from)
|
808
|
+
src = node_search_body(method_name, compiler.emit_trailing_params,
|
809
|
+
prelude, compiler.match_code, on_match)
|
810
|
+
filename, lineno = *called_from
|
811
|
+
class_eval(src, filename, lineno.to_i)
|
812
|
+
end
|
813
|
+
|
814
|
+
def node_search_body(method_name, trailing_params, prelude, match_code,
|
815
|
+
on_match)
|
816
|
+
<<~RUBY
|
817
|
+
def #{method_name}(node0#{trailing_params})
|
818
|
+
#{prelude}
|
819
|
+
node0.each_node do |node|
|
820
|
+
if #{match_code}
|
821
|
+
#{on_match}
|
822
|
+
end
|
823
|
+
end
|
824
|
+
nil
|
825
|
+
end
|
826
|
+
RUBY
|
827
|
+
end
|
828
|
+
end
|
829
|
+
|
830
|
+
# The pattern string this instance was initialized with.
attr_reader :pattern
|
831
|
+
|
832
|
+
# Stores the pattern string and compiles it into a singleton `match`
# method on this instance. That method takes the node (`node0`) plus
# whatever trailing parameters the compiler emits.
def initialize(str)
  @pattern = str
  compiler = Compiler.new(str)
  params = compiler.emit_trailing_params
  body = compiler.emit_method_code
  instance_eval("def match(node0#{params});#{body}end", __FILE__, __LINE__ + 1)
end
|
839
|
+
|
840
|
+
# Fallback used only when the compiled singleton `match` is missing.
# It re-runs `initialize` (which defines the singleton `match` via
# instance_eval) and then dispatches again with the same arguments; the
# second call reaches the freshly defined singleton method, not this one.
def match(*args)
  # If we're here, it's because the singleton method has not been defined,
  # either because we've been dup'ed or serialized through YAML
  initialize(pattern)
  match(*args)
end
|
846
|
+
|
847
|
+
# Marshal hook: rebuilds the instance from the dumped pattern string by
# running the regular initialization again.
def marshal_load(pattern)
  initialize(pattern)
end
|
850
|
+
|
851
|
+
# Marshal hook: only the pattern string is dumped; `marshal_load`
# receives it back and re-initializes the instance from it.
def marshal_dump
  pattern
end
|
854
|
+
|
855
|
+
# Two node patterns are equal when `other` is a NodePattern and both
# pattern strings produce the same token sequence (as computed by
# `Compiler.tokens`, which leaves out separator-only tokens).
def ==(other)
  other.is_a?(NodePattern) &&
    Compiler.tokens(other.pattern) == Compiler.tokens(pattern)
end
alias eql? ==

# Hash value consistent with `eql?`: derived from the same token sequence
# that equality compares, so equal patterns also behave as the same Hash
# key and are deduplicated by `uniq`.
def hash
  Compiler.tokens(pattern).hash
end
|
860
|
+
|
861
|
+
# Human-readable form: the receiver's class followed by its pattern
# string, wrapped as `#<Class pattern>`.
def to_s
  klass = self.class
  "#<#{klass} #{pattern}>"
end
|
864
|
+
|
865
|
+
# Yields `element` itself and then, when it is a RuboCop::AST::Node,
# each of its children recursively (pre-order, depth-first).
#
# Returns an enumerator over the same traversal when no block is given,
# and nil otherwise.
def self.descend(element, &block)
  return to_enum(__method__, element) unless block_given?

  yield element
  return unless element.is_a?(::RuboCop::AST::Node)

  element.children.each { |child| descend(child, &block) }
  nil
end
|
880
|
+
end
|
881
|
+
end
|
882
|
+
end
|
883
|
+
# rubocop:enable Metrics/ClassLength, Metrics/CyclomaticComplexity
|