rubocop-ast 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e0750c7434a863460f8b900080b40ed7515c6e1eb415d0b91ce612f6bcac7a67
4
- data.tar.gz: 0b08bbe1e7ed4612351d4296264f18bbe0e3af5d069410d46e017f4826ac1bc9
3
+ metadata.gz: b0c196be6c58699fcb84209860508c96e69e8122adb8e14d6641134009802712
4
+ data.tar.gz: a2d50c781f612349b4aa7e24a4ea4008c359ce766a6f5e0f7e925ac78d57dbb7
5
5
  SHA512:
6
- metadata.gz: 7259c958f33d8a754275a5c25d8b6094f2f41be2a058935dca5d689903accd0bba77bff7ac440501c35274fb00f9a9c29c4c02f8e988e2a7bb0f3fc334773cab
7
- data.tar.gz: e28f39434e75ad138bc49646e36d9bbb39fe25011d0bd41034c4da02d5bbb801b52e5caf06a05453e9ada004b158e1c573deb4749468f126fb4cbb88bebd3a74
6
+ metadata.gz: 2d263c6a0ab978cb1b81dde253ce752c2b64af9e94c59ac6a6a364aef3cec37c03028e4d7c00086c11b8b46abae9373edad636528782f84ad914ea527b644ec0
7
+ data.tar.gz: edeefb4a74f678920907e130ea6a7fea86b4c67cf9a9bbaefa21ef231738c0de541d61de5d9c234384e46b3243a74ffaac84288b2c68c49e3178cb39d2d96b37
data/README.md CHANGED
@@ -5,7 +5,7 @@
5
5
 
6
6
  Contains the classes needed by [RuboCop](https://github.com/rubocop-hq/rubocop) to deal with Ruby's AST, in particular:
7
7
  * `RuboCop::AST::Node`
8
- * `RuboCop::NodePattern` ([doc](manual/node_pattern.md))
8
+ * `RuboCop::AST::NodePattern` ([doc](manual/node_pattern.md))
9
9
 
10
10
  This gem may be used independently from the main RuboCop gem.
11
11
 
@@ -25,7 +25,7 @@ gem 'rubocop-ast'
25
25
 
26
26
  ## Usage
27
27
 
28
- Refer to the documentation of `RuboCop::AST::Node` and [`RuboCop::NodePattern`](manual/node_pattern.md)
28
+ Refer to the documentation of `RuboCop::AST::Node` and [`RuboCop::AST::NodePattern`](manual/node_pattern.md)
29
29
 
30
30
  ## Contributing
31
31
 
data/lib/rubocop/ast.rb CHANGED
@@ -3,9 +3,7 @@
3
3
  require 'parser'
4
4
  require 'forwardable'
5
5
 
6
- require_relative 'error'
7
- require_relative 'node_pattern'
8
-
6
+ require_relative 'ast/node_pattern'
9
7
  require_relative 'ast/sexp'
10
8
  require_relative 'ast/node'
11
9
  require_relative 'ast/node/mixin/method_identifier_predicates'
@@ -56,8 +54,7 @@ require_relative 'ast/node/when_node'
56
54
  require_relative 'ast/node/while_node'
57
55
  require_relative 'ast/node/yield_node'
58
56
  require_relative 'ast/builder'
57
+ require_relative 'ast/processed_source'
58
+ require_relative 'ast/token'
59
59
  require_relative 'ast/traversal'
60
60
  require_relative 'ast/version'
61
-
62
- require_relative 'token'
63
- require_relative 'processed_source'
@@ -0,0 +1,883 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'delegate'
4
+ require 'erb'
5
+
6
+ # rubocop:disable Metrics/ClassLength, Metrics/CyclomaticComplexity
7
+ module RuboCop
8
+ module AST
9
+ # This class performs a pattern-matching operation on an AST node.
10
+ #
11
+ # Initialize a new `NodePattern` with `NodePattern.new(pattern_string)`, then
12
+ # pass an AST node to `NodePattern#match`. Alternatively, use one of the class
13
+ # macros in `NodePattern::Macros` to define your own pattern-matching method.
14
+ #
15
+ # If the match fails, `nil` will be returned. If the match succeeds, the
16
+ # return value depends on whether a block was provided to `#match`, and
17
+ # whether the pattern contained any "captures" (values which are extracted
18
+ # from a matching AST).
19
+ #
20
+ # - With block: #match yields the captures (if any) and passes the return
21
+ # value of the block through.
22
+ # - With no block, but one capture: the capture is returned.
23
+ # - With no block, but multiple captures: captures are returned as an array.
24
+ # - With no block and no captures: #match returns `true`.
25
+ #
26
+ # ## Pattern string format examples
27
+ #
28
+ # ':sym' # matches a literal symbol
29
+ # '1' # matches a literal integer
30
+ # 'nil' # matches a literal nil
31
+ # 'send' # matches (send ...)
32
+ # '(send)' # matches (send)
33
+ # '(send ...)' # matches (send ...)
34
+ # '(op-asgn)' # node types with hyphenated names also work
35
+ # '{send class}' # matches (send ...) or (class ...)
36
+ # '({send class})' # matches (send) or (class)
37
+ # '(send const)' # matches (send (const ...))
38
+ # '(send _ :new)' # matches (send <anything> :new)
39
+ # '(send $_ :new)' # as above, but whatever matches the $_ is captured
40
+ # '(send $_ $_)' # you can use as many captures as you want
41
+ # '(send !const ...)' # ! negates the next part of the pattern
42
+ # '$(send const ...)' # arbitrary matching can be performed on a capture
43
+ # '(send _recv _msg)' # wildcards can be named (for readability)
44
+ # '(send ... :new)' # you can match against the last children
45
+ # '(array <str sym>)' # you can match children in any order. This
46
+ # # would match `['x', :y]` as well as `[:y, 'x']`
47
+ # '(_ <str sym ...>)' # will match if arguments have at least a `str` and
48
+ # # a `sym` node, but can have more.
49
+ # '(array <$str $_>)' # captures are in the order of the pattern,
50
+ # # irrespective of the actual order of the children
51
+ # '(array int*)' # will match an array of 0 or more integers
52
+ # '(array int ?)' # will match 0 or 1 integer.
53
+ # # Note: Space needed to distinguish from int?
54
+ # '(array int+)' # will match an array of 1 or more integers
55
+ # '(array (int $_)+)' # as above and will capture the numbers in an array
56
+ # '(send $...)' # capture all the children as an array
57
+ # '(send $... int)' # capture all children but the last as an array
58
+ # '(send _x :+ _x)' # unification is performed on named wildcards
59
+ # # (like Prolog variables...)
60
+ # # (#== is used to see if values unify)
61
+ # '(int odd?)' # words which end with a ? are predicate methods,
62
+ # # and are called on the target to see if it matches
63
+ # # any Ruby method which the matched object supports
64
+ # # can be used
65
+ # # if a truthy value is returned, the match succeeds
66
+ # '(int [!1 !2])' # [] contains multiple patterns, ALL of which must
67
+ # # match in that position
68
+ # # in other words, while {} is pattern union (logical
69
+ # # OR), [] is intersection (logical AND)
70
+ # '(send %1 _)' # % stands for a parameter which must be supplied to
71
+ # # #match at matching time
72
+ # # it will be compared to the corresponding value in
73
+ # # the AST using #==
74
+ # # a bare '%' is the same as '%1'
75
+ # # the number of extra parameters passed to #match
76
+ # # must equal the highest % value in the pattern
77
+ # # for consistency, %0 is the 'root node' which is
78
+ # # passed as the 1st argument to #match, where the
79
+ # # matching process starts
80
+ # '^^send' # each ^ ascends one level in the AST
81
+ # # so this matches against the grandparent node
82
+ # '`send' # descends any number of levels in the AST
83
+ # # so this matches against any descendant node
84
+ # '#method' # we call this a 'funcall'; it calls a method in the
85
+ # # context where a pattern-matching method is defined
86
+ # # if that returns a truthy value, the match succeeds
87
+ # 'equal?(%1)' # predicates can be given 1 or more extra args
88
+ # '#method(%0, 1)' # funcalls can also be given 1 or more extra args
89
+ #
90
+ # You can nest arbitrarily deep:
91
+ #
92
+ # # matches node parsed from 'Const = Class.new' or 'Const = Module.new':
93
+ # '(casgn nil? :Const (send (const nil? {:Class :Module}) :new))'
94
+ # # matches a node parsed from an 'if', with a '==' comparison,
95
+ # # and no 'else' branch:
96
+ # '(if (send _ :== _) _ nil?)'
97
+ #
98
+ # Note that patterns like 'send' are implemented by calling `#send_type?` on
99
+ # the node being matched, 'const' by `#const_type?`, 'int' by `#int_type?`,
100
+ # and so on. Therefore, if you add methods which are named like
101
+ # `#prefix_type?` to the AST node class, then 'prefix' will become usable as
102
+ # a pattern.
103
+ #
104
+ # Also note that if you need a "guard clause" to protect against possible nils
105
+ # in a certain place in the AST, you can do it like this: `[!nil <pattern>]`
106
+ #
107
+ # The compiler code is very simple; don't be afraid to read through it!
108
+ class NodePattern
109
+ # @private
110
+ Invalid = Class.new(StandardError)
111
+
112
+ # @private
113
+ # Builds Ruby code which implements a pattern
114
+ class Compiler
115
+ SYMBOL = %r{:(?:[\w+@*/?!<>=~|%^-]+|\[\]=?)}.freeze
116
+ IDENTIFIER = /[a-zA-Z_][a-zA-Z0-9_-]*/.freeze
117
+ META = Regexp.union(
118
+ %w"( ) { } [ ] $< < > $... $ ! ^ ` ... + * ?"
119
+ ).freeze
120
+ NUMBER = /-?\d+(?:\.\d+)?/.freeze
121
+ STRING = /".+?"/.freeze
122
+ METHOD_NAME = /\#?#{IDENTIFIER}[\!\?]?\(?/.freeze
123
+ PARAM_NUMBER = /%\d*/.freeze
124
+
125
+ SEPARATORS = /[\s]+/.freeze
126
+ TOKENS = Regexp.union(META, PARAM_NUMBER, NUMBER,
127
+ METHOD_NAME, SYMBOL, STRING)
128
+
129
+ TOKEN = /\G(?:#{SEPARATORS}|#{TOKENS}|.)/.freeze
130
+
131
+ NODE = /\A#{IDENTIFIER}\Z/.freeze
132
+ PREDICATE = /\A#{IDENTIFIER}\?\(?\Z/.freeze
133
+ WILDCARD = /\A_(?:#{IDENTIFIER})?\Z/.freeze
134
+
135
+ FUNCALL = /\A\##{METHOD_NAME}/.freeze
136
+ LITERAL = /\A(?:#{SYMBOL}|#{NUMBER}|#{STRING})\Z/.freeze
137
+ PARAM = /\A#{PARAM_NUMBER}\Z/.freeze
138
+ CLOSING = /\A(?:\)|\}|\])\Z/.freeze
139
+
140
+ REST = '...'
141
+ CAPTURED_REST = '$...'
142
+
143
+ attr_reader :match_code, :tokens, :captures
144
+
145
+ SEQ_HEAD_INDEX = -1
146
+
147
+ # Placeholders while compiling, see with_..._context methods
148
+ CUR_PLACEHOLDER = '@@@cur'
149
+ CUR_NODE = "#{CUR_PLACEHOLDER} node@@@"
150
+ CUR_ELEMENT = "#{CUR_PLACEHOLDER} element@@@"
151
+ SEQ_HEAD_GUARD = '@@@seq guard head@@@'
152
+
153
+ line = __LINE__
154
+ ANY_ORDER_TEMPLATE = ERB.new <<~RUBY.gsub("-%>\n", '%>')
155
+ <% if capture_rest %>(<%= capture_rest %> = []) && <% end -%>
156
+ <% if capture_all %>(<%= capture_all %> = <% end -%>
157
+ <%= CUR_NODE %>.children[<%= range %>]<% if capture_all %>)<% end -%>
158
+ .each_with_object({}) { |<%= child %>, <%= matched %>|
159
+ case
160
+ <% patterns.each_with_index do |pattern, i| -%>
161
+ when !<%= matched %>[<%= i %>] && <%=
162
+ with_context(pattern, child, use_temp_node: false)
163
+ %> then <%= matched %>[<%= i %>] = true
164
+ <% end -%>
165
+ <% if !rest %> else break({})
166
+ <% elsif capture_rest %> else <%= capture_rest %> << <%= child %>
167
+ <% end -%>
168
+ end
169
+ }.size == <%= patterns.size -%>
170
+ RUBY
171
+ ANY_ORDER_TEMPLATE.location = [__FILE__, line + 1]
172
+
173
+ line = __LINE__
174
+ REPEATED_TEMPLATE = ERB.new <<~RUBY.gsub("-%>\n", '%>')
175
+ <% if captured %>(<%= accumulate %> = Array.new) && <% end %>
176
+ <%= CUR_NODE %>.children[<%= range %>].all? do |<%= child %>|
177
+ <%= with_context(expr, child, use_temp_node: false) %><% if captured %>&&
178
+ <%= accumulate %>.push(<%= captured %>)<% end %>
179
+ end <% if captured %>&&
180
+ (<%= captured %> = if <%= accumulate %>.empty?
181
+ <%= captured %>.map{[]} # Transpose hack won't work for empty case
182
+ else
183
+ <%= accumulate %>.transpose
184
+ end) <% end -%>
185
+ RUBY
186
+ REPEATED_TEMPLATE.location = [__FILE__, line + 1]
187
+
188
+ def initialize(str, node_var = 'node0')
189
+ @string = str
190
+ @root = node_var
191
+
192
+ @temps = 0 # avoid name clashes between temp variables
193
+ @captures = 0 # number of captures seen
194
+ @unify = {} # named wildcard -> temp variable
195
+ @params = 0 # highest % (param) number seen
196
+ run(node_var)
197
+ end
198
+
199
+ def run(node_var)
200
+ @tokens = Compiler.tokens(@string)
201
+
202
+ @match_code = with_context(compile_expr, node_var, use_temp_node: false)
203
+ @match_code.prepend("(captures = Array.new(#{@captures})) && ") \
204
+ if @captures.positive?
205
+
206
+ fail_due_to('unbalanced pattern') unless tokens.empty?
207
+ end
208
+
209
+ # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
210
+ def compile_expr(token = tokens.shift)
211
+ # read a single pattern-matching expression from the token stream,
212
+ # return Ruby code which performs the corresponding matching operation
213
+ #
214
+ # the 'pattern-matching' expression may be a composite which
215
+ # contains an arbitrary number of sub-expressions, but that composite
216
+ # must, as a whole, have precedence higher than or equal to that of `&&`
217
+ #
218
+ # Expressions may use placeholders like:
219
+ # CUR_NODE: Ruby code that evaluates to an AST node
220
+ # CUR_ELEMENT: Either the node or the type if in first element of
221
+ # a sequence (aka seq_head, e.g. "(seq_head first_node_arg ...")
222
+ case token
223
+ when '(' then compile_seq
224
+ when '{' then compile_union
225
+ when '[' then compile_intersect
226
+ when '!' then compile_negation
227
+ when '$' then compile_capture
228
+ when '^' then compile_ascend
229
+ when '`' then compile_descend
230
+ when WILDCARD then compile_wildcard(token[1..-1])
231
+ when FUNCALL then compile_funcall(token)
232
+ when LITERAL then compile_literal(token)
233
+ when PREDICATE then compile_predicate(token)
234
+ when NODE then compile_nodetype(token)
235
+ when PARAM then compile_param(token[1..-1])
236
+ when CLOSING then fail_due_to("#{token} in invalid position")
237
+ when nil then fail_due_to('pattern ended prematurely')
238
+ else fail_due_to("invalid token #{token.inspect}")
239
+ end
240
+ end
241
+ # rubocop:enable Metrics/MethodLength, Metrics/AbcSize
242
+
243
+ def tokens_until(stop, what)
244
+ return to_enum __method__, stop, what unless block_given?
245
+
246
+ fail_due_to("empty #{what}") if tokens.first == stop && what
247
+ yield until tokens.first == stop
248
+ tokens.shift
249
+ end
250
+
251
+ def compile_seq
252
+ terms = tokens_until(')', 'sequence').map { variadic_seq_term }
253
+ Sequence.new(self, *terms).compile
254
+ end
255
+
256
+ def compile_guard_clause
257
+ "#{CUR_NODE}.is_a?(RuboCop::AST::Node)"
258
+ end
259
+
260
+ def variadic_seq_term
261
+ token = tokens.shift
262
+ case token
263
+ when CAPTURED_REST then compile_captured_ellipsis
264
+ when REST then compile_ellipsis
265
+ when '$<' then compile_any_order(next_capture)
266
+ when '<' then compile_any_order
267
+ else compile_repeated_expr(token)
268
+ end
269
+ end
270
+
271
+ def compile_repeated_expr(token)
272
+ before = @captures
273
+ expr = compile_expr(token)
274
+ min, max = parse_repetition_token
275
+ return [1, expr] if min.nil?
276
+
277
+ if @captures != before
278
+ captured = "captures[#{before}...#{@captures}]"
279
+ accumulate = next_temp_variable(:accumulate)
280
+ end
281
+ arity = min..max || Float::INFINITY
282
+
283
+ [arity, repeated_generator(expr, captured, accumulate)]
284
+ end
285
+
286
+ def repeated_generator(expr, captured, accumulate)
287
+ with_temp_variables do |child|
288
+ lambda do |range|
289
+ fail_due_to 'repeated pattern at beginning of sequence' if range.begin == SEQ_HEAD_INDEX
290
+ REPEATED_TEMPLATE.result(binding)
291
+ end
292
+ end
293
+ end
294
+
295
+ def parse_repetition_token
296
+ case tokens.first
297
+ when '*' then min = 0
298
+ when '+' then min = 1
299
+ when '?' then min = 0
300
+ max = 1
301
+ else return
302
+ end
303
+ tokens.shift
304
+ [min, max]
305
+ end
306
+
307
+ # @private
308
+ # Builds Ruby code for a sequence
309
+ # (head *first_terms variadic_term *last_terms)
310
+ class Sequence < SimpleDelegator
311
+ def initialize(compiler, *arity_term_list)
312
+ @arities, @terms = arity_term_list.transpose
313
+
314
+ super(compiler)
315
+ @variadic_index = @arities.find_index { |a| a.is_a?(Range) }
316
+ fail_due_to 'multiple variable patterns in same sequence' \
317
+ if @variadic_index && !@arities.one? { |a| a.is_a?(Range) }
318
+ end
319
+
320
+ def compile
321
+ [
322
+ compile_guard_clause,
323
+ compile_child_nb_guard,
324
+ compile_seq_head,
325
+ *compile_first_terms,
326
+ compile_variadic_term,
327
+ *compile_last_terms
328
+ ].compact.join(" &&\n") << SEQ_HEAD_GUARD
329
+ end
330
+
331
+ private
332
+
333
+ def first_terms_arity
334
+ first_terms_range { |r| @arities[r].inject(0, :+) } || 0
335
+ end
336
+
337
+ def last_terms_arity
338
+ last_terms_range { |r| @arities[r].inject(0, :+) } || 0
339
+ end
340
+
341
+ def variadic_term_min_arity
342
+ @variadic_index ? @arities[@variadic_index].begin : 0
343
+ end
344
+
345
+ def first_terms_range
346
+ yield 1..(@variadic_index || @terms.size) - 1 if seq_head?
347
+ end
348
+
349
+ def last_terms_range
350
+ yield @variadic_index + 1...@terms.size if @variadic_index
351
+ end
352
+
353
+ def seq_head?
354
+ @variadic_index != 0
355
+ end
356
+
357
+ def compile_child_nb_guard
358
+ fixed = first_terms_arity + last_terms_arity
359
+ min = fixed + variadic_term_min_arity
360
+ op = if @variadic_index
361
+ max_variadic = @arities[@variadic_index].end
362
+ if max_variadic != Float::INFINITY
363
+ range = min..fixed + max_variadic
364
+ return "(#{range}).cover?(#{CUR_NODE}.children.size)"
365
+ end
366
+ '>='
367
+ else
368
+ '=='
369
+ end
370
+ "#{CUR_NODE}.children.size #{op} #{min}"
371
+ end
372
+
373
+ def term(index, range)
374
+ t = @terms[index]
375
+ if t.respond_to? :call
376
+ t.call(range)
377
+ else
378
+ with_child_context(t, range.begin)
379
+ end
380
+ end
381
+
382
+ def compile_seq_head
383
+ return unless seq_head?
384
+
385
+ fail_due_to 'sequences cannot start with <' \
386
+ if @terms[0].respond_to? :call
387
+
388
+ with_seq_head_context(@terms[0])
389
+ end
390
+
391
+ def compile_first_terms
392
+ first_terms_range { |range| compile_terms(range, 0) }
393
+ end
394
+
395
+ def compile_last_terms
396
+ last_terms_range { |r| compile_terms(r, -last_terms_arity) }
397
+ end
398
+
399
+ def compile_terms(index_range, start)
400
+ index_range.map do |i|
401
+ current = start
402
+ start += @arities.fetch(i)
403
+ term(i, current..start - 1)
404
+ end
405
+ end
406
+
407
+ def compile_variadic_term
408
+ variadic_arity { |arity| term(@variadic_index, arity) }
409
+ end
410
+
411
+ def variadic_arity
412
+ return unless @variadic_index
413
+
414
+ first = @variadic_index.positive? ? first_terms_arity : SEQ_HEAD_INDEX
415
+ yield first..-last_terms_arity - 1
416
+ end
417
+ end
418
+ private_constant :Sequence
419
+
420
+ def compile_captured_ellipsis
421
+ capture = next_capture
422
+ block = lambda { |range|
423
+ # Consider ($...) like (_ $...):
424
+ range = 0..range.end if range.begin == SEQ_HEAD_INDEX
425
+ "(#{capture} = #{CUR_NODE}.children[#{range}])"
426
+ }
427
+ [0..Float::INFINITY, block]
428
+ end
429
+
430
+ def compile_ellipsis
431
+ [0..Float::INFINITY, 'true']
432
+ end
433
+
434
+ # rubocop:disable Metrics/AbcSize
435
+ # rubocop:disable Metrics/MethodLength
436
+ def compile_any_order(capture_all = nil)
437
+ rest = capture_rest = nil
438
+ patterns = []
439
+ with_temp_variables do |child, matched|
440
+ tokens_until('>', 'any child') do
441
+ fail_due_to 'ellipsis must be at the end of <>' if rest
442
+ token = tokens.shift
443
+ case token
444
+ when CAPTURED_REST then rest = capture_rest = next_capture
445
+ when REST then rest = true
446
+ else patterns << compile_expr(token)
447
+ end
448
+ end
449
+ [rest ? patterns.size..Float::INFINITY : patterns.size,
450
+ ->(range) { ANY_ORDER_TEMPLATE.result(binding) }]
451
+ end
452
+ end
453
+ # rubocop:enable Metrics/MethodLength
454
+ # rubocop:enable Metrics/AbcSize
455
+
456
+ def insure_same_captures(enum, what)
457
+ return to_enum __method__, enum, what unless block_given?
458
+
459
+ captures_before = captures_after = nil
460
+ enum.each do
461
+ captures_before ||= @captures
462
+ @captures = captures_before
463
+ yield
464
+ captures_after ||= @captures
465
+ fail_due_to("each #{what} must have same # of captures") if captures_after != @captures
466
+ end
467
+ end
468
+
469
+ def access_unify(name)
470
+ var = @unify[name]
471
+
472
+ if var == :forbidden_unification
473
+ fail_due_to "Wildcard #{name} was first seen in a subset of a" \
474
+ " union and can't be used outside that union"
475
+ end
476
+ var
477
+ end
478
+
479
+ def forbid_unification(*names)
480
+ names.each do |name|
481
+ @unify[name] = :forbidden_unification
482
+ end
483
+ end
484
+
485
+ # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
486
+ def unify_in_union(enum)
487
+ # We need to reset @unify before each branch is processed.
488
+ # Moreover we need to keep track of newly encountered wildcards.
489
+ # Var `new_unify_intersection` will hold those that are encountered
490
+ # in all branches; these are not a problem.
491
+ # Var `partial_unify` will hold those encountered in only a subset
492
+ # of the branches; these can't be used outside of the union.
493
+
494
+ return to_enum __method__, enum unless block_given?
495
+
496
+ new_unify_intersection = nil
497
+ partial_unify = []
498
+ unify_before = @unify.dup
499
+
500
+ result = enum.each do |e|
501
+ @unify = unify_before.dup if new_unify_intersection
502
+ yield e
503
+ new_unify = @unify.keys - unify_before.keys
504
+ if new_unify_intersection.nil?
505
+ # First iteration
506
+ new_unify_intersection = new_unify
507
+ else
508
+ union = new_unify_intersection | new_unify
509
+ new_unify_intersection &= new_unify
510
+ partial_unify |= union - new_unify_intersection
511
+ end
512
+ end
513
+
514
+ # At this point, all members of `new_unify_intersection` can be used
515
+ # for unification outside of the union, but members of `partial_unify` may not
516
+
517
+ forbid_unification(*partial_unify)
518
+
519
+ result
520
+ end
521
+ # rubocop:enable Metrics/MethodLength, Metrics/AbcSize
522
+
523
+ def compile_union
524
+ # we need to ensure that each branch of the {} contains the same
525
+ # number of captures (since only one branch of the {} can actually
526
+ # match, the same variables are used to hold the captures for each
527
+ # branch)
528
+ enum = tokens_until('}', 'union')
529
+ enum = unify_in_union(enum)
530
+ terms = insure_same_captures(enum, 'branch of {}')
531
+ .map { compile_expr }
532
+
533
+ "(#{terms.join(' || ')})"
534
+ end
535
+
536
+ def compile_intersect
537
+ tokens_until(']', 'intersection')
538
+ .map { compile_expr }
539
+ .join(' && ')
540
+ end
541
+
542
+ def compile_capture
543
+ "(#{next_capture} = #{CUR_ELEMENT}; #{compile_expr})"
544
+ end
545
+
546
+ def compile_negation
547
+ "!(#{compile_expr})"
548
+ end
549
+
550
+ def compile_ascend
551
+ with_context("#{CUR_NODE} && #{compile_expr}", "#{CUR_NODE}.parent")
552
+ end
553
+
554
+ def compile_descend
555
+ with_temp_variables do |descendant|
556
+ pattern = with_context(compile_expr, descendant,
557
+ use_temp_node: false)
558
+ [
559
+ "RuboCop::AST::NodePattern.descend(#{CUR_ELEMENT}).",
560
+ "any? do |#{descendant}|",
561
+ " #{pattern}",
562
+ 'end'
563
+ ].join("\n")
564
+ end
565
+ end
566
+
567
+ def compile_wildcard(name)
568
+ if name.empty?
569
+ 'true'
570
+ elsif @unify.key?(name)
571
+ # we have already seen a wildcard with this name before
572
+ # so the value it matched the first time will already be stored
573
+ # in a temp. check if this value matches the one stored in the temp
574
+ "#{CUR_ELEMENT} == #{access_unify(name)}"
575
+ else
576
+ n = @unify[name] = "unify_#{name.gsub('-', '__')}"
577
+ # double assign to avoid "assigned but unused variable"
578
+ "(#{n} = #{CUR_ELEMENT}; " \
579
+ "#{n} = #{n}; true)"
580
+ end
581
+ end
582
+
583
+ def compile_literal(literal)
584
+ "#{CUR_ELEMENT} == #{literal}"
585
+ end
586
+
587
+ def compile_predicate(predicate)
588
+ if predicate.end_with?('(') # is there an arglist?
589
+ args = compile_args(tokens)
590
+ predicate = predicate[0..-2] # drop the trailing (
591
+ "#{CUR_ELEMENT}.#{predicate}(#{args.join(',')})"
592
+ else
593
+ "#{CUR_ELEMENT}.#{predicate}"
594
+ end
595
+ end
596
+
597
+ def compile_funcall(method)
598
+ # call a method in the context which this pattern-matching
599
+ # code is used in. pass target value as an argument
600
+ method = method[1..-1] # drop the leading #
601
+ if method.end_with?('(') # is there an arglist?
602
+ args = compile_args(tokens)
603
+ method = method[0..-2] # drop the trailing (
604
+ "#{method}(#{CUR_ELEMENT},#{args.join(',')})"
605
+ else
606
+ "#{method}(#{CUR_ELEMENT})"
607
+ end
608
+ end
609
+
610
+ def compile_nodetype(type)
611
+ "#{compile_guard_clause} && #{CUR_NODE}.#{type.tr('-', '_')}_type?"
612
+ end
613
+
614
+ def compile_param(number)
615
+ "#{CUR_ELEMENT} == #{get_param(number)}"
616
+ end
617
+
618
+ def compile_args(tokens)
619
+ index = tokens.find_index { |token| token == ')' }
620
+
621
+ tokens.slice!(0..index).each_with_object([]) do |token, args|
622
+ next if [')', ','].include?(token)
623
+
624
+ args << compile_arg(token)
625
+ end
626
+ end
627
+
628
+ def compile_arg(token)
629
+ case token
630
+ when WILDCARD then
631
+ name = token[1..-1]
632
+ access_unify(name) || fail_due_to('invalid in arglist: ' + token)
633
+ when LITERAL then token
634
+ when PARAM then get_param(token[1..-1])
635
+ when CLOSING then fail_due_to("#{token} in invalid position")
636
+ when nil then fail_due_to('pattern ended prematurely')
637
+ else fail_due_to("invalid token in arglist: #{token.inspect}")
638
+ end
639
+ end
640
+
641
+ def next_capture
642
+ index = @captures
643
+ @captures += 1
644
+ "captures[#{index}]"
645
+ end
646
+
647
+ def get_param(number)
648
+ number = number.empty? ? 1 : Integer(number)
649
+ @params = number if number > @params
650
+ number.zero? ? @root : "param#{number}"
651
+ end
652
+
653
+ def emit_yield_capture(when_no_capture = '')
654
+ yield_val = if @captures.zero?
655
+ when_no_capture
656
+ elsif @captures == 1
657
+ 'captures[0]' # Circumvent https://github.com/jruby/jruby/issues/5710
658
+ else
659
+ '*captures'
660
+ end
661
+ "yield(#{yield_val})"
662
+ end
663
+
664
+ def emit_retval
665
+ if @captures.zero?
666
+ 'true'
667
+ elsif @captures == 1
668
+ 'captures[0]'
669
+ else
670
+ 'captures'
671
+ end
672
+ end
673
+
674
+ def emit_param_list
675
+ (1..@params).map { |n| "param#{n}" }.join(',')
676
+ end
677
+
678
+ def emit_trailing_params
679
+ params = emit_param_list
680
+ params.empty? ? '' : ",#{params}"
681
+ end
682
+
683
+ def emit_method_code
684
+ <<~RUBY
685
+ return unless #{@match_code}
686
+ block_given? ? #{emit_yield_capture} : (return #{emit_retval})
687
+ RUBY
688
+ end
689
+
690
+ def fail_due_to(message)
691
+ raise Invalid, "Couldn't compile due to #{message}. Pattern: #{@string}"
692
+ end
693
+
694
+ def with_temp_node(cur_node)
695
+ with_temp_variables do |node|
696
+ yield "(#{node} = #{cur_node})", node
697
+ end
698
+ .gsub("\n", "\n ") # Nicer indent for debugging
699
+ end
700
+
701
+ def with_temp_variables(&block)
702
+ names = block.parameters.map { |_, name| next_temp_variable(name) }
703
+ yield(*names)
704
+ end
705
+
706
+ def next_temp_variable(name)
707
+ "#{name}#{next_temp_value}"
708
+ end
709
+
710
+ def next_temp_value
711
+ @temps += 1
712
+ end
713
+
714
+ def auto_use_temp_node?(code)
715
+ code.scan(CUR_PLACEHOLDER).count > 1
716
+ end
717
+
718
+ # with_<...>_context methods are used whenever the context,
719
+ # i.e. the current node or the current element can be determined.
720
+
721
+ def with_child_context(code, child_index)
722
+ with_context(code, "#{CUR_NODE}.children[#{child_index}]")
723
+ end
724
+
725
+ def with_context(code, cur_node,
726
+ use_temp_node: auto_use_temp_node?(code))
727
+ if use_temp_node
728
+ with_temp_node(cur_node) do |init, temp_var|
729
+ substitute_cur_node(code, temp_var, first_cur_node: init)
730
+ end
731
+ else
732
+ substitute_cur_node(code, cur_node)
733
+ end
734
+ end
735
+
736
+ def with_seq_head_context(code)
737
+ fail_due_to('parentheses at sequence head') if code.include?(SEQ_HEAD_GUARD)
738
+
739
+ code.gsub CUR_ELEMENT, "#{CUR_NODE}.type"
740
+ end
741
+
742
+ def substitute_cur_node(code, cur_node, first_cur_node: cur_node)
743
+ iter = 0
744
+ code
745
+ .gsub(CUR_ELEMENT, CUR_NODE)
746
+ .gsub(CUR_NODE) do
747
+ iter += 1
748
+ iter == 1 ? first_cur_node : cur_node
749
+ end
750
+ .gsub(SEQ_HEAD_GUARD, '')
751
+ end
752
+
753
+ def self.tokens(pattern)
754
+ pattern.scan(TOKEN).reject { |token| token =~ /\A#{SEPARATORS}\Z/ }
755
+ end
756
+ end
757
+ private_constant :Compiler
758
+
759
+ # Helpers for defining methods based on a pattern string
760
+ module Macros
761
+ # Define a method which applies a pattern to an AST node
762
+ #
763
+ # The new method will return nil if the node does not match
764
+ # If the node matches, and a block is provided, the new method will
765
+ # yield to the block (passing any captures as block arguments).
766
+ # If the node matches, and no block is provided, the new method will
767
+ # return the captures, or `true` if there were none.
768
+ def def_node_matcher(method_name, pattern_str)
769
+ compiler = Compiler.new(pattern_str, 'node')
770
+ src = "def #{method_name}(node = self" \
771
+ "#{compiler.emit_trailing_params});" \
772
+ "#{compiler.emit_method_code};end"
773
+
774
+ location = caller_locations(1, 1).first
775
+ class_eval(src, location.path, location.lineno)
776
+ end
777
+
778
+ # Define a method which recurses over the descendants of an AST node,
779
+ # checking whether any of them match the provided pattern
780
+ #
781
+ # If the method name ends with '?', the new method will return `true`
782
+ # as soon as it finds a descendant which matches. Otherwise, it will
783
+ # yield all descendants which match.
784
+ def def_node_search(method_name, pattern_str)
785
+ compiler = Compiler.new(pattern_str, 'node')
786
+ called_from = caller(1..1).first.split(':')
787
+
788
+ if method_name.to_s.end_with?('?')
789
+ node_search_first(method_name, compiler, called_from)
790
+ else
791
+ node_search_all(method_name, compiler, called_from)
792
+ end
793
+ end
794
+
795
+ def node_search_first(method_name, compiler, called_from)
796
+ node_search(method_name, compiler, 'return true', '', called_from)
797
+ end
798
+
799
+ def node_search_all(method_name, compiler, called_from)
800
+ yield_code = compiler.emit_yield_capture('node')
801
+ prelude = "return enum_for(:#{method_name}, node0" \
802
+ "#{compiler.emit_trailing_params}) unless block_given?"
803
+
804
+ node_search(method_name, compiler, yield_code, prelude, called_from)
805
+ end
806
+
807
+ def node_search(method_name, compiler, on_match, prelude, called_from)
808
+ src = node_search_body(method_name, compiler.emit_trailing_params,
809
+ prelude, compiler.match_code, on_match)
810
+ filename, lineno = *called_from
811
+ class_eval(src, filename, lineno.to_i)
812
+ end
813
+
814
+ def node_search_body(method_name, trailing_params, prelude, match_code,
815
+ on_match)
816
+ <<~RUBY
817
+ def #{method_name}(node0#{trailing_params})
818
+ #{prelude}
819
+ node0.each_node do |node|
820
+ if #{match_code}
821
+ #{on_match}
822
+ end
823
+ end
824
+ nil
825
+ end
826
+ RUBY
827
+ end
828
+ end
829
+
830
+ attr_reader :pattern
831
+
832
+ def initialize(str)
833
+ @pattern = str
834
+ compiler = Compiler.new(str)
835
+ src = "def match(node0#{compiler.emit_trailing_params});" \
836
+ "#{compiler.emit_method_code}end"
837
+ instance_eval(src, __FILE__, __LINE__ + 1)
838
+ end
839
+
840
+ def match(*args)
841
+ # If we're here, it's because the singleton method has not been defined,
842
+ # either because we've been dup'ed or serialized through YAML
843
+ initialize(pattern)
844
+ match(*args)
845
+ end
846
+
847
+ def marshal_load(pattern)
848
+ initialize pattern
849
+ end
850
+
851
+ def marshal_dump
852
+ pattern
853
+ end
854
+
855
+ def ==(other)
856
+ other.is_a?(NodePattern) &&
857
+ Compiler.tokens(other.pattern) == Compiler.tokens(pattern)
858
+ end
859
+ alias eql? ==
860
+
861
+ def to_s
862
+ "#<#{self.class} #{pattern}>"
863
+ end
864
+
865
+ # Yields its argument and any descendants, depth-first.
866
+ #
867
+ def self.descend(element, &block)
868
+ return to_enum(__method__, element) unless block_given?
869
+
870
+ yield element
871
+
872
+ if element.is_a?(::RuboCop::AST::Node)
873
+ element.children.each do |child|
874
+ descend(child, &block)
875
+ end
876
+ end
877
+
878
+ nil
879
+ end
880
+ end
881
+ end
882
+ end
883
+ # rubocop:enable Metrics/ClassLength, Metrics/CyclomaticComplexity