rubocop-ast 0.0.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +23 -4
  3. data/lib/rubocop/ast.rb +10 -7
  4. data/lib/rubocop/ast/builder.rb +6 -1
  5. data/lib/rubocop/ast/ext/range.rb +28 -0
  6. data/lib/rubocop/ast/node.rb +41 -8
  7. data/lib/rubocop/ast/node/array_node.rb +2 -8
  8. data/lib/rubocop/ast/node/break_node.rb +1 -6
  9. data/lib/rubocop/ast/node/case_match_node.rb +3 -9
  10. data/lib/rubocop/ast/node/case_node.rb +13 -9
  11. data/lib/rubocop/ast/node/def_node.rb +5 -24
  12. data/lib/rubocop/ast/node/defined_node.rb +2 -0
  13. data/lib/rubocop/ast/node/float_node.rb +1 -0
  14. data/lib/rubocop/ast/node/forward_args_node.rb +15 -0
  15. data/lib/rubocop/ast/node/hash_node.rb +21 -8
  16. data/lib/rubocop/ast/node/if_node.rb +7 -14
  17. data/lib/rubocop/ast/node/index_node.rb +48 -0
  18. data/lib/rubocop/ast/node/indexasgn_node.rb +50 -0
  19. data/lib/rubocop/ast/node/int_node.rb +1 -0
  20. data/lib/rubocop/ast/node/lambda_node.rb +65 -0
  21. data/lib/rubocop/ast/node/mixin/method_dispatch_node.rb +2 -8
  22. data/lib/rubocop/ast/node/mixin/method_identifier_predicates.rb +99 -3
  23. data/lib/rubocop/ast/node/mixin/parameterized_node.rb +56 -0
  24. data/lib/rubocop/ast/node/next_node.rb +12 -0
  25. data/lib/rubocop/ast/node/pair_node.rb +2 -2
  26. data/lib/rubocop/ast/node/regexp_node.rb +61 -2
  27. data/lib/rubocop/ast/node/return_node.rb +1 -13
  28. data/lib/rubocop/ast/node/send_node.rb +9 -2
  29. data/lib/rubocop/ast/node/super_node.rb +2 -0
  30. data/lib/rubocop/ast/node/when_node.rb +3 -9
  31. data/lib/rubocop/ast/node/yield_node.rb +2 -0
  32. data/lib/rubocop/ast/node_pattern.rb +952 -0
  33. data/lib/rubocop/ast/processed_source.rb +246 -0
  34. data/lib/rubocop/ast/token.rb +116 -0
  35. data/lib/rubocop/ast/traversal.rb +5 -3
  36. data/lib/rubocop/ast/version.rb +1 -1
  37. metadata +16 -13
  38. data/lib/rubocop/ast/node/retry_node.rb +0 -17
  39. data/lib/rubocop/error.rb +0 -34
  40. data/lib/rubocop/node_pattern.rb +0 -881
  41. data/lib/rubocop/processed_source.rb +0 -211
  42. data/lib/rubocop/token.rb +0 -114
@@ -2,8 +2,11 @@
2
2
 
3
3
  module RuboCop
4
4
  module AST
5
+ # Requires implementing `arguments`.
6
+ #
5
7
  # Common functionality for nodes that are parameterized:
6
8
  # `send`, `super`, `zsuper`, `def`, `defs`
9
+ # and (modern only): `index`, `indexasgn`, `lambda`
7
10
  module ParameterizedNode
8
11
  # Checks whether this node's arguments are wrapped in parentheses.
9
12
  #
@@ -56,6 +59,59 @@ module RuboCop
56
59
  arguments? &&
57
60
  (last_argument.block_pass_type? || last_argument.blockarg_type?)
58
61
  end
62
+
63
+ # A specialized `ParameterizedNode` for nodes that have a single child
64
+ # containing either `nil`, an argument, or a `begin` node with all the
65
+ # arguments
66
+ module WrappedArguments
67
+ include ParameterizedNode
68
+ # @return [Array] The arguments of the node.
69
+ def arguments
70
+ first = children.first
71
+ if first&.begin_type?
72
+ first.children
73
+ else
74
+ children
75
+ end
76
+ end
77
+ end
78
+
79
+ # A specialized `ParameterizedNode`.
80
+ # Requires implementing `first_argument_index`
81
+ # Implements `arguments` as `children[first_argument_index..-1]`
82
+ # and optimizes other calls
83
+ module RestArguments
84
+ include ParameterizedNode
85
+ # @return [Array] arguments, if any
86
+ def arguments
87
+ children[first_argument_index..-1]
88
+ end
89
+
90
+ # A shorthand for getting the first argument of the node.
91
+ # Equivalent to `arguments.first`.
92
+ #
93
+ # @return [Node, nil] the first argument of the node,
94
+ # or `nil` if there are no arguments
95
+ def first_argument
96
+ children[first_argument_index]
97
+ end
98
+
99
+ # A shorthand for getting the last argument of the node.
100
+ # Equivalent to `arguments.last`.
101
+ #
102
+ # @return [Node, nil] the last argument of the node,
103
+ # or `nil` if there are no arguments
104
+ def last_argument
105
+ children[-1] if arguments?
106
+ end
107
+
108
+ # Checks whether this node has any arguments.
109
+ #
110
+ # @return [Boolean] whether this node has any arguments
111
+ def arguments?
112
+ children.size > first_argument_index
113
+ end
114
+ end
59
115
  end
60
116
  end
61
117
  end
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
+ module RuboCop
4
+ module AST
5
+ # A node extension for `next` nodes. This will be used in place of a
6
+ # plain node when the builder constructs the AST, making its methods
7
+ # available to all `next` nodes within RuboCop.
8
+ class NextNode < Node
9
+ include ParameterizedNode::WrappedArguments
10
+ end
11
+ end
12
+ end
@@ -32,7 +32,7 @@ module RuboCop
32
32
  #
33
33
  # @param [Boolean] with_spacing whether to include spacing
34
34
  # @return [String] the delimiter of the `pair`
35
- def delimiter(with_spacing = false)
35
+ def delimiter(*deprecated, with_spacing: deprecated.first)
36
36
  if with_spacing
37
37
  hash_rocket? ? SPACED_HASH_ROCKET : SPACED_COLON
38
38
  else
@@ -44,7 +44,7 @@ module RuboCop
44
44
  #
45
45
  # @param [Boolean] with_spacing whether to include spacing
46
46
  # @return [String] the inverse delimiter of the `pair`
47
- def inverse_delimiter(with_spacing = false)
47
+ def inverse_delimiter(*deprecated, with_spacing: deprecated.first)
48
48
  if with_spacing
49
49
  hash_rocket? ? SPACED_COLON : SPACED_HASH_ROCKET
50
50
  else
@@ -10,12 +10,15 @@ module RuboCop
10
10
  x: Regexp::EXTENDED,
11
11
  i: Regexp::IGNORECASE,
12
12
  m: Regexp::MULTILINE,
13
- n: Regexp::NOENCODING
13
+ n: Regexp::NOENCODING,
14
+ o: 0
14
15
  }.freeze
15
16
 
17
+ # Note: The 'o' option is ignored.
18
+ #
16
19
  # @return [Regexp] a regexp of this node
17
20
  def to_regexp
18
- option = regopt.children.map { |opt| OPTIONS[opt] }.inject(:|)
21
+ option = regopt.children.map { |opt| OPTIONS.fetch(opt) }.inject(:|)
19
22
  Regexp.new(content, option)
20
23
  end
21
24
 
@@ -28,6 +31,62 @@ module RuboCop
28
31
  def content
29
32
  children.select(&:str_type?).map(&:str_content).join
30
33
  end
34
+
35
+ # @return [Bool] if the regexp is a /.../ literal
36
+ def slash_literal?
37
+ loc.begin.source == '/'
38
+ end
39
+
40
+ # @return [Bool] if the regexp is a %r{...} literal (using any delimiters)
41
+ def percent_r_literal?
42
+ !slash_literal?
43
+ end
44
+
45
+ # @return [Array<String>] the opening and closing regexp delimiters (without %r)
46
+ def delimiters
47
+ [loc.begin.source[-1], loc.end.source[0]]
48
+ end
49
+
50
+ # @return [Bool] if char is one of the delimiters
51
+ def delimiter?(char)
52
+ delimiters.include?(char)
53
+ end
54
+
55
+ # @return [Bool] if regexp contains interpolation
56
+ def interpolation?
57
+ children.any?(&:begin_type?)
58
+ end
59
+
60
+ # @return [Bool] if regexp uses the multiline regopt
61
+ def multiline_mode?
62
+ regopt_include?(:m)
63
+ end
64
+
65
+ # @return [Bool] if regexp uses the extended regopt
66
+ def extended?
67
+ regopt_include?(:x)
68
+ end
69
+
70
+ # @return [Bool] if regexp uses the ignore-case regopt
71
+ def ignore_case?
72
+ regopt_include?(:i)
73
+ end
74
+
75
+ # @return [Bool] if regexp uses the single-interpolation regopt
76
+ def single_interpolation?
77
+ regopt_include?(:o)
78
+ end
79
+
80
+ # @return [Bool] if regexp uses the no-encoding regopt
81
+ def no_encoding?
82
+ regopt_include?(:n)
83
+ end
84
+
85
+ private
86
+
87
+ def regopt_include?(option)
88
+ regopt.children.include?(option)
89
+ end
31
90
  end
32
91
  end
33
92
  end
@@ -6,19 +6,7 @@ module RuboCop
6
6
  # plain node when the builder constructs the AST, making its methods
7
7
  # available to all `return` nodes within RuboCop.
8
8
  class ReturnNode < Node
9
- include MethodDispatchNode
10
- include ParameterizedNode
11
-
12
- # Returns the arguments of the `return`.
13
- #
14
- # @return [Array] The arguments of the `return`.
15
- def arguments
16
- if node_parts.one? && node_parts.first.begin_type?
17
- node_parts.first.children
18
- else
19
- node_parts
20
- end
21
- end
9
+ include ParameterizedNode::WrappedArguments
22
10
  end
23
11
  end
24
12
  end
@@ -6,12 +6,19 @@ module RuboCop
6
6
  # node when the builder constructs the AST, making its methods available
7
7
  # to all `send` nodes within RuboCop.
8
8
  class SendNode < Node
9
- include ParameterizedNode
9
+ include ParameterizedNode::RestArguments
10
10
  include MethodDispatchNode
11
11
 
12
12
  def_node_matcher :attribute_accessor?, <<~PATTERN
13
- (send nil? ${:attr_reader :attr_writer :attr_accessor :attr} $...)
13
+ [(send nil? ${:attr_reader :attr_writer :attr_accessor :attr} $...)
14
+ (_ _ _ _ ...)]
14
15
  PATTERN
16
+
17
+ private
18
+
19
+ def first_argument_index
20
+ 2
21
+ end
15
22
  end
16
23
  end
17
24
  end
@@ -16,6 +16,8 @@ module RuboCop
16
16
  def node_parts
17
17
  [nil, :super, *to_a]
18
18
  end
19
+
20
+ alias arguments children
19
21
  end
20
22
  end
21
23
  end
@@ -13,17 +13,11 @@ module RuboCop
13
13
  node_parts[0...-1]
14
14
  end
15
15
 
16
- # Calls the given block for each condition node in the `when` branch.
17
- # If no block is given, an `Enumerator` is returned.
18
- #
19
- # @return [self] if a block is given
20
- # @return [Enumerator] if no block is given
21
- def each_condition
16
+ # @deprecated Use `conditions.each`
17
+ def each_condition(&block)
22
18
  return conditions.to_enum(__method__) unless block_given?
23
19
 
24
- conditions.each do |condition|
25
- yield condition
26
- end
20
+ conditions.each(&block)
27
21
 
28
22
  self
29
23
  end
@@ -16,6 +16,8 @@ module RuboCop
16
16
  def node_parts
17
17
  [nil, :yield, *to_a]
18
18
  end
19
+
20
+ alias arguments children
19
21
  end
20
22
  end
21
23
  end
@@ -0,0 +1,952 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'delegate'
4
+ require 'erb'
5
+
6
+ # rubocop:disable Metrics/ClassLength, Metrics/CyclomaticComplexity
7
+ module RuboCop
8
+ module AST
9
+ # This class performs a pattern-matching operation on an AST node.
10
+ #
11
+ # Initialize a new `NodePattern` with `NodePattern.new(pattern_string)`, then
12
+ # pass an AST node to `NodePattern#match`. Alternatively, use one of the class
13
+ # macros in `NodePattern::Macros` to define your own pattern-matching method.
14
+ #
15
+ # If the match fails, `nil` will be returned. If the match succeeds, the
16
+ # return value depends on whether a block was provided to `#match`, and
17
+ # whether the pattern contained any "captures" (values which are extracted
18
+ # from a matching AST.)
19
+ #
20
+ # - With block: #match yields the captures (if any) and passes the return
21
+ # value of the block through.
22
+ # - With no block, but one capture: the capture is returned.
23
+ # - With no block, but multiple captures: captures are returned as an array.
24
+ # - With no block and no captures: #match returns `true`.
25
+ #
26
+ # ## Pattern string format examples
27
+ #
28
+ # ':sym' # matches a literal symbol
29
+ # '1' # matches a literal integer
30
+ # 'nil' # matches a literal nil
31
+ # 'send' # matches (send ...)
32
+ # '(send)' # matches (send)
33
+ # '(send ...)' # matches (send ...)
34
+ # '(op-asgn)' # node types with hyphenated names also work
35
+ # '{send class}' # matches (send ...) or (class ...)
36
+ # '({send class})' # matches (send) or (class)
37
+ # '(send const)' # matches (send (const ...))
38
+ # '(send _ :new)' # matches (send <anything> :new)
39
+ # '(send $_ :new)' # as above, but whatever matches the $_ is captured
40
+ # '(send $_ $_)' # you can use as many captures as you want
41
+ # '(send !const ...)' # ! negates the next part of the pattern
42
+ # '$(send const ...)' # arbitrary matching can be performed on a capture
43
+ # '(send _recv _msg)' # wildcards can be named (for readability)
44
+ # '(send ... :new)' # you can match against the last children
45
+ # '(array <str sym>)' # you can match children in any order. This
46
+ # # would match `['x', :y]` as well as `[:y, 'x']`
47
+ # '(_ <str sym ...>)' # will match if arguments have at least a `str` and
48
+ # # a `sym` node, but can have more.
49
+ # '(array <$str $_>)' # captures are in the order of the pattern,
50
+ # # irrespective of the actual order of the children
51
+ # '(array int*)' # will match an array of 0 or more integers
52
+ # '(array int ?)' # will match 0 or 1 integer.
53
+ # # Note: Space needed to distinguish from int?
54
+ # '(array int+)' # will match an array of 1 or more integers
55
+ # '(array (int $_)+)' # as above and will capture the numbers in an array
56
+ # '(send $...)' # capture all the children as an array
57
+ # '(send $... int)' # capture all children but the last as an array
58
+ # '(send _x :+ _x)' # unification is performed on named wildcards
59
+ # # (like Prolog variables...)
60
+ # # (#== is used to see if values unify)
61
+ # '(int odd?)' # words which end with a ? are predicate methods,
62
+ # # and are called on the target to see if it matches
63
+ # # any Ruby method which the matched object supports
64
+ # # can be used
65
+ # # if a truthy value is returned, the match succeeds
66
+ # '(int [!1 !2])' # [] contains multiple patterns, ALL of which must
67
+ # # match in that position
68
+ # # in other words, while {} is pattern union (logical
69
+ # # OR), [] is intersection (logical AND)
70
+ # '(send %1 _)' # % stands for a parameter which must be supplied to
71
+ # # #match at matching time
72
+ # # it will be compared to the corresponding value in
73
+ # # the AST using #=== so you can pass Procs, Regexp,
74
+ # # etc. in addition to Nodes or literals.
75
+ # # `Array#===` will never match a node element, but
76
+ # # `Set#===` is an alias to `Set#include?` (Ruby 2.5+
77
+ # # only), and so can be very useful to match within
78
+ # # many possible literals / Nodes.
79
+ # # a bare '%' is the same as '%1'
80
+ # # the number of extra parameters passed to #match
81
+ # # must equal the highest % value in the pattern
82
+ # # for consistency, %0 is the 'root node' which is
83
+ # # passed as the 1st argument to #match, where the
84
+ # # matching process starts
85
+ # '(send _ %named)' # arguments can also be passed as named
86
+ # # parameters (see `%1`)
87
+ # # Note that the macros `def_node_matcher` and
88
+ # # `def_node_search` accept default values for these.
89
+ # '(send _ %CONST)' # the named constant will act like `%1` and `%named`.
90
+ # '^^send' # each ^ ascends one level in the AST
91
+ # # so this matches against the grandparent node
92
+ # '`send' # descends any number of levels in the AST
93
+ # # so this matches against any descendant node
94
+ # '#method' # we call this a 'funcall'; it calls a method in the
95
+ # # context where a pattern-matching method is defined
96
+ # # if that returns a truthy value, the match succeeds
97
+ # 'equal?(%1)' # predicates can be given 1 or more extra args
98
+ # '#method(%0, 1)' # funcalls can also be given 1 or more extra args
99
+ # # These arguments can be patterns themselves, in
100
+ # # which case a matcher responding to === will be
101
+ # # passed.
102
+ # '# comment' # comments are accepted at the end of lines
103
+ #
104
+ # You can nest arbitrarily deep:
105
+ #
106
+ # # matches node parsed from 'Const = Class.new' or 'Const = Module.new':
107
+ # '(casgn nil? :Const (send (const nil? {:Class :Module}) :new))'
108
+ # # matches a node parsed from an 'if', with a '==' comparison,
109
+ # # and no 'else' branch:
110
+ # '(if (send _ :== _) _ nil?)'
111
+ #
112
+ # Note that patterns like 'send' are implemented by calling `#send_type?` on
113
+ # the node being matched, 'const' by `#const_type?`, 'int' by `#int_type?`,
114
+ # and so on. Therefore, if you add methods which are named like
115
+ # `#prefix_type?` to the AST node class, then 'prefix' will become usable as
116
+ # a pattern.
117
+ class NodePattern
118
+ # @private
119
+ Invalid = Class.new(StandardError)
120
+
121
+ # @private
122
+ # Builds Ruby code which implements a pattern
123
+ class Compiler
124
+ SYMBOL = %r{:(?:[\w+@*/?!<>=~|%^-]+|\[\]=?)}.freeze
125
+ IDENTIFIER = /[a-zA-Z_][a-zA-Z0-9_-]*/.freeze
126
+ COMMENT = /#\s.*$/.freeze
127
+
128
+ META = Regexp.union(
129
+ %w"( ) { } [ ] $< < > $... $ ! ^ ` ... + * ?"
130
+ ).freeze
131
+ NUMBER = /-?\d+(?:\.\d+)?/.freeze
132
+ STRING = /".+?"/.freeze
133
+ METHOD_NAME = /\#?#{IDENTIFIER}[!?]?\(?/.freeze
134
+ PARAM_CONST = /%[A-Z:][a-zA-Z_:]+/.freeze
135
+ KEYWORD_NAME = /%[a-z_]+/.freeze
136
+ PARAM_NUMBER = /%\d*/.freeze
137
+
138
+ SEPARATORS = /\s+/.freeze
139
+ ONLY_SEPARATOR = /\A#{SEPARATORS}\Z/.freeze
140
+
141
+ TOKENS = Regexp.union(META, PARAM_CONST, KEYWORD_NAME, PARAM_NUMBER, NUMBER,
142
+ METHOD_NAME, SYMBOL, STRING)
143
+
144
+ TOKEN = /\G(?:#{SEPARATORS}|#{TOKENS}|.)/.freeze
145
+
146
+ NODE = /\A#{IDENTIFIER}\Z/.freeze
147
+ PREDICATE = /\A#{IDENTIFIER}\?\(?\Z/.freeze
148
+ WILDCARD = /\A_(?:#{IDENTIFIER})?\Z/.freeze
149
+
150
+ FUNCALL = /\A\##{METHOD_NAME}/.freeze
151
+ LITERAL = /\A(?:#{SYMBOL}|#{NUMBER}|#{STRING})\Z/.freeze
152
+ PARAM = /\A#{PARAM_NUMBER}\Z/.freeze
153
+ CONST = /\A#{PARAM_CONST}\Z/.freeze
154
+ KEYWORD = /\A#{KEYWORD_NAME}\Z/.freeze
155
+ CLOSING = /\A(?:\)|\}|\])\Z/.freeze
156
+
157
+ REST = '...'
158
+ CAPTURED_REST = '$...'
159
+
160
+ attr_reader :match_code, :tokens, :captures
161
+
162
+ SEQ_HEAD_INDEX = -1
163
+
164
+ # Placeholders while compiling, see with_..._context methods
165
+ CUR_PLACEHOLDER = '@@@cur'
166
+ CUR_NODE = "#{CUR_PLACEHOLDER} node@@@"
167
+ CUR_ELEMENT = "#{CUR_PLACEHOLDER} element@@@"
168
+ SEQ_HEAD_GUARD = '@@@seq guard head@@@'
169
+ MULTIPLE_CUR_PLACEHOLDER = /#{CUR_PLACEHOLDER}.*#{CUR_PLACEHOLDER}/.freeze
170
+
171
+ line = __LINE__
172
+ ANY_ORDER_TEMPLATE = ERB.new <<~RUBY.gsub("-%>\n", '%>')
173
+ <% if capture_rest %>(<%= capture_rest %> = []) && <% end -%>
174
+ <% if capture_all %>(<%= capture_all %> = <% end -%>
175
+ <%= CUR_NODE %>.children[<%= range %>]<% if capture_all %>)<% end -%>
176
+ .each_with_object({}) { |<%= child %>, <%= matched %>|
177
+ case
178
+ <% patterns.each_with_index do |pattern, i| -%>
179
+ when !<%= matched %>[<%= i %>] && <%=
180
+ with_context(pattern, child, use_temp_node: false)
181
+ %> then <%= matched %>[<%= i %>] = true
182
+ <% end -%>
183
+ <% if !rest %> else break({})
184
+ <% elsif capture_rest %> else <%= capture_rest %> << <%= child %>
185
+ <% end -%>
186
+ end
187
+ }.size == <%= patterns.size -%>
188
+ RUBY
189
+ ANY_ORDER_TEMPLATE.location = [__FILE__, line + 1]
190
+
191
+ line = __LINE__
192
+ REPEATED_TEMPLATE = ERB.new <<~RUBY.gsub("-%>\n", '%>')
193
+ <% if captured %>(<%= accumulate %> = Array.new) && <% end %>
194
+ <%= CUR_NODE %>.children[<%= range %>].all? do |<%= child %>|
195
+ <%= with_context(expr, child, use_temp_node: false) %><% if captured %>&&
196
+ <%= accumulate %>.push(<%= captured %>)<% end %>
197
+ end <% if captured %>&&
198
+ (<%= captured %> = if <%= accumulate %>.empty?
199
+ <%= captured %>.map{[]} # Transpose hack won't work for empty case
200
+ else
201
+ <%= accumulate %>.transpose
202
+ end) <% end -%>
203
+ RUBY
204
+ REPEATED_TEMPLATE.location = [__FILE__, line + 1]
205
+
206
+ def initialize(str, root = 'node0', node_var = root)
207
+ @string = str
208
+ # For def_node_matcher, root == node_var
209
+ # For def_node_search, root is the root node to search on,
210
+ # and node_var is the current descendant being searched.
211
+ @root = root
212
+ @node_var = node_var
213
+
214
+ @temps = 0 # avoid name clashes between temp variables
215
+ @captures = 0 # number of captures seen
216
+ @unify = {} # named wildcard -> temp variable
217
+ @params = 0 # highest % (param) number seen
218
+ @keywords = Set[] # keyword parameters seen
219
+ run
220
+ end
221
+
222
+ def run
223
+ @tokens = Compiler.tokens(@string)
224
+
225
+ @match_code = with_context(compile_expr, @node_var, use_temp_node: false)
226
+ @match_code.prepend("(captures = Array.new(#{@captures})) && ") \
227
+ if @captures.positive?
228
+
229
+ fail_due_to('unbalanced pattern') unless tokens.empty?
230
+ end
231
+
232
+ # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
233
+ def compile_expr(token = tokens.shift)
234
+ # read a single pattern-matching expression from the token stream,
235
+ # return Ruby code which performs the corresponding matching operation
236
+ #
237
+ # the 'pattern-matching' expression may be a composite which
238
+ # contains an arbitrary number of sub-expressions, but that composite
239
+ # must all have precedence higher or equal to that of `&&`
240
+ #
241
+ # Expressions may use placeholders like:
242
+ # CUR_NODE: Ruby code that evaluates to an AST node
243
+ # CUR_ELEMENT: Either the node or the type if in first element of
244
+ # a sequence (aka seq_head, e.g. "(seq_head first_node_arg ...")
245
+ if (atom = compile_atom(token))
246
+ return atom_to_expr(atom)
247
+ end
248
+
249
+ case token
250
+ when '(' then compile_seq
251
+ when '{' then compile_union
252
+ when '[' then compile_intersect
253
+ when '!' then compile_negation
254
+ when '$' then compile_capture
255
+ when '^' then compile_ascend
256
+ when '`' then compile_descend
257
+ when WILDCARD then compile_new_wildcard(token[1..-1])
258
+ when FUNCALL then compile_funcall(token)
259
+ when PREDICATE then compile_predicate(token)
260
+ when NODE then compile_nodetype(token)
261
+ else fail_due_to("invalid token #{token.inspect}")
262
+ end
263
+ end
264
+ # rubocop:enable Metrics/MethodLength, Metrics/AbcSize
265
+
266
+ def tokens_until(stop, what)
267
+ return to_enum __method__, stop, what unless block_given?
268
+
269
+ fail_due_to("empty #{what}") if tokens.first == stop
270
+ yield until tokens.first == stop
271
+ tokens.shift
272
+ end
273
+
274
+ def compile_seq
275
+ terms = tokens_until(')', 'sequence').map { variadic_seq_term }
276
+ Sequence.new(self, *terms).compile
277
+ end
278
+
279
+ def compile_guard_clause
280
+ "#{CUR_NODE}.is_a?(RuboCop::AST::Node)"
281
+ end
282
+
283
+ def variadic_seq_term
284
+ token = tokens.shift
285
+ case token
286
+ when CAPTURED_REST then compile_captured_ellipsis
287
+ when REST then compile_ellipsis
288
+ when '$<' then compile_any_order(next_capture)
289
+ when '<' then compile_any_order
290
+ else compile_repeated_expr(token)
291
+ end
292
+ end
293
+
294
+ def compile_repeated_expr(token)
295
+ before = @captures
296
+ expr = compile_expr(token)
297
+ min, max = parse_repetition_token
298
+ return [1, expr] if min.nil?
299
+
300
+ if @captures != before
301
+ captured = "captures[#{before}...#{@captures}]"
302
+ accumulate = next_temp_variable(:accumulate)
303
+ end
304
+ arity = min..max || Float::INFINITY
305
+
306
+ [arity, repeated_generator(expr, captured, accumulate)]
307
+ end
308
+
309
+ def repeated_generator(expr, captured, accumulate)
310
+ with_temp_variables do |child|
311
+ lambda do |range|
312
+ fail_due_to 'repeated pattern at beginning of sequence' if range.begin == SEQ_HEAD_INDEX
313
+ REPEATED_TEMPLATE.result(binding)
314
+ end
315
+ end
316
+ end
317
+
318
+ def parse_repetition_token
319
+ case tokens.first
320
+ when '*' then min = 0
321
+ when '+' then min = 1
322
+ when '?' then min = 0
323
+ max = 1
324
+ else return
325
+ end
326
+ tokens.shift
327
+ [min, max]
328
+ end
329
+
330
+ # @private
331
+ # Builds Ruby code for a sequence
332
+ # (head *first_terms variadic_term *last_terms)
333
+ class Sequence
334
+ extend Forwardable
335
+ def_delegators :@compiler, :compile_guard_clause, :with_seq_head_context,
336
+ :with_child_context, :fail_due_to
337
+
338
+ def initialize(compiler, *arity_term_list)
339
+ @arities, @terms = arity_term_list.transpose
340
+
341
+ @compiler = compiler
342
+ @variadic_index = @arities.find_index { |a| a.is_a?(Range) }
343
+ fail_due_to 'multiple variable patterns in same sequence' \
344
+ if @variadic_index && !@arities.one? { |a| a.is_a?(Range) }
345
+ end
346
+
347
+ def compile
348
+ [
349
+ compile_guard_clause,
350
+ compile_child_nb_guard,
351
+ compile_seq_head,
352
+ *compile_first_terms,
353
+ compile_variadic_term,
354
+ *compile_last_terms
355
+ ].compact.join(" &&\n") << SEQ_HEAD_GUARD
356
+ end
357
+
358
+ private
359
+
360
+ def first_terms_arity
361
+ first_terms_range { |r| @arities[r].inject(0, :+) } || 0
362
+ end
363
+
364
+ def last_terms_arity
365
+ last_terms_range { |r| @arities[r].inject(0, :+) } || 0
366
+ end
367
+
368
+ def variadic_term_min_arity
369
+ @variadic_index ? @arities[@variadic_index].begin : 0
370
+ end
371
+
372
+ def first_terms_range
373
+ yield 1..(@variadic_index || @terms.size) - 1 if seq_head?
374
+ end
375
+
376
+ def last_terms_range
377
+ yield @variadic_index + 1...@terms.size if @variadic_index
378
+ end
379
+
380
+ def seq_head?
381
+ @variadic_index != 0
382
+ end
383
+
384
+ def compile_child_nb_guard
385
+ fixed = first_terms_arity + last_terms_arity
386
+ min = fixed + variadic_term_min_arity
387
+ op = if @variadic_index
388
+ max_variadic = @arities[@variadic_index].end
389
+ if max_variadic != Float::INFINITY
390
+ range = min..fixed + max_variadic
391
+ return "(#{range}).cover?(#{CUR_NODE}.children.size)"
392
+ end
393
+ '>='
394
+ else
395
+ '=='
396
+ end
397
+ "#{CUR_NODE}.children.size #{op} #{min}"
398
+ end
399
+
400
+ def term(index, range)
401
+ t = @terms[index]
402
+ if t.respond_to? :call
403
+ t.call(range)
404
+ else
405
+ with_child_context(t, range.begin)
406
+ end
407
+ end
408
+
409
+ def compile_seq_head
410
+ return unless seq_head?
411
+
412
+ fail_due_to 'sequences cannot start with <' \
413
+ if @terms[0].respond_to? :call
414
+
415
+ with_seq_head_context(@terms[0])
416
+ end
417
+
418
+ def compile_first_terms
419
+ first_terms_range { |range| compile_terms(range, 0) }
420
+ end
421
+
422
+ def compile_last_terms
423
+ last_terms_range { |r| compile_terms(r, -last_terms_arity) }
424
+ end
425
+
426
+ def compile_terms(index_range, start)
427
+ index_range.map do |i|
428
+ current = start
429
+ start += @arities.fetch(i)
430
+ term(i, current..start - 1)
431
+ end
432
+ end
433
+
434
+ def compile_variadic_term
435
+ variadic_arity { |arity| term(@variadic_index, arity) }
436
+ end
437
+
438
+ def variadic_arity
439
+ return unless @variadic_index
440
+
441
+ first = @variadic_index.positive? ? first_terms_arity : SEQ_HEAD_INDEX
442
+ yield first..-last_terms_arity - 1
443
+ end
444
+ end
445
+ private_constant :Sequence
446
+
447
+ def compile_captured_ellipsis
448
+ capture = next_capture
449
+ block = lambda { |range|
450
+ # Consider ($...) like (_ $...):
451
+ range = 0..range.end if range.begin == SEQ_HEAD_INDEX
452
+ "(#{capture} = #{CUR_NODE}.children[#{range}])"
453
+ }
454
+ [0..Float::INFINITY, block]
455
+ end
456
+
457
+ def compile_ellipsis
458
+ [0..Float::INFINITY, 'true']
459
+ end
460
+
461
+ # rubocop:disable Metrics/MethodLength
462
+ def compile_any_order(capture_all = nil)
463
+ rest = capture_rest = nil
464
+ patterns = []
465
+ with_temp_variables do |child, matched|
466
+ tokens_until('>', 'any child') do
467
+ fail_due_to 'ellipsis must be at the end of <>' if rest
468
+ token = tokens.shift
469
+ case token
470
+ when CAPTURED_REST then rest = capture_rest = next_capture
471
+ when REST then rest = true
472
+ else patterns << compile_expr(token)
473
+ end
474
+ end
475
+ [rest ? patterns.size..Float::INFINITY : patterns.size,
476
+ ->(range) { ANY_ORDER_TEMPLATE.result(binding) }]
477
+ end
478
+ end
479
+ # rubocop:enable Metrics/MethodLength
480
+
481
+ def insure_same_captures(enum, what)
482
+ return to_enum __method__, enum, what unless block_given?
483
+
484
+ captures_before = captures_after = nil
485
+ enum.each do
486
+ captures_before ||= @captures
487
+ @captures = captures_before
488
+ yield
489
+ captures_after ||= @captures
490
+ fail_due_to("each #{what} must have same # of captures") if captures_after != @captures
491
+ end
492
+ end
493
+
494
+ def access_unify(name)
495
+ var = @unify[name]
496
+
497
+ if var == :forbidden_unification
498
+ fail_due_to "Wildcard #{name} was first seen in a subset of a" \
499
+ " union and can't be used outside that union"
500
+ end
501
+ var
502
+ end
503
+
504
+ def forbid_unification(*names)
505
+ names.each do |name|
506
+ @unify[name] = :forbidden_unification
507
+ end
508
+ end
509
+
510
+ # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
511
+ def unify_in_union(enum)
512
+ # We need to reset @unify before each branch is processed.
513
+ # Moreover we need to keep track of newly encountered wildcards.
514
+ # Var `new_unify_intersection` will hold those that are encountered
515
+ # in all branches; these are not a problem.
516
+ # Var `partial_unify` will hold those encountered in only a subset
517
+ # of the branches; these can't be used outside of the union.
518
+
519
+ return to_enum __method__, enum unless block_given?
520
+
521
+ new_unify_intersection = nil
522
+ partial_unify = []
523
+ unify_before = @unify.dup
524
+
525
+ result = enum.each do |e|
526
+ @unify = unify_before.dup if new_unify_intersection
527
+ yield e
528
+ new_unify = @unify.keys - unify_before.keys
529
+ if new_unify_intersection.nil?
530
+ # First iteration
531
+ new_unify_intersection = new_unify
532
+ else
533
+ union = new_unify_intersection | new_unify
534
+ new_unify_intersection &= new_unify
535
+ partial_unify |= union - new_unify_intersection
536
+ end
537
+ end
538
+
539
+ # At this point, all members of `new_unify_intersection` can be used
540
+ # for unification outside of the union, but partial_unify may not
541
+
542
+ forbid_unification(*partial_unify)
543
+
544
+ result
545
+ end
546
+ # rubocop:enable Metrics/MethodLength, Metrics/AbcSize
547
+
548
+ def compile_union
549
+ # we need to ensure that each branch of the {} contains the same
550
+ # number of captures (since only one branch of the {} can actually
551
+ # match, the same variables are used to hold the captures for each
552
+ # branch)
553
+ enum = tokens_until('}', 'union')
554
+ enum = unify_in_union(enum)
555
+ terms = insure_same_captures(enum, 'branch of {}')
556
+ .map { compile_expr }
557
+
558
+ "(#{terms.join(' || ')})"
559
+ end
560
+
561
+ def compile_intersect
562
+ tokens_until(']', 'intersection')
563
+ .map { compile_expr }
564
+ .join(' && ')
565
+ end
566
+
567
+ def compile_capture
568
+ "(#{next_capture} = #{CUR_ELEMENT}; #{compile_expr})"
569
+ end
570
+
571
+ def compile_negation
572
+ "!(#{compile_expr})"
573
+ end
574
+
575
+ def compile_ascend
576
+ with_context("#{CUR_NODE} && #{compile_expr}", "#{CUR_NODE}.parent")
577
+ end
578
+
579
+ def compile_descend
580
+ with_temp_variables do |descendant|
581
+ pattern = with_context(compile_expr, descendant,
582
+ use_temp_node: false)
583
+ [
584
+ "RuboCop::AST::NodePattern.descend(#{CUR_ELEMENT}).",
585
+ "any? do |#{descendant}|",
586
+ " #{pattern}",
587
+ 'end'
588
+ ].join("\n")
589
+ end
590
+ end
591
+
592
+ # Known wildcards are considered atoms, see `compile_atom`
593
+ def compile_new_wildcard(name)
594
+ return 'true' if name.empty?
595
+
596
+ n = @unify[name] = "unify_#{name.gsub('-', '__')}"
597
+ # double assign to avoid "assigned but unused variable"
598
+ "(#{n} = #{CUR_ELEMENT}; #{n} = #{n}; true)"
599
+ end
600
+
601
+ def compile_predicate(predicate)
602
+ if predicate.end_with?('(') # is there an arglist?
603
+ args = compile_args
604
+ predicate = predicate[0..-2] # drop the trailing (
605
+ "#{CUR_ELEMENT}.#{predicate}(#{args.join(',')})"
606
+ else
607
+ "#{CUR_ELEMENT}.#{predicate}"
608
+ end
609
+ end
610
+
611
# Emits a call to a method of the context in which the generated
# matching code runs, passing the current element as first argument.
#
# @param method [String] token starting with `#`; a trailing `(`
#   signals that an argument list follows in the token stream
# @return [String] the generated code
def compile_funcall(method)
  callee = method[1..-1] # drop the leading #
  return "#{callee}(#{CUR_ELEMENT})" unless callee.end_with?('(')

  extra = compile_args.join(',')
  # chomp drops the trailing ( that marked the argument list
  "#{callee.chomp('(')}(#{CUR_ELEMENT},#{extra})"
end
623
+
624
# Emits the guard clause followed by a `<type>_type?` check on the
# current node. Dashes in the type name become underscores.
#
# @param type [String] node type as written in the pattern
# @return [String] the generated code
def compile_nodetype(type)
  guard = compile_guard_clause
  predicate = "#{type.tr('-', '_')}_type?"
  "#{guard} && #{CUR_NODE}.#{predicate}"
end
627
+
628
# Compiles the arguments of a call up to the closing `)`. A `,` token
# directly after an argument is consumed as a separator.
#
# @return [Array] one compiled argument per entry
def compile_args
  tokens_until(')', 'call arguments').map do
    compile_arg.tap { tokens.shift if tokens.first == ',' }
  end
end
635
+
636
# Turns an atom into an expression testing the current element with
# `===`.
#
# @param atom [String] compiled atom
# @return [String] the generated code
def atom_to_expr(atom)
  format('%s === %s', atom, CUR_ELEMENT)
end
639
+
640
# Wraps a compiled expression in the source of a `Matcher` whose block
# evaluates that expression against the block argument.
#
# NOTE: the block parameter name is significant; `with_temp_variables`
# derives the generated variable name from it.
#
# @param expr [String] compiled expression
# @return [String] the generated code
def expr_to_atom(expr)
  with_temp_variables do |compare|
    body = with_context(expr, compare, use_temp_node: false)
    "::RuboCop::AST::NodePattern::Matcher.new{|#{compare}| #{body}}"
  end
end
646
+
647
# Compiles a token that can be used directly as a value.
#
# Fails on a closing token and on a missing token (pattern ended).
#
# @param token [String, nil] the token to compile
# @return compiled atom (e.g. ":literal" or "SOME_CONST")
#   or nil if not a simple atom (unknown wildcard, other tokens)
def compile_atom(token)
  case token
  when WILDCARD
    access_unify(token[1..-1]) # could be nil
  when LITERAL
    token
  when KEYWORD
    get_keyword(token[1..-1])
  when CONST
    get_const(token[1..-1])
  when PARAM
    get_param(token[1..-1])
  when CLOSING
    fail_due_to("#{token} in invalid position")
  when nil
    fail_due_to('pattern ended prematurely')
  end
end
660
+
661
# Consumes the next token and compiles it as a call argument: as an
# atom when possible, otherwise as a full expression converted to an
# atom.
#
# @return the compiled argument
def compile_arg
  token = tokens.shift
  atom = compile_atom(token)
  return atom if atom

  expr_to_atom(compile_expr(token))
end
665
+
666
# Reserves the next capture slot and increments the capture counter.
#
# @return [String] code referencing the reserved slot
def next_capture
  slot = "captures[#{@captures}]"
  @captures += 1
  slot
end
671
+
672
# Resolves a parameter reference to the name used in generated code
# and records the highest parameter index seen so far in `@params`.
#
# The index is parsed explicitly in base 10. Without the base,
# `Integer` treats a leading zero as an octal prefix, so "010" would
# silently become 8 and "08" would raise.
#
# @param number [String] index digits; empty means parameter 1
# @return the root (`@root`) for index 0, otherwise "param<index>"
# @raise [ArgumentError] if `number` is not a valid decimal integer
def get_param(number)
  number = number.empty? ? 1 : Integer(number, 10)
  @params = number if number > @params
  number.zero? ? @root : "param#{number}"
end
677
+
678
# Records `name` in `@keywords` and returns it unchanged.
#
# @param name [String] keyword name
# @return [String] the same name
def get_keyword(name)
  name.tap { |keyword| @keywords << keyword }
end
682
+
683
# Returns the constant reference untouched so that it appears in the
# generated code exactly as written in the pattern.
#
# @param const [String] constant name
# @return [String] the same name
def get_const(const)
  const
end
686
+
687
# Emits the `yield` passing the captures to the caller's block.
#
# @param when_no_capture [String] code to yield when the pattern has
#   no capture
# @return [String] the generated code
def emit_yield_capture(when_no_capture = '')
  return "yield(#{when_no_capture})" if @captures.zero?
  # A single capture is not splatted, to
  # circumvent https://github.com/jruby/jruby/issues/5710
  return 'yield(captures[0])' if @captures == 1

  'yield(*captures)'
end
697
+
698
# Emits the value returned on a successful match: `true` without
# captures, the capture itself when there is exactly one, and the
# whole captures array otherwise.
#
# @return [String] the generated code
def emit_retval
  return 'true' if @captures.zero?

  @captures == 1 ? 'captures[0]' : 'captures'
end
707
+
708
# Emits the comma-separated names `param1` to `param<@params>`.
#
# @return [String] the parameter list, empty when `@params` is zero
def emit_param_list
  names = 1.upto(@params).map { |index| "param#{index}" }
  names.join(',')
end
711
+
712
# Emits the keywords recorded in `@keywords` as a comma-separated
# list: `name: ` for each one, or `name: name` when forwarding.
#
# @param forwarding [Boolean] whether each keyword is passed along
#   under its own name
# @return [String] the keyword list
def emit_keyword_list(forwarding: false)
  entries = @keywords.map do |keyword|
    forwarding ? "#{keyword}: #{keyword}" : "#{keyword}: "
  end
  entries.join(',')
end
716
+
717
# Emits a full parameter list: the given leading entries, then the
# positional parameters, then the keywords. Empty parts are skipped.
#
# @param first [Array<String>] leading entries
# @param forwarding [Boolean] passed on to `emit_keyword_list`
# @return [String] the comma-separated list
def emit_params(*first, forwarding: false)
  pieces = first + [emit_param_list, emit_keyword_list(forwarding: forwarding)]
  pieces.reject(&:empty?).join(',')
end
722
+
723
# Emits the body of a matcher method: bail out unless the match code
# succeeds, then either yield the captures or return them.
#
# @return [String] two lines of generated code, newline-terminated
def emit_method_code
  guard = "return unless #{@match_code}"
  result = "block_given? ? #{emit_yield_capture} : (return #{emit_retval})"
  "#{guard}\n#{result}\n"
end
729
+
730
# Aborts compilation.
#
# @param message [String] reason, inserted into the error message
# @raise [Invalid] always; the message includes the pattern string
def fail_due_to(message)
  details = "Couldn't compile due to #{message}. Pattern: #{@string}"
  raise Invalid, details
end
733
+
734
# Allocates a temporary variable and yields two strings: code that
# assigns `cur_node` to that variable, and the variable name itself.
#
# NOTE: the block parameter name is significant; `with_temp_variables`
# derives the generated variable name from it.
#
# @param cur_node [String] code evaluating to the node
# @return [String] the block's result, re-indented
def with_temp_node(cur_node)
  code = with_temp_variables do |node|
    yield "(#{node} = #{cur_node})", node
  end
  code.gsub("\n", "\n ") # Nicer indent for debugging
end
740
+
741
# Calls the block with one fresh temporary variable name per block
# parameter; each name is derived from the parameter's own name.
#
# @return the block's result
def with_temp_variables(&block)
  names = block.parameters.map { |(_kind, name)| next_temp_variable(name) }
  block.call(*names)
end
745
+
746
# Builds a temporary variable name by appending the next temp value
# to `name`.
#
# @param name [#to_s] base name
# @return [String] the variable name
def next_temp_variable(name)
  name.to_s + next_temp_value.to_s
end
749
+
750
# Increments the `@temps` counter.
#
# @return the incremented value
def next_temp_value
  @temps += 1
end
753
+
754
# Tells whether `code` matches MULTIPLE_CUR_PLACEHOLDER. The result is
# the default for `use_temp_node` in `with_context`.
#
# @param code [String] generated code
# @return [Boolean]
def auto_use_temp_node?(code)
  code.match?(MULTIPLE_CUR_PLACEHOLDER)
end
757
+
758
# with_<...>_context methods are used whenever the context,
# i.e. the current node or the current element, can be determined.

# Puts `code` in the context of the child at `child_index` of the
# current node.
#
# @param code [String] generated code
# @param child_index [Integer, String] index into `children`
# @return [String] the generated code
def with_child_context(code, child_index)
  child = "#{CUR_NODE}.children[#{child_index}]"
  with_context(code, child)
end
764
+
765
# Substitutes the node placeholders in `code` with `cur_node`.
#
# With `use_temp_node`, `cur_node` is first assigned to a temporary
# variable: the first placeholder receives the assignment and the
# following ones the variable.
#
# @param code [String] generated code
# @param cur_node [String] code evaluating to the node
# @param use_temp_node [Boolean] defaults to `auto_use_temp_node?(code)`
# @return [String] the generated code
def with_context(code, cur_node,
                 use_temp_node: auto_use_temp_node?(code))
  return substitute_cur_node(code, cur_node) unless use_temp_node

  with_temp_node(cur_node) do |init, temp_var|
    substitute_cur_node(code, temp_var, first_cur_node: init)
  end
end
775
+
776
# Puts `code` in the context of a sequence head: the current element
# becomes the type of the current node.
#
# @param code [String] generated code
# @return [String] the generated code
# @raise [Invalid] (through `fail_due_to`) if `code` contains
#   SEQ_HEAD_GUARD
def with_seq_head_context(code)
  fail_due_to('parentheses at sequence head') if code.include?(SEQ_HEAD_GUARD)

  node_type = "#{CUR_NODE}.type"
  code.gsub(CUR_ELEMENT, node_type)
end
781
+
782
# Replaces the placeholders in `code`.
#
# Element placeholders are first turned into node placeholders. The
# first node placeholder is then replaced with `first_cur_node` and
# all following ones with `cur_node`. Sequence head guards are removed.
#
# @param code [String] generated code
# @param cur_node [String] replacement for the placeholders
# @param first_cur_node [String] replacement for the first placeholder
# @return [String] the generated code
def substitute_cur_node(code, cur_node, first_cur_node: cur_node)
  first = true
  normalized = code.gsub(CUR_ELEMENT, CUR_NODE)
  replaced = normalized.gsub(CUR_NODE) do
    replacement = first ? first_cur_node : cur_node
    first = false
    replacement
  end
  replaced.gsub(SEQ_HEAD_GUARD, '')
end
792
+
793
# Splits a pattern string into tokens, ignoring comments and tokens
# that are only separators.
#
# @param pattern [String] pattern source
# @return [Array<String>] the tokens
def self.tokens(pattern)
  uncommented = pattern.gsub(COMMENT, '')
  uncommented.scan(TOKEN).grep_v(ONLY_SEPARATOR)
end
796
+
797
# Builds a proc that forwards its arguments to `method_name`, merging
# `defaults` under any keyword given explicitly.
#
# Creating the proc in its own method minimizes its closure.
#
# @param method_name [Symbol] method receiving the call
# @param defaults [Hash] default keyword arguments
# @return [Proc]
def wrapping_block(method_name, **defaults)
  proc { |*args, **values| send(method_name, *args, **defaults.merge(values)) }
end
803
+
804
# Evaluates in `base` the method source returned by the block.
#
# With `defaults`, the source is generated under the name
# `without_defaults_<method_name>` and `method_name` itself becomes a
# wrapper supplying the default keyword arguments.
#
# @param base [Module] where the methods get defined
# @param method_name [Symbol] public name of the method
# @param defaults [Hash] default keyword arguments
# @yieldparam name [Symbol] name the generated source must define
# @yieldreturn [String] Ruby source
def def_helper(base, method_name, **defaults)
  # The frame offset is counted from this method: keep this call
  # directly in the method body. The frame found is reported as the
  # location of the generated code.
  location = caller_locations(3, 1).first
  target = method_name
  unless defaults.empty?
    target = :"without_defaults_#{method_name}"
    base.send(:define_method, method_name, &wrapping_block(target, **defaults))
  end
  base.class_eval(yield(target), location.path, location.lineno)
end
814
+
815
# Defines in `base` a matcher method for the compiled pattern. The
# method takes the node to match as first parameter, defaulting to
# `self`.
#
# @param base [Module] where the method gets defined
# @param method_name [Symbol] name of the method
# @param defaults [Hash] default keyword arguments
def def_node_matcher(base, method_name, **defaults)
  def_helper(base, method_name, **defaults) do |name|
    params = emit_params('node = self')
    body = emit_method_code
    <<~RUBY
      def #{name}(#{params})
        #{body}
      end
    RUBY
  end
end
824
+
825
# Defines in `base` a search method for the compiled pattern.
#
# @param base [Module] where the method gets defined
# @param method_name [Symbol] name of the method
# @param defaults [Hash] default keyword arguments
def def_node_search(base, method_name, **defaults)
  def_helper(base, method_name, **defaults) { |name| emit_node_search(name) }
end
830
+
831
# Emits the source of a search method.
#
# A name ending in `?` gives a method returning `true` on the first
# match. Any other name gives a method yielding on every match, which
# returns an enumerator when called without a block.
#
# @param method_name [Symbol, String] name of the generated method
# @return [String] Ruby source
def emit_node_search(method_name)
  if method_name.to_s.end_with?('?')
    return emit_node_search_body(method_name, prelude: nil,
                                              on_match: 'return true')
  end

  args = emit_params(":#{method_name}", @root, forwarding: true)
  emit_node_search_body(
    method_name,
    prelude: "return enum_for(#{args}) unless block_given?\n",
    on_match: emit_yield_capture(@node_var)
  )
end
841
+
842
# Emits the source of a search method: after the prelude, each node
# yielded by `each_node` on the root is tested with the match code,
# and `on_match` runs for those that match. The method returns nil.
#
# @param method_name [Symbol, String] name of the generated method
# @param prelude [String, nil] code inserted before the iteration
# @param on_match [String] code run for each matching node
# @return [String] Ruby source
def emit_node_search_body(method_name, prelude:, on_match:)
  signature = emit_params(@root)
  condition = match_code
  <<~RUBY
    def #{method_name}(#{signature})
      #{prelude}
      #{@root}.each_node do |#{@node_var}|
        if #{condition}
          #{on_match}
        end
      end
      nil
    end
  RUBY
end
855
+ end
856
+ private_constant :Compiler
857
+
858
# Helpers for defining methods based on a pattern string
module Macros
  # Define a method which applies a pattern to an AST node
  #
  # The new method returns nil if the node does not match.
  # If the node matches and a block is provided, the new method
  # yields to the block (passing any captures as block arguments).
  # If the node matches and no block is provided, the new method
  # returns the captures, or `true` if there were none.
  def def_node_matcher(method_name, pattern_str, **keyword_defaults)
    compiler = Compiler.new(pattern_str, 'node')
    compiler.def_node_matcher(self, method_name, **keyword_defaults)
  end

  # Define a method which recurses over the descendants of an AST node,
  # checking whether any of them match the provided pattern
  #
  # If the method name ends with '?', the new method returns `true`
  # as soon as it finds a descendant which matches. Otherwise, it
  # yields all descendants which match.
  def def_node_search(method_name, pattern_str, **keyword_defaults)
    compiler = Compiler.new(pattern_str, 'node0', 'node')
    compiler.def_node_search(self, method_name, **keyword_defaults)
  end
end
883
+
884
+ attr_reader :pattern
885
+
886
# Compiles the pattern and defines the resulting `match` method on
# this instance only.
#
# @param str [String] pattern source
def initialize(str)
  @pattern = str
  compiler = Compiler.new(str, 'node0')
  signature = compiler.emit_params('node0')
  body = compiler.emit_method_code
  instance_eval("def match(#{signature});#{body}end", __FILE__, __LINE__ + 1)
end
893
+
894
# Fallback used when the instance has no compiled `match` of its own.
#
# If we get here, the singleton method has not been defined, either
# because we've been dup'ed or serialized through YAML. The pattern is
# compiled again, after which `match` resolves to the singleton method.
def match(*args, **rest)
  initialize(pattern)
  # Keywords are only forwarded when some were given
  return match(*args) if rest.empty?

  match(*args, **rest)
end
904
+
905
# Restores the instance from the dumped pattern string by running
# `initialize` on it.
#
# @param pattern [String] value produced by `marshal_dump`
def marshal_load(pattern)
  initialize(pattern)
end
908
+
909
# Only the pattern string is dumped.
#
# @return the pattern
def marshal_dump
  pattern
end
912
+
913
# Two patterns are equal when their sources give the same tokens.
#
# @param other [Object] object to compare with
# @return [Boolean]
def ==(other)
  other.is_a?(NodePattern) &&
    Compiler.tokens(other.pattern) == Compiler.tokens(pattern)
end
alias eql? ==

# Keeps `hash` consistent with `eql?`, as required for use as Hash
# keys: patterns with the same tokens get the same hash.
#
# @return [Integer]
def hash
  Compiler.tokens(pattern).hash
end
918
+
919
# @return [String] the class name and the pattern, as `#<Class pattern>`
def to_s
  format('#<%s %s>', self.class, pattern)
end
922
+
923
# Yields its argument and any descendants, depth-first.
#
# Only `RuboCop::AST::Node` elements are descended into. Without a
# block, an enumerator is returned.
#
# @param element [Object] starting point
# @return [Enumerator, nil]
def self.descend(element, &block)
  return to_enum(__method__, element) unless block_given?

  yield element
  return nil unless element.is_a?(::RuboCop::AST::Node)

  element.children.each { |child| descend(child, &block) }
  nil
end
938
+
939
# Wraps a block so that it can be used wherever `===` is called,
# e.g. in a `when` clause.
#
# @api private
class Matcher
  # @yieldparam compare [Object] value passed to `===`
  def initialize(&block)
    @block = block
  end

  # @param compare [Object] value to test
  # @return the result of the block for `compare`
  def ===(compare)
    @block.yield(compare)
  end
end
949
+ end
950
+ end
951
+ end
952
+ # rubocop:enable Metrics/ClassLength, Metrics/CyclomaticComplexity