rubocop-ast 0.0.2 → 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,203 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'digest/sha1'
4
+
5
+ module RuboCop
6
+ module AST
7
+ # ProcessedSource contains objects which are generated by Parser
8
+ # and other information such as disabled lines for cops.
9
+ # It also provides a convenient way to access source lines.
10
+ class ProcessedSource
11
+ STRING_SOURCE_NAME = '(string)'
12
+
13
+ attr_reader :path, :buffer, :ast, :comments, :tokens, :diagnostics,
14
+ :parser_error, :raw_source, :ruby_version
15
+
16
+ def self.from_file(path, ruby_version)
17
+ file = File.read(path, mode: 'rb')
18
+ new(file, ruby_version, path)
19
+ end
20
+
21
+ def initialize(source, ruby_version, path = nil)
22
+ # Defaults source encoding to UTF-8, regardless of the encoding it has
23
+ # been read with, which could be non-utf8 depending on the default
24
+ # external encoding.
25
+ source.force_encoding(Encoding::UTF_8) unless source.encoding == Encoding::UTF_8
26
+
27
+ @raw_source = source
28
+ @path = path
29
+ @diagnostics = []
30
+ @ruby_version = ruby_version
31
+ @parser_error = nil
32
+
33
+ parse(source, ruby_version)
34
+ end
35
+
36
+ def ast_with_comments
37
+ return if !ast || !comments
38
+
39
+ @ast_with_comments ||= Parser::Source::Comment.associate(ast, comments)
40
+ end
41
+
42
+ # Returns the source lines, line break characters removed, excluding a
43
+ # possible __END__ and everything that comes after.
44
+ def lines
45
+ @lines ||= begin
46
+ all_lines = @buffer.source_lines
47
+ last_token_line = tokens.any? ? tokens.last.line : all_lines.size
48
+ result = []
49
+ all_lines.each_with_index do |line, ix|
50
+ break if ix >= last_token_line && line == '__END__'
51
+
52
+ result << line
53
+ end
54
+ result
55
+ end
56
+ end
57
+
58
+ def [](*args)
59
+ lines[*args]
60
+ end
61
+
62
+ def valid_syntax?
63
+ return false if @parser_error
64
+
65
+ @diagnostics.none? { |d| %i[error fatal].include?(d.level) }
66
+ end
67
+
68
+ # Raw source checksum for tracking infinite loops.
69
+ def checksum
70
+ Digest::SHA1.hexdigest(@raw_source)
71
+ end
72
+
73
+ def each_comment
74
+ comments.each { |comment| yield comment }
75
+ end
76
+
77
+ def find_comment
78
+ comments.find { |comment| yield comment }
79
+ end
80
+
81
+ def each_token
82
+ tokens.each { |token| yield token }
83
+ end
84
+
85
+ def find_token
86
+ tokens.find { |token| yield token }
87
+ end
88
+
89
+ def file_path
90
+ buffer.name
91
+ end
92
+
93
+ def blank?
94
+ ast.nil?
95
+ end
96
+
97
+ def commented?(source_range)
98
+ comment_lines.include?(source_range.line)
99
+ end
100
+
101
+ def comments_before_line(line)
102
+ comments.select { |c| c.location.line <= line }
103
+ end
104
+
105
+ def start_with?(string)
106
+ return false if self[0].nil?
107
+
108
+ self[0].start_with?(string)
109
+ end
110
+
111
+ def preceding_line(token)
112
+ lines[token.line - 2]
113
+ end
114
+
115
+ def current_line(token)
116
+ lines[token.line - 1]
117
+ end
118
+
119
+ def following_line(token)
120
+ lines[token.line]
121
+ end
122
+
123
+ def line_indentation(line_number)
124
+ lines[line_number - 1]
125
+ .match(/^(\s*)/)[1]
126
+ .to_s
127
+ .length
128
+ end
129
+
130
+ private
131
+
132
+ def comment_lines
133
+ @comment_lines ||= comments.map { |c| c.location.line }
134
+ end
135
+
136
+ def parse(source, ruby_version)
137
+ buffer_name = @path || STRING_SOURCE_NAME
138
+ @buffer = Parser::Source::Buffer.new(buffer_name, 1)
139
+
140
+ begin
141
+ @buffer.source = source
142
+ rescue EncodingError => e
143
+ @parser_error = e
144
+ return
145
+ end
146
+
147
+ @ast, @comments, @tokens = tokenize(create_parser(ruby_version))
148
+ end
149
+
150
+ def tokenize(parser)
151
+ begin
152
+ ast, comments, tokens = parser.tokenize(@buffer)
153
+
154
+ ast.respond_to?(:complete!) && ast.complete!
155
+ rescue Parser::SyntaxError
156
+ # All errors are in diagnostics. No need to handle exception.
157
+ end
158
+
159
+ tokens = tokens.map { |t| Token.from_parser_token(t) } if tokens
160
+
161
+ [ast, comments, tokens]
162
+ end
163
+
164
+ # rubocop:disable Metrics/MethodLength
165
+ def parser_class(ruby_version)
166
+ case ruby_version
167
+ when 2.4
168
+ require 'parser/ruby24'
169
+ Parser::Ruby24
170
+ when 2.5
171
+ require 'parser/ruby25'
172
+ Parser::Ruby25
173
+ when 2.6
174
+ require 'parser/ruby26'
175
+ Parser::Ruby26
176
+ when 2.7
177
+ require 'parser/ruby27'
178
+ Parser::Ruby27
179
+ else
180
+ raise ArgumentError,
181
+ "RuboCop found unknown Ruby version: #{ruby_version.inspect}"
182
+ end
183
+ end
184
+ # rubocop:enable Metrics/MethodLength
185
+
186
+ def create_parser(ruby_version)
187
+ builder = RuboCop::AST::Builder.new
188
+
189
+ parser_class(ruby_version).new(builder).tap do |parser|
190
+ # On JRuby there's a risk that we hang in tokenize() if we
191
+ # don't set the all_errors_are_fatal flag. The problem is caused by a bug
192
+ # in Racc that is discussed in issue #93 of the whitequark/parser
193
+ # project on GitHub.
194
+ parser.diagnostics.all_errors_are_fatal = (RUBY_ENGINE != 'ruby')
195
+ parser.diagnostics.ignore_warnings = false
196
+ parser.diagnostics.consumer = lambda do |diagnostic|
197
+ @diagnostics << diagnostic
198
+ end
199
+ end
200
+ end
201
+ end
202
+ end
203
+ end
@@ -0,0 +1,116 @@
1
+ # frozen_string_literal: true
2
+
3
+ module RuboCop
4
+ module AST
5
+ # A basic wrapper around Parser's tokens.
6
+ class Token
7
+ attr_reader :pos, :type, :text
8
+
9
+ def self.from_parser_token(parser_token)
10
+ type, details = parser_token
11
+ text, range = details
12
+ new(range, type, text)
13
+ end
14
+
15
+ def initialize(pos, type, text)
16
+ @pos = pos
17
+ @type = type
18
+ # Parser token "text" may be an Integer
19
+ @text = text.to_s
20
+ end
21
+
22
+ def line
23
+ @pos.line
24
+ end
25
+
26
+ def column
27
+ @pos.column
28
+ end
29
+
30
+ def begin_pos
31
+ @pos.begin_pos
32
+ end
33
+
34
+ def end_pos
35
+ @pos.end_pos
36
+ end
37
+
38
+ def to_s
39
+ "[[#{line}, #{column}], #{type}, #{text.inspect}]"
40
+ end
41
+
42
+ # Checks if there is whitespace after token
43
+ def space_after?
44
+ pos.source_buffer.source.match(/\G\s/, end_pos)
45
+ end
46
+
47
+ # Checks if there is whitespace before token
48
+ def space_before?
49
+ position = begin_pos.zero? ? begin_pos : begin_pos - 1
50
+ pos.source_buffer.source.match(/\G\s/, position)
51
+ end
52
+
53
+ ## Type Predicates
54
+
55
+ def comment?
56
+ type == :tCOMMENT
57
+ end
58
+
59
+ def semicolon?
60
+ type == :tSEMI
61
+ end
62
+
63
+ def left_array_bracket?
64
+ type == :tLBRACK
65
+ end
66
+
67
+ def left_ref_bracket?
68
+ type == :tLBRACK2
69
+ end
70
+
71
+ def left_bracket?
72
+ %i[tLBRACK tLBRACK2].include?(type)
73
+ end
74
+
75
+ def right_bracket?
76
+ type == :tRBRACK
77
+ end
78
+
79
+ def left_brace?
80
+ type == :tLBRACE
81
+ end
82
+
83
+ def left_curly_brace?
84
+ type == :tLCURLY
85
+ end
86
+
87
+ def right_curly_brace?
88
+ type == :tRCURLY
89
+ end
90
+
91
+ def left_parens?
92
+ %i[tLPAREN tLPAREN2].include?(type)
93
+ end
94
+
95
+ def right_parens?
96
+ type == :tRPAREN
97
+ end
98
+
99
+ def comma?
100
+ type == :tCOMMA
101
+ end
102
+
103
+ def rescue_modifier?
104
+ type == :kRESCUE_MOD
105
+ end
106
+
107
+ def end?
108
+ type == :kEND
109
+ end
110
+
111
+ def equal_sign?
112
+ %i[tEQL tOP_ASGN].include?(type)
113
+ end
114
+ end
115
+ end
116
+ end
@@ -3,7 +3,7 @@
3
3
  module RuboCop
4
4
  module AST
5
5
  module Version
6
- STRING = '0.0.2'
6
+ STRING = '0.0.3'
7
7
  end
8
8
  end
9
9
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rubocop-ast
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Bozhidar Batsov
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2020-05-12 00:00:00.000000000 Z
13
+ date: 2020-05-15 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: parser
@@ -107,13 +107,12 @@ files:
107
107
  - lib/rubocop/ast/node/when_node.rb
108
108
  - lib/rubocop/ast/node/while_node.rb
109
109
  - lib/rubocop/ast/node/yield_node.rb
110
+ - lib/rubocop/ast/node_pattern.rb
111
+ - lib/rubocop/ast/processed_source.rb
110
112
  - lib/rubocop/ast/sexp.rb
113
+ - lib/rubocop/ast/token.rb
111
114
  - lib/rubocop/ast/traversal.rb
112
115
  - lib/rubocop/ast/version.rb
113
- - lib/rubocop/error.rb
114
- - lib/rubocop/node_pattern.rb
115
- - lib/rubocop/processed_source.rb
116
- - lib/rubocop/token.rb
117
116
  homepage: https://github.com/rubocop-hq/rubocop-ast
118
117
  licenses:
119
118
  - MIT
data/lib/rubocop/error.rb DELETED
@@ -1,34 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module RuboCop
4
- # An Error exception is different from an Offense with severity 'error'
5
- # When this exception is raised, it means that RuboCop is unable to perform
6
- # a requested action (probably due to misconfiguration) and must stop
7
- # immediately, rather than carrying on
8
- class Error < StandardError; end
9
-
10
- class ValidationError < Error; end
11
-
12
- # A wrapper to display errored location of analyzed file.
13
- class ErrorWithAnalyzedFileLocation < Error
14
- def initialize(cause:, node:, cop:)
15
- @cause = cause
16
- @cop = cop
17
- @location = node.is_a?(RuboCop::AST::Node) ? node.loc : node
18
- end
19
-
20
- attr_reader :cause, :cop
21
-
22
- def line
23
- @location&.line
24
- end
25
-
26
- def column
27
- @location&.column
28
- end
29
-
30
- def message
31
- "cause: #{cause.inspect}"
32
- end
33
- end
34
- end
@@ -1,881 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'delegate'
4
- require 'erb'
5
-
6
- # rubocop:disable Metrics/ClassLength, Metrics/CyclomaticComplexity
7
- module RuboCop
8
- # This class performs a pattern-matching operation on an AST node.
9
- #
10
- # Initialize a new `NodePattern` with `NodePattern.new(pattern_string)`, then
11
- # pass an AST node to `NodePattern#match`. Alternatively, use one of the class
12
- # macros in `NodePattern::Macros` to define your own pattern-matching method.
13
- #
14
- # If the match fails, `nil` will be returned. If the match succeeds, the
15
- # return value depends on whether a block was provided to `#match`, and
16
- # whether the pattern contained any "captures" (values which are extracted
17
- # from a matching AST.)
18
- #
19
- # - With block: #match yields the captures (if any) and passes the return
20
- # value of the block through.
21
- # - With no block, but one capture: the capture is returned.
22
- # - With no block, but multiple captures: captures are returned as an array.
23
- # - With no block and no captures: #match returns `true`.
24
- #
25
- # ## Pattern string format examples
26
- #
27
- # ':sym' # matches a literal symbol
28
- # '1' # matches a literal integer
29
- # 'nil' # matches a literal nil
30
- # 'send' # matches (send ...)
31
- # '(send)' # matches (send)
32
- # '(send ...)' # matches (send ...)
33
- # '(op-asgn)' # node types with hyphenated names also work
34
- # '{send class}' # matches (send ...) or (class ...)
35
- # '({send class})' # matches (send) or (class)
36
- # '(send const)' # matches (send (const ...))
37
- # '(send _ :new)' # matches (send <anything> :new)
38
- # '(send $_ :new)' # as above, but whatever matches the $_ is captured
39
- # '(send $_ $_)' # you can use as many captures as you want
40
- # '(send !const ...)' # ! negates the next part of the pattern
41
- # '$(send const ...)' # arbitrary matching can be performed on a capture
42
- # '(send _recv _msg)' # wildcards can be named (for readability)
43
- # '(send ... :new)' # you can match against the last children
44
- # '(array <str sym>)' # you can match children in any order. This
45
- # # would match `['x', :y]` as well as `[:y, 'x']`
46
- # '(_ <str sym ...>)' # will match if arguments have at least a `str` and
47
- # # a `sym` node, but can have more.
48
- # '(array <$str $_>)' # captures are in the order of the pattern,
49
- # # irrespective of the actual order of the children
50
- # '(array int*)' # will match an array of 0 or more integers
51
- # '(array int ?)' # will match 0 or 1 integer.
52
- # # Note: Space needed to distinguish from int?
53
- # '(array int+)' # will match an array of 1 or more integers
54
- # '(array (int $_)+)' # as above and will capture the numbers in an array
55
- # '(send $...)' # capture all the children as an array
56
- # '(send $... int)' # capture all children but the last as an array
57
- # '(send _x :+ _x)' # unification is performed on named wildcards
58
- # # (like Prolog variables...)
59
- # # (#== is used to see if values unify)
60
- # '(int odd?)' # words which end with a ? are predicate methods,
61
- # # are called on the target to see if it matches
62
- # # any Ruby method which the matched object supports
63
- # # can be used
64
- # # if a truthy value is returned, the match succeeds
65
- # '(int [!1 !2])' # [] contains multiple patterns, ALL of which must
66
- # # match in that position
67
- # # in other words, while {} is pattern union (logical
68
- # # OR), [] is intersection (logical AND)
69
- # '(send %1 _)' # % stands for a parameter which must be supplied to
70
- # # #match at matching time
71
- # # it will be compared to the corresponding value in
72
- # # the AST using #==
73
- # # a bare '%' is the same as '%1'
74
- # # the number of extra parameters passed to #match
75
- # # must equal the highest % value in the pattern
76
- # # for consistency, %0 is the 'root node' which is
77
- # # passed as the 1st argument to #match, where the
78
- # # matching process starts
79
- # '^^send' # each ^ ascends one level in the AST
80
- # # so this matches against the grandparent node
81
- # '`send' # descends any number of levels in the AST
82
- # # so this matches against any descendant node
83
- # '#method' # we call this a 'funcall'; it calls a method in the
84
- # # context where a pattern-matching method is defined
85
- # # if that returns a truthy value, the match succeeds
86
- # 'equal?(%1)' # predicates can be given 1 or more extra args
87
- # '#method(%0, 1)' # funcalls can also be given 1 or more extra args
88
- #
89
- # You can nest arbitrarily deep:
90
- #
91
- # # matches node parsed from 'Const = Class.new' or 'Const = Module.new':
92
- # '(casgn nil? :Const (send (const nil? {:Class :Module}) :new))'
93
- # # matches a node parsed from an 'if', with a '==' comparison,
94
- # # and no 'else' branch:
95
- # '(if (send _ :== _) _ nil?)'
96
- #
97
- # Note that patterns like 'send' are implemented by calling `#send_type?` on
98
- # the node being matched, 'const' by `#const_type?`, 'int' by `#int_type?`,
99
- # and so on. Therefore, if you add methods which are named like
100
- # `#prefix_type?` to the AST node class, then 'prefix' will become usable as
101
- # a pattern.
102
- #
103
- # Also note that if you need a "guard clause" to protect against possible nils
104
- # in a certain place in the AST, you can do it like this: `[!nil <pattern>]`
105
- #
106
- # The compiler code is very simple; don't be afraid to read through it!
107
- class NodePattern
108
- # @private
109
- Invalid = Class.new(StandardError)
110
-
111
- # @private
112
- # Builds Ruby code which implements a pattern
113
- class Compiler
114
- SYMBOL = %r{:(?:[\w+@*/?!<>=~|%^-]+|\[\]=?)}.freeze
115
- IDENTIFIER = /[a-zA-Z_][a-zA-Z0-9_-]*/.freeze
116
- META = Regexp.union(
117
- %w"( ) { } [ ] $< < > $... $ ! ^ ` ... + * ?"
118
- ).freeze
119
- NUMBER = /-?\d+(?:\.\d+)?/.freeze
120
- STRING = /".+?"/.freeze
121
- METHOD_NAME = /\#?#{IDENTIFIER}[\!\?]?\(?/.freeze
122
- PARAM_NUMBER = /%\d*/.freeze
123
-
124
- SEPARATORS = /[\s]+/.freeze
125
- TOKENS = Regexp.union(META, PARAM_NUMBER, NUMBER,
126
- METHOD_NAME, SYMBOL, STRING)
127
-
128
- TOKEN = /\G(?:#{SEPARATORS}|#{TOKENS}|.)/.freeze
129
-
130
- NODE = /\A#{IDENTIFIER}\Z/.freeze
131
- PREDICATE = /\A#{IDENTIFIER}\?\(?\Z/.freeze
132
- WILDCARD = /\A_(?:#{IDENTIFIER})?\Z/.freeze
133
-
134
- FUNCALL = /\A\##{METHOD_NAME}/.freeze
135
- LITERAL = /\A(?:#{SYMBOL}|#{NUMBER}|#{STRING})\Z/.freeze
136
- PARAM = /\A#{PARAM_NUMBER}\Z/.freeze
137
- CLOSING = /\A(?:\)|\}|\])\Z/.freeze
138
-
139
- REST = '...'
140
- CAPTURED_REST = '$...'
141
-
142
- attr_reader :match_code, :tokens, :captures
143
-
144
- SEQ_HEAD_INDEX = -1
145
-
146
- # Placeholders while compiling, see with_..._context methods
147
- CUR_PLACEHOLDER = '@@@cur'
148
- CUR_NODE = "#{CUR_PLACEHOLDER} node@@@"
149
- CUR_ELEMENT = "#{CUR_PLACEHOLDER} element@@@"
150
- SEQ_HEAD_GUARD = '@@@seq guard head@@@'
151
-
152
- line = __LINE__
153
- ANY_ORDER_TEMPLATE = ERB.new <<~RUBY.gsub("-%>\n", '%>')
154
- <% if capture_rest %>(<%= capture_rest %> = []) && <% end -%>
155
- <% if capture_all %>(<%= capture_all %> = <% end -%>
156
- <%= CUR_NODE %>.children[<%= range %>]<% if capture_all %>)<% end -%>
157
- .each_with_object({}) { |<%= child %>, <%= matched %>|
158
- case
159
- <% patterns.each_with_index do |pattern, i| -%>
160
- when !<%= matched %>[<%= i %>] && <%=
161
- with_context(pattern, child, use_temp_node: false)
162
- %> then <%= matched %>[<%= i %>] = true
163
- <% end -%>
164
- <% if !rest %> else break({})
165
- <% elsif capture_rest %> else <%= capture_rest %> << <%= child %>
166
- <% end -%>
167
- end
168
- }.size == <%= patterns.size -%>
169
- RUBY
170
- ANY_ORDER_TEMPLATE.location = [__FILE__, line + 1]
171
-
172
- line = __LINE__
173
- REPEATED_TEMPLATE = ERB.new <<~RUBY.gsub("-%>\n", '%>')
174
- <% if captured %>(<%= accumulate %> = Array.new) && <% end %>
175
- <%= CUR_NODE %>.children[<%= range %>].all? do |<%= child %>|
176
- <%= with_context(expr, child, use_temp_node: false) %><% if captured %>&&
177
- <%= accumulate %>.push(<%= captured %>)<% end %>
178
- end <% if captured %>&&
179
- (<%= captured %> = if <%= accumulate %>.empty?
180
- <%= captured %>.map{[]} # Transpose hack won't work for empty case
181
- else
182
- <%= accumulate %>.transpose
183
- end) <% end -%>
184
- RUBY
185
- REPEATED_TEMPLATE.location = [__FILE__, line + 1]
186
-
187
- def initialize(str, node_var = 'node0')
188
- @string = str
189
- @root = node_var
190
-
191
- @temps = 0 # avoid name clashes between temp variables
192
- @captures = 0 # number of captures seen
193
- @unify = {} # named wildcard -> temp variable
194
- @params = 0 # highest % (param) number seen
195
- run(node_var)
196
- end
197
-
198
- def run(node_var)
199
- @tokens = Compiler.tokens(@string)
200
-
201
- @match_code = with_context(compile_expr, node_var, use_temp_node: false)
202
- @match_code.prepend("(captures = Array.new(#{@captures})) && ") \
203
- if @captures.positive?
204
-
205
- fail_due_to('unbalanced pattern') unless tokens.empty?
206
- end
207
-
208
- # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
209
- def compile_expr(token = tokens.shift)
210
- # read a single pattern-matching expression from the token stream,
211
- # return Ruby code which performs the corresponding matching operation
212
- #
213
- # the 'pattern-matching' expression may be a composite which
214
- # contains an arbitrary number of sub-expressions, but that composite
215
- # must all have precedence higher or equal to that of `&&`
216
- #
217
- # Expressions may use placeholders like:
218
- # CUR_NODE: Ruby code that evaluates to an AST node
219
- # CUR_ELEMENT: Either the node or the type if in first element of
220
- # a sequence (aka seq_head, e.g. "(seq_head first_node_arg ...")
221
- case token
222
- when '(' then compile_seq
223
- when '{' then compile_union
224
- when '[' then compile_intersect
225
- when '!' then compile_negation
226
- when '$' then compile_capture
227
- when '^' then compile_ascend
228
- when '`' then compile_descend
229
- when WILDCARD then compile_wildcard(token[1..-1])
230
- when FUNCALL then compile_funcall(token)
231
- when LITERAL then compile_literal(token)
232
- when PREDICATE then compile_predicate(token)
233
- when NODE then compile_nodetype(token)
234
- when PARAM then compile_param(token[1..-1])
235
- when CLOSING then fail_due_to("#{token} in invalid position")
236
- when nil then fail_due_to('pattern ended prematurely')
237
- else fail_due_to("invalid token #{token.inspect}")
238
- end
239
- end
240
- # rubocop:enable Metrics/MethodLength, Metrics/AbcSize
241
-
242
- def tokens_until(stop, what)
243
- return to_enum __method__, stop, what unless block_given?
244
-
245
- fail_due_to("empty #{what}") if tokens.first == stop && what
246
- yield until tokens.first == stop
247
- tokens.shift
248
- end
249
-
250
- def compile_seq
251
- terms = tokens_until(')', 'sequence').map { variadic_seq_term }
252
- Sequence.new(self, *terms).compile
253
- end
254
-
255
- def compile_guard_clause
256
- "#{CUR_NODE}.is_a?(RuboCop::AST::Node)"
257
- end
258
-
259
- def variadic_seq_term
260
- token = tokens.shift
261
- case token
262
- when CAPTURED_REST then compile_captured_ellipsis
263
- when REST then compile_ellipsis
264
- when '$<' then compile_any_order(next_capture)
265
- when '<' then compile_any_order
266
- else compile_repeated_expr(token)
267
- end
268
- end
269
-
270
- def compile_repeated_expr(token)
271
- before = @captures
272
- expr = compile_expr(token)
273
- min, max = parse_repetition_token
274
- return [1, expr] if min.nil?
275
-
276
- if @captures != before
277
- captured = "captures[#{before}...#{@captures}]"
278
- accumulate = next_temp_variable(:accumulate)
279
- end
280
- arity = min..max || Float::INFINITY
281
-
282
- [arity, repeated_generator(expr, captured, accumulate)]
283
- end
284
-
285
- def repeated_generator(expr, captured, accumulate)
286
- with_temp_variables do |child|
287
- lambda do |range|
288
- fail_due_to 'repeated pattern at beginning of sequence' if range.begin == SEQ_HEAD_INDEX
289
- REPEATED_TEMPLATE.result(binding)
290
- end
291
- end
292
- end
293
-
294
- def parse_repetition_token
295
- case tokens.first
296
- when '*' then min = 0
297
- when '+' then min = 1
298
- when '?' then min = 0
299
- max = 1
300
- else return
301
- end
302
- tokens.shift
303
- [min, max]
304
- end
305
-
306
- # @private
307
- # Builds Ruby code for a sequence
308
- # (head *first_terms variadic_term *last_terms)
309
- class Sequence < SimpleDelegator
310
- def initialize(compiler, *arity_term_list)
311
- @arities, @terms = arity_term_list.transpose
312
-
313
- super(compiler)
314
- @variadic_index = @arities.find_index { |a| a.is_a?(Range) }
315
- fail_due_to 'multiple variable patterns in same sequence' \
316
- if @variadic_index && !@arities.one? { |a| a.is_a?(Range) }
317
- end
318
-
319
- def compile
320
- [
321
- compile_guard_clause,
322
- compile_child_nb_guard,
323
- compile_seq_head,
324
- *compile_first_terms,
325
- compile_variadic_term,
326
- *compile_last_terms
327
- ].compact.join(" &&\n") << SEQ_HEAD_GUARD
328
- end
329
-
330
- private
331
-
332
- def first_terms_arity
333
- first_terms_range { |r| @arities[r].inject(0, :+) } || 0
334
- end
335
-
336
- def last_terms_arity
337
- last_terms_range { |r| @arities[r].inject(0, :+) } || 0
338
- end
339
-
340
- def variadic_term_min_arity
341
- @variadic_index ? @arities[@variadic_index].begin : 0
342
- end
343
-
344
- def first_terms_range
345
- yield 1..(@variadic_index || @terms.size) - 1 if seq_head?
346
- end
347
-
348
- def last_terms_range
349
- yield @variadic_index + 1...@terms.size if @variadic_index
350
- end
351
-
352
- def seq_head?
353
- @variadic_index != 0
354
- end
355
-
356
- def compile_child_nb_guard
357
- fixed = first_terms_arity + last_terms_arity
358
- min = fixed + variadic_term_min_arity
359
- op = if @variadic_index
360
- max_variadic = @arities[@variadic_index].end
361
- if max_variadic != Float::INFINITY
362
- range = min..fixed + max_variadic
363
- return "(#{range}).cover?(#{CUR_NODE}.children.size)"
364
- end
365
- '>='
366
- else
367
- '=='
368
- end
369
- "#{CUR_NODE}.children.size #{op} #{min}"
370
- end
371
-
372
- def term(index, range)
373
- t = @terms[index]
374
- if t.respond_to? :call
375
- t.call(range)
376
- else
377
- with_child_context(t, range.begin)
378
- end
379
- end
380
-
381
- def compile_seq_head
382
- return unless seq_head?
383
-
384
- fail_due_to 'sequences cannot start with <' \
385
- if @terms[0].respond_to? :call
386
-
387
- with_seq_head_context(@terms[0])
388
- end
389
-
390
- def compile_first_terms
391
- first_terms_range { |range| compile_terms(range, 0) }
392
- end
393
-
394
- def compile_last_terms
395
- last_terms_range { |r| compile_terms(r, -last_terms_arity) }
396
- end
397
-
398
- def compile_terms(index_range, start)
399
- index_range.map do |i|
400
- current = start
401
- start += @arities.fetch(i)
402
- term(i, current..start - 1)
403
- end
404
- end
405
-
406
- def compile_variadic_term
407
- variadic_arity { |arity| term(@variadic_index, arity) }
408
- end
409
-
410
- def variadic_arity
411
- return unless @variadic_index
412
-
413
- first = @variadic_index.positive? ? first_terms_arity : SEQ_HEAD_INDEX
414
- yield first..-last_terms_arity - 1
415
- end
416
- end
417
- private_constant :Sequence
418
-
419
- def compile_captured_ellipsis
420
- capture = next_capture
421
- block = lambda { |range|
422
- # Consider ($...) like (_ $...):
423
- range = 0..range.end if range.begin == SEQ_HEAD_INDEX
424
- "(#{capture} = #{CUR_NODE}.children[#{range}])"
425
- }
426
- [0..Float::INFINITY, block]
427
- end
428
-
429
- def compile_ellipsis
430
- [0..Float::INFINITY, 'true']
431
- end
432
-
433
- # rubocop:disable Metrics/AbcSize
434
- # rubocop:disable Metrics/MethodLength
435
- def compile_any_order(capture_all = nil)
436
- rest = capture_rest = nil
437
- patterns = []
438
- with_temp_variables do |child, matched|
439
- tokens_until('>', 'any child') do
440
- fail_due_to 'ellipsis must be at the end of <>' if rest
441
- token = tokens.shift
442
- case token
443
- when CAPTURED_REST then rest = capture_rest = next_capture
444
- when REST then rest = true
445
- else patterns << compile_expr(token)
446
- end
447
- end
448
- [rest ? patterns.size..Float::INFINITY : patterns.size,
449
- ->(range) { ANY_ORDER_TEMPLATE.result(binding) }]
450
- end
451
- end
452
- # rubocop:enable Metrics/MethodLength
453
- # rubocop:enable Metrics/AbcSize
454
-
455
- def insure_same_captures(enum, what)
456
- return to_enum __method__, enum, what unless block_given?
457
-
458
- captures_before = captures_after = nil
459
- enum.each do
460
- captures_before ||= @captures
461
- @captures = captures_before
462
- yield
463
- captures_after ||= @captures
464
- fail_due_to("each #{what} must have same # of captures") if captures_after != @captures
465
- end
466
- end
467
-
468
- def access_unify(name)
469
- var = @unify[name]
470
-
471
- if var == :forbidden_unification
472
- fail_due_to "Wildcard #{name} was first seen in a subset of a" \
473
- " union and can't be used outside that union"
474
- end
475
- var
476
- end
477
-
478
- def forbid_unification(*names)
479
- names.each do |name|
480
- @unify[name] = :forbidden_unification
481
- end
482
- end
483
-
484
- # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
485
- def unify_in_union(enum)
486
- # We need to reset @unify before each branch is processed.
487
- # Moreover we need to keep track of newly encountered wildcards.
488
- # Var `new_unify_intersection` will hold those that are encountered
489
- # in all branches; these are not a problem.
490
- # Var `partial_unify` will hold those encountered in only a subset
491
- # of the branches; these can't be used outside of the union.
492
-
493
- return to_enum __method__, enum unless block_given?
494
-
495
- new_unify_intersection = nil
496
- partial_unify = []
497
- unify_before = @unify.dup
498
-
499
- result = enum.each do |e|
500
- @unify = unify_before.dup if new_unify_intersection
501
- yield e
502
- new_unify = @unify.keys - unify_before.keys
503
- if new_unify_intersection.nil?
504
- # First iteration
505
- new_unify_intersection = new_unify
506
- else
507
- union = new_unify_intersection | new_unify
508
- new_unify_intersection &= new_unify
509
- partial_unify |= union - new_unify_intersection
510
- end
511
- end
512
-
513
- # At this point, all members of `new_unify_intersection` can be used
514
- # for unification outside of the union, but partial_unify may not
515
-
516
- forbid_unification(*partial_unify)
517
-
518
- result
519
- end
520
- # rubocop:enable Metrics/MethodLength, Metrics/AbcSize
521
-
522
- def compile_union
523
- # we need to ensure that each branch of the {} contains the same
524
- # number of captures (since only one branch of the {} can actually
525
- # match, the same variables are used to hold the captures for each
526
- # branch)
527
- enum = tokens_until('}', 'union')
528
- enum = unify_in_union(enum)
529
- terms = insure_same_captures(enum, 'branch of {}')
530
- .map { compile_expr }
531
-
532
- "(#{terms.join(' || ')})"
533
- end
534
-
535
- def compile_intersect
536
- tokens_until(']', 'intersection')
537
- .map { compile_expr }
538
- .join(' && ')
539
- end
540
-
541
- def compile_capture
542
- "(#{next_capture} = #{CUR_ELEMENT}; #{compile_expr})"
543
- end
544
-
545
- def compile_negation
546
- "!(#{compile_expr})"
547
- end
548
-
549
- def compile_ascend
550
- with_context("#{CUR_NODE} && #{compile_expr}", "#{CUR_NODE}.parent")
551
- end
552
-
553
- def compile_descend
554
- with_temp_variables do |descendant|
555
- pattern = with_context(compile_expr, descendant,
556
- use_temp_node: false)
557
- [
558
- "RuboCop::NodePattern.descend(#{CUR_ELEMENT}).",
559
- "any? do |#{descendant}|",
560
- " #{pattern}",
561
- 'end'
562
- ].join("\n")
563
- end
564
- end
565
-
566
- def compile_wildcard(name)
567
- if name.empty?
568
- 'true'
569
- elsif @unify.key?(name)
570
- # we have already seen a wildcard with this name before
571
- # so the value it matched the first time will already be stored
572
- # in a temp. check if this value matches the one stored in the temp
573
- "#{CUR_ELEMENT} == #{access_unify(name)}"
574
- else
575
- n = @unify[name] = "unify_#{name.gsub('-', '__')}"
576
- # double assign to avoid "assigned but unused variable"
577
- "(#{n} = #{CUR_ELEMENT}; " \
578
- "#{n} = #{n}; true)"
579
- end
580
- end
581
-
582
- def compile_literal(literal)
583
- "#{CUR_ELEMENT} == #{literal}"
584
- end
585
-
586
- def compile_predicate(predicate)
587
- if predicate.end_with?('(') # is there an arglist?
588
- args = compile_args(tokens)
589
- predicate = predicate[0..-2] # drop the trailing (
590
- "#{CUR_ELEMENT}.#{predicate}(#{args.join(',')})"
591
- else
592
- "#{CUR_ELEMENT}.#{predicate}"
593
- end
594
- end
595
-
596
- def compile_funcall(method)
597
- # call a method in the context which this pattern-matching
598
- # code is used in. pass target value as an argument
599
- method = method[1..-1] # drop the leading #
600
- if method.end_with?('(') # is there an arglist?
601
- args = compile_args(tokens)
602
- method = method[0..-2] # drop the trailing (
603
- "#{method}(#{CUR_ELEMENT},#{args.join(',')})"
604
- else
605
- "#{method}(#{CUR_ELEMENT})"
606
- end
607
- end
608
-
609
- def compile_nodetype(type)
610
- "#{compile_guard_clause} && #{CUR_NODE}.#{type.tr('-', '_')}_type?"
611
- end
612
-
613
- def compile_param(number)
614
- "#{CUR_ELEMENT} == #{get_param(number)}"
615
- end
616
-
617
- def compile_args(tokens)
618
- index = tokens.find_index { |token| token == ')' }
619
-
620
- tokens.slice!(0..index).each_with_object([]) do |token, args|
621
- next if [')', ','].include?(token)
622
-
623
- args << compile_arg(token)
624
- end
625
- end
626
-
627
# Compiles a single token of an argument list into code.
#
# Wildcards must refer to an already unified variable, literals are passed
# through unchanged and params are resolved through get_param. Any other
# token aborts compilation via fail_due_to.
def compile_arg(token)
  case token
  when WILDCARD
    unified = access_unify(token[1..-1])
    unified || fail_due_to("invalid in arglist: #{token}")
  when LITERAL
    token
  when PARAM
    get_param(token[1..-1])
  when CLOSING
    fail_due_to("#{token} in invalid position")
  when nil
    fail_due_to('pattern ended prematurely')
  else
    fail_due_to("invalid token in arglist: #{token.inspect}")
  end
end
639
-
640
# Reserves the next capture slot and returns the code that accesses it,
# e.g. "captures[0]" for the first capture. Increments @captures.
def next_capture
  slot = "captures[#{@captures}]"
  @captures += 1
  slot
end
645
-
646
# Resolves a parameter reference to the code that reads it.
#
# +number+ is the digit string of the token; an empty string means
# parameter 1. @params is raised to the highest parameter number seen.
# Parameter 0 resolves to @root, every other one to "param<N>".
def get_param(number)
  index = number.empty? ? 1 : Integer(number)
  @params = [@params, index].max
  index.zero? ? @root : "param#{index}"
end
651
-
652
# Emits the `yield(...)` call used when a match succeeds.
#
# With no captures, +when_no_capture+ is yielded; with exactly one, the
# single capture; otherwise all captures are splatted.
def emit_yield_capture(when_no_capture = '')
  # A single capture is yielded directly rather than splatted, to
  # circumvent https://github.com/jruby/jruby/issues/5710
  captured = @captures == 1 ? 'captures[0]' : '*captures'
  yield_val = @captures.zero? ? when_no_capture : captured
  "yield(#{yield_val})"
end
662
-
663
# Emits the expression returned on a successful match when no block is
# given: 'true' without captures, the single capture when there is one,
# and the whole captures array otherwise.
def emit_retval
  return 'true' if @captures.zero?

  @captures == 1 ? 'captures[0]' : 'captures'
end
672
-
673
# Emits the comma-separated names "param1,param2,..." for the @params
# parameters; an empty string when there are none.
def emit_param_list
  names = 1.upto(@params).map { |number| "param#{number}" }
  names.join(',')
end
676
-
677
# Emits the parameter list prefixed with a comma, so it can be appended
# after a leading argument. Returns an empty string without parameters.
def emit_trailing_params
  list = emit_param_list
  return '' if list.empty?

  ",#{list}"
end
681
-
682
# Emits the body of a matcher method: bail out unless the match code
# succeeds, then either yield the captures to the block or return them.
def emit_method_code
  guard = "return unless #{@match_code}\n"
  outcome = "block_given? ? #{emit_yield_capture} : (return #{emit_retval})\n"
  guard + outcome
end
688
-
689
# Aborts compilation by raising Invalid. The error text combines
# +message+ with the pattern string held in @string.
def fail_due_to(message)
  details = "Couldn't compile due to #{message}. Pattern: #{@string}"
  raise Invalid, details
end
692
-
693
# Yields the code that assigns +cur_node+ to a fresh temp variable,
# together with the name of that variable, and returns the block's result
# with an extra space after each newline (nicer indent for debugging).
def with_temp_node(cur_node)
  # The block parameter must stay named `node`: with_temp_variables
  # derives the temp variable name from the block's parameter names.
  code = with_temp_variables do |node|
    yield "(#{node} = #{cur_node})", node
  end
  code.gsub("\n", "\n ")
end
699
-
700
# Yields one fresh temp variable name per parameter declared by the given
# block; each name is derived from the corresponding parameter name.
def with_temp_variables(&block)
  temp_names = block.parameters.map do |(_kind, param_name)|
    next_temp_variable(param_name)
  end
  yield(*temp_names)
end
704
-
705
# Builds a unique variable name from +name+ and the next temp counter value.
def next_temp_variable(name)
  suffix = next_temp_value
  "#{name}#{suffix}"
end
708
-
709
# Advances the @temps counter by one and returns its new value.
def next_temp_value
  @temps = @temps + 1
end
712
-
713
# True when +code+ contains more than one occurrence of CUR_PLACEHOLDER.
def auto_use_temp_node?(code)
  occurrences = code.scan(CUR_PLACEHOLDER)
  occurrences.size >= 2
end
716
-
717
- # with_<...>_context methods are used whenever the context,
717
- # i.e. the current node or the current element, can be determined.
719
-
720
# Binds +code+ to the child at +child_index+ of the current node.
def with_child_context(code, child_index)
  child = "#{CUR_NODE}.children[#{child_index}]"
  with_context(code, child)
end
723
-
724
# Substitutes +cur_node+ for the placeholders in +code+.
#
# When +use_temp_node+ is true (by default decided by
# auto_use_temp_node?), +cur_node+ is first assigned to a temp variable:
# the first placeholder receives the assignment and the remaining ones
# the temp variable.
def with_context(code, cur_node,
                 use_temp_node: auto_use_temp_node?(code))
  return substitute_cur_node(code, cur_node) unless use_temp_node

  with_temp_node(cur_node) do |init, temp_var|
    substitute_cur_node(code, temp_var, first_cur_node: init)
  end
end
734
-
735
# Rewrites +code+ so that references to the current element read the type
# of the current node instead. Code carrying SEQ_HEAD_GUARD is rejected.
def with_seq_head_context(code)
  if code.include?(SEQ_HEAD_GUARD)
    fail_due_to('parentheses at sequence head')
  end

  code.gsub(CUR_ELEMENT, "#{CUR_NODE}.type")
end
740
-
741
# Replaces the current-element and current-node placeholders in +code+.
#
# The first occurrence is replaced with +first_cur_node+ and all later
# ones with +cur_node+. Any SEQ_HEAD_GUARD marker is then removed.
def substitute_cur_node(code, cur_node, first_cur_node: cur_node)
  seen_first = false
  unified = code.gsub(CUR_ELEMENT, CUR_NODE)
  replaced = unified.gsub(CUR_NODE) do
    next cur_node if seen_first

    seen_first = true
    first_cur_node
  end
  replaced.gsub(SEQ_HEAD_GUARD, '')
end
751
-
752
# Splits +pattern+ into tokens using TOKEN and discards the tokens that
# consist solely of separators.
def self.tokens(pattern)
  separator_only = /\A#{SEPARATORS}\Z/
  pattern.scan(TOKEN).reject { |token| token =~ separator_only }
end
755
- end
756
- private_constant :Compiler
757
-
758
# Helpers for defining methods based on a pattern string
module Macros
  # Define a method which applies a pattern to an AST node
  #
  # The new method will return nil if the node does not match
  # If the node matches, and a block is provided, the new method will
  # yield to the block (passing any captures as block arguments).
  # If the node matches, and no block is provided, the new method will
  # return the captures, or `true` if there were none.
  def def_node_matcher(method_name, pattern_str)
    compiler = Compiler.new(pattern_str, 'node')
    src = "def #{method_name}(node = self" \
          "#{compiler.emit_trailing_params});" \
          "#{compiler.emit_method_code};end"

    # Attribute the generated method to the caller's file and line.
    location = caller_locations(1, 1).first
    class_eval(src, location.path, location.lineno)
  end

  # Define a method which recurses over the descendants of an AST node,
  # checking whether any of them match the provided pattern
  #
  # If the method name ends with '?', the new method will return `true`
  # as soon as it finds a descendant which matches. Otherwise, it will
  # yield all descendants which match.
  def def_node_search(method_name, pattern_str)
    compiler = Compiler.new(pattern_str, 'node')
    # Take path and line number from the structured location rather than
    # splitting the textual backtrace entry on ':', which breaks for
    # paths that themselves contain a colon (e.g. Windows drive letters).
    location = caller_locations(1, 1).first
    called_from = [location.path, location.lineno]

    if method_name.to_s.end_with?('?')
      node_search_first(method_name, compiler, called_from)
    else
      node_search_all(method_name, compiler, called_from)
    end
  end

  # Defines a search method that returns `true` on the first match.
  def node_search_first(method_name, compiler, called_from)
    node_search(method_name, compiler, 'return true', '', called_from)
  end

  # Defines a search method that yields every match; without a block the
  # generated method returns an enumerator over the matches.
  def node_search_all(method_name, compiler, called_from)
    yield_code = compiler.emit_yield_capture('node')
    prelude = "return enum_for(:#{method_name}, node0" \
              "#{compiler.emit_trailing_params}) unless block_given?"

    node_search(method_name, compiler, yield_code, prelude, called_from)
  end

  # Builds the source of a search method and evaluates it in the receiver.
  # +called_from+ is a [filename, lineno] pair used for error locations.
  def node_search(method_name, compiler, on_match, prelude, called_from)
    src = node_search_body(method_name, compiler.emit_trailing_params,
                           prelude, compiler.match_code, on_match)
    filename, lineno = *called_from
    class_eval(src, filename, lineno.to_i)
  end

  # Returns the source of a method which runs +prelude+, visits every
  # node yielded by `node0.each_node`, executes +on_match+ for each one
  # satisfying +match_code+, and finally returns nil.
  def node_search_body(method_name, trailing_params, prelude, match_code,
                       on_match)
    <<~RUBY
      def #{method_name}(node0#{trailing_params})
        #{prelude}
        node0.each_node do |node|
          if #{match_code}
            #{on_match}
          end
        end
        nil
      end
    RUBY
  end
end
828
-
829
- attr_reader :pattern
830
-
831
# Compiles the pattern string +str+ and defines a singleton `match`
# method on this instance that runs the compiled code.
def initialize(str)
  @pattern = str
  compiler = Compiler.new(str)
  params = compiler.emit_trailing_params
  body = compiler.emit_method_code
  src = "def match(node0#{params});#{body}end"
  instance_eval(src, __FILE__, __LINE__ + 1)
end
838
-
839
# Fallback used when the singleton `match` method is missing, which
# happens after the object has been dup'ed or serialized through YAML.
# Re-running initialize defines the singleton method again, so the
# second call below dispatches to the compiled matcher.
def match(*args)
  initialize(pattern)
  match(*args)
end
845
-
846
# Marshal hook: rebuilds the object from the dumped pattern string by
# running initialize again.
def marshal_load(pattern)
  initialize(pattern)
end
849
-
850
# Marshal hook: only the pattern string is serialized.
def marshal_dump
  @pattern
end
853
-
854
# Two node patterns are equal when +other+ is a NodePattern and both
# pattern strings produce the same token list.
def ==(other)
  return false unless other.is_a?(NodePattern)

  Compiler.tokens(pattern) == Compiler.tokens(other.pattern)
end
858
- alias eql? ==
859
-
860
- def to_s
861
- "#<#{self.class} #{pattern}>"
862
- end
863
-
864
# Yields its argument and any descendants, depth-first.
#
# Only elements that are ::RuboCop::AST::Node instances are descended
# into. Without a block an enumerator is returned; with one, nil.
def self.descend(element, &block)
  return to_enum(__method__, element) unless block_given?

  yield element
  return unless element.is_a?(::RuboCop::AST::Node)

  element.children.each { |child| descend(child, &block) }
  nil
end
879
- end
880
- end
881
- # rubocop:enable Metrics/ClassLength, Metrics/CyclomaticComplexity