reference_extractor 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,304 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "parser"
4
+ require "parser/ast/node"
5
+
6
module ReferenceExtractor
  module Internal
    # Convenience methods for working with Parser::AST::Node nodes.
    #
    # All helpers are module-level methods. They inspect nodes only through
    # +type+ and +children+ (and +location+ for the two location helpers).
    module NodeHelpers
      # Raised when a helper is handed a node whose type it does not support.
      class TypeError < ArgumentError; end

      class << self
        # Returns the constant name declared by a class or module definition.
        #
        # @param class_or_module_node [Parser::AST::Node] a `class` or `module` node
        # @return [String] the (possibly namespaced) constant name
        # @raise [TypeError] when the node is neither a class nor a module node
        def class_or_module_name(class_or_module_node)
          case type_of(class_or_module_node)
          when CLASS, MODULE
            # (class (const nil :Foo) (const nil :Bar) (nil))
            #   "class Foo < Bar; end"
            # (module (const nil :Foo) (nil))
            #   "module Foo; end"
            identifier = class_or_module_node.children[0]
            constant_name(identifier)
          else
            raise TypeError
          end
        end

        # Builds the source-level name of a constant reference or assignment by
        # recursively joining its namespace nodes with "::".
        #
        # @param constant_node [Parser::AST::Node] a `const`, `casgn`, `self` or `cbase` node
        # @return [String] e.g. "Foo", "::Foo" or "Foo::Bar"; "" for a `cbase` node
        # @raise [TypeError] when the node (or one of its namespaces) has another type
        def constant_name(constant_node)
          case type_of(constant_node)
          when CONSTANT_ROOT_NAMESPACE
            ""
          when CONSTANT, CONSTANT_ASSIGNMENT, SELF
            # (const nil :Foo)
            #   "Foo"
            # (const (cbase) :Foo)
            #   "::Foo"
            # (const (lvar :a) :Foo)
            #   "a::Foo"
            # (casgn nil :Foo (int 1))
            #   "Foo = 1"
            # (casgn (cbase) :Foo (int 1))
            #   "::Foo = 1"
            # (casgn (lvar :a) :Foo (int 1))
            #   "a::Foo = 1"
            # (casgn (self) :Foo (int 1))
            #   "self::Foo = 1"
            namespace, name = constant_node.children

            if namespace
              [constant_name(namespace), name].join("::")
            else
              name.to_s
            end
          else
            raise TypeError
          end
        end

        # Yields every child of +node+ that is itself an AST node, skipping
        # scalar children such as symbols, strings and nil.
        #
        # @param node [Parser::AST::Node]
        # @return [Enumerator] when no block is given
        def each_child(node, &block)
          return enum_for(:each_child, node) unless block

          node.children.each do |child|
            yield(child) if child.is_a?(Parser::AST::Node)
          end
        end

        # Collects the names of the class/module definitions found in +ancestors+.
        # Names are prepended as ancestors are visited, so the last ancestor's
        # name comes first in the result.
        #
        # @param starting_node [Parser::AST::Node] the node whose namespace is wanted
        # @param ancestors [Array<Parser::AST::Node>] enclosing nodes of +starting_node+
        # @return [Array<String>]
        def enclosing_namespace_path(starting_node, ancestors:)
          ancestors.select { |n| [CLASS, MODULE].include?(type_of(n)) }
            .each_with_object([]) do |node, namespace|
            # when evaluating `class Child < Parent`, the const node for `Parent` is a child of the class
            # node, so it'll be an ancestor, but `Parent` is not evaluated in the namespace of `Child`, so
            # we need to skip it here
            next if type_of(node) == CLASS && parent_class(node) == starting_node

            namespace.prepend(class_or_module_name(node))
          end
        end

        # Returns the Ruby value held by a string or symbol literal node.
        #
        # @param string_or_symbol_node [Parser::AST::Node] a `str` or `sym` node
        # @return [String, Symbol]
        # @raise [TypeError] for any other node type
        def literal_value(string_or_symbol_node)
          case type_of(string_or_symbol_node)
          when STRING, SYMBOL
            # (str "foo")
            #   "'foo'"
            # (sym :foo)
            #   ":foo"
            string_or_symbol_node.children[0]
          else
            raise TypeError
          end
        end

        # Returns a Node::Location built from the line and column of the node's location.
        def location(node)
          location = node.location
          Node::Location.new(location.line, location.column)
        end

        # True when +node+ is a constant reference (`const`).
        def constant?(node)
          type_of(node) == CONSTANT
        end

        # True when +node+ is a constant assignment (`casgn`).
        def constant_assignment?(node)
          type_of(node) == CONSTANT_ASSIGNMENT
        end

        # True when +node+ is a class definition (`class`).
        def class?(node)
          type_of(node) == CLASS
        end

        # True when +node+ is a method call (`send`).
        def method_call?(node)
          type_of(node) == METHOD_CALL
        end

        # True when +node+ is a hash literal (`hash`).
        def hash?(node)
          type_of(node) == HASH
        end

        # True when +node+ is a string literal (`str`).
        def string?(node)
          type_of(node) == STRING
        end

        # True when +node+ is a symbol literal (`sym`).
        def symbol?(node)
          type_of(node) == SYMBOL
        end

        # Returns the argument nodes of a method call.
        #
        # @raise [TypeError] unless +method_call_node+ is a `send` node
        def method_arguments(method_call_node)
          raise TypeError unless method_call?(method_call_node)

          # (send (lvar :foo) :bar (int 1))
          #   "foo.bar(1)"
          method_call_node.children.slice(2..-1)
        end

        # Returns the name of the called method.
        #
        # @raise [TypeError] unless +method_call_node+ is a `send` node
        def method_name(method_call_node)
          raise TypeError unless method_call?(method_call_node)

          # (send (lvar :foo) :bar (int 1))
          #   "foo.bar(1)"
          method_call_node.children[1]
        end

        # Returns the constant name a node defines as a class or module, or nil
        # when the node does not define one.
        #
        # Recognised forms are `class`/`module` definitions and constant
        # assignments whose value is `Class.new` / `Module.new`, with or
        # without a block.
        #
        # @param node [Parser::AST::Node]
        # @return [String, nil]
        def module_name_from_definition(node)
          case type_of(node)
          when CLASS, MODULE
            # "class My::Class; end"
            # "module My::Module; end"
            class_or_module_name(node)
          when CONSTANT_ASSIGNMENT
            # "My::Class = ..."
            # "My::Module = ..."
            #
            # The value is the third child. Assignment targets such as the
            # left-hand side of `Foo ||= 1` or `A, B = 1, 2` are casgn nodes
            # without a value; they cannot define a module, so bail out
            # instead of treating the constant's name symbol as a node.
            rvalue = node.children[2]
            return unless rvalue

            case type_of(rvalue)
            when METHOD_CALL
              # "Class.new"
              # "Module.new"
              constant_name(node) if module_creation?(rvalue)
            when BLOCK
              # "Class.new do end"
              # "Module.new do end"
              constant_name(node) if module_creation?(method_call_node(rvalue))
            end
          end
        end

        # Returns a Node::Location for the node's name, or nil when the node's
        # location does not expose a +name+ range.
        def name_location(node)
          location = node.location
          return unless location.respond_to?(:name)

          name = location.name
          Node::Location.new(name.line, name.column)
        end

        # Returns the superclass node of a class definition (nil when absent).
        #
        # @raise [TypeError] unless +class_node+ is a `class` node
        def parent_class(class_node)
          raise TypeError unless type_of(class_node) == CLASS

          # (class (const nil :Foo) (const nil :Bar) (nil))
          #   "class Foo < Bar; end"
          class_node.children[1]
        end

        # Joins the module names contributed by the given ancestors into one
        # fully nested name. The ancestor order is reversed before joining, so
        # the last ancestor's name comes first.
        #
        # @param ancestors [Array<Parser::AST::Node>]
        # @return [String] "Object" when no ancestor contributes a name
        def parent_module_name(ancestors:)
          definitions = ancestors
            .select { |n| [CLASS, MODULE, CONSTANT_ASSIGNMENT, BLOCK].include?(type_of(n)) }

          names = definitions.map do |definition|
            name_part_from_definition(definition)
          end.compact

          names.empty? ? "Object" : names.reverse.join("::")
        end

        # Looks up the value node stored under a literal string/symbol key.
        #
        # Pairs whose key is not a string or symbol literal can never equal
        # +key+, so they are skipped rather than raising.
        #
        # @param hash_node [Parser::AST::Node] a `hash` node
        # @param key [String, Symbol] the literal key to look for
        # @return [Parser::AST::Node, nil] the value node, or nil when the key is absent
        # @raise [TypeError] unless +hash_node+ is a `hash` node
        def value_from_hash(hash_node, key)
          raise TypeError unless hash?(hash_node)

          pair = hash_pairs(hash_node).find do |pair_node|
            key_node = hash_pair_key(pair_node)
            literal?(key_node) && literal_value(key_node) == key
          end
          hash_pair_value(pair) if pair
        end

        private

        BLOCK = :block
        CLASS = :class
        CONSTANT = :const
        CONSTANT_ASSIGNMENT = :casgn
        CONSTANT_ROOT_NAMESPACE = :cbase
        HASH = :hash
        HASH_PAIR = :pair
        METHOD_CALL = :send
        MODULE = :module
        SELF = :self
        STRING = :str
        SYMBOL = :sym

        private_constant(
          :BLOCK, :CLASS, :CONSTANT, :CONSTANT_ASSIGNMENT, :CONSTANT_ROOT_NAMESPACE, :HASH, :HASH_PAIR, :METHOD_CALL,
          :MODULE, :SELF, :STRING, :SYMBOL
        )

        # Single access point for a node's type symbol.
        def type_of(node)
          node.type
        end

        # True when +node+ is a string or symbol literal, i.e. safe for literal_value.
        def literal?(node)
          string?(node) || symbol?(node)
        end

        # Returns the key node of a hash pair.
        def hash_pair_key(hash_pair_node)
          raise TypeError unless type_of(hash_pair_node) == HASH_PAIR

          # (pair (int 1) (int 2))
          #   "1 => 2"
          # (pair (sym :answer) (int 42))
          #   "answer: 42"
          hash_pair_node.children[0]
        end

        # Returns the value node of a hash pair.
        def hash_pair_value(hash_pair_node)
          raise TypeError unless type_of(hash_pair_node) == HASH_PAIR

          # (pair (int 1) (int 2))
          #   "1 => 2"
          # (pair (sym :answer) (int 42))
          #   "answer: 42"
          hash_pair_node.children[1]
        end

        # Returns the `pair` children of a hash node, ignoring any other child type.
        def hash_pairs(hash_node)
          raise TypeError unless hash?(hash_node)

          # (hash (pair (int 1) (int 2)) (pair (int 3) (int 4)))
          #   "{1 => 2, 3 => 4}"
          hash_node.children.select { |n| type_of(n) == HASH_PAIR }
        end

        # Returns the call node a block is attached to.
        def method_call_node(block_node)
          raise TypeError unless type_of(block_node) == BLOCK

          # (block (send (lvar :foo) :bar) (args) (int 42))
          #   "foo.bar do 42 end"
          block_node.children[0]
        end

        # True for "Class.new" / "Module.new" calls.
        def module_creation?(node)
          method_call?(node) &&
            dynamic_class_creation?(receiver(node)) &&
            method_name(node) == :new
        end

        # True when +node+ is the constant `Class` or `Module`.
        def dynamic_class_creation?(node)
          !!node &&
            constant?(node) &&
            ["Class", "Module"].include?(constant_name(node))
        end

        # Returns the receiver's constant name for "Foo.class_eval do ... end"
        # blocks, nil for every other block.
        def name_from_block_definition(node)
          call = method_call_node(node)
          # A block may be attached to something other than a `send` node
          # (e.g. `super do ... end`); such a block never names a module, so
          # do not ask it for a method name.
          return unless method_call?(call) && method_name(call) == :class_eval

          target = receiver(call)
          constant_name(target) if target && constant?(target)
        end

        # Returns the name a single ancestor contributes to parent_module_name.
        def name_part_from_definition(node)
          case type_of(node)
          when CLASS, MODULE, CONSTANT_ASSIGNMENT
            module_name_from_definition(node)
          when BLOCK
            name_from_block_definition(node)
          end
        end

        # Returns the receiver node of a method call, or of the call a block is attached to.
        def receiver(method_call_or_block_node)
          case type_of(method_call_or_block_node)
          when METHOD_CALL
            method_call_or_block_node.children[0]
          when BLOCK
            receiver(method_call_node(method_call_or_block_node))
          else
            raise TypeError
          end
        end
      end
    end
  end
end
@@ -0,0 +1,65 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "ast/node"
4
+
5
module ReferenceExtractor
  module Internal
    # A collection of constant definitions parsed from an Abstract Syntax Tree (AST).
    #
    # Definitions are stored under their fully qualified name ("::A::B") and
    # map to the location of the defining name.
    class ParsedConstantDefinitions
      class << self
        # What fully qualified constants can this constant refer to in this context?
        #
        # @param constant_name [String] the name as written in the source
        # @param namespace_path [Array<String, nil>] enclosing namespaces, in order;
        #   nil entries are ignored
        # @return [Array<String>] candidates, starting with the top-level one;
        #   only the name itself when it already starts with "::"
        def reference_qualifications(constant_name, namespace_path:)
          return [constant_name] if constant_name.start_with?("::")

          prefixes = [""]
          namespace_path.each do |segment|
            prefixes << "#{prefixes.last}::#{segment}" if segment
          end

          prefixes.map { |prefix| "#{prefix}::#{constant_name}" }
        end
      end

      # @param root_node [Parser::AST::Node, nil] root of the tree to scan; nil
      #   yields an empty collection
      def initialize(root_node:)
        @local_definitions = {}

        collect_local_definitions_from_root(root_node) if root_node
      end

      # Does +constant_name+, used inside +namespace_path+, resolve to a
      # constant defined in the parsed tree? A definition recorded at exactly
      # +location+ is not counted.
      #
      # @return [Boolean]
      def local_reference?(constant_name, location: nil, namespace_path: [])
        candidates = self.class.reference_qualifications(constant_name, namespace_path: namespace_path)

        candidates.any? do |qualified_name|
          definition_location = @local_definitions[qualified_name]
          definition_location && definition_location != location
        end
      end

      private

      # Walks the tree depth-first, recording constant assignments and
      # class/module definitions relative to the namespace they appear in.
      def collect_local_definitions_from_root(node, current_namespace_path = [])
        if NodeHelpers.constant_assignment?(node)
          add_definition(NodeHelpers.constant_name(node), current_namespace_path, NodeHelpers.name_location(node))
        elsif NodeHelpers.module_name_from_definition(node)
          # handle compact constant nesting (e.g. "module Sales::Order"):
          # follow the chain of constant children and record each one
          name_node = node
          while (name_node = NodeHelpers.each_child(name_node).find { |child| NodeHelpers.constant?(child) })
            add_definition(
              NodeHelpers.constant_name(name_node),
              current_namespace_path,
              NodeHelpers.name_location(name_node)
            )
          end

          # builds a new array, so the caller's namespace path is left untouched
          current_namespace_path += NodeHelpers.class_or_module_name(node).split("::")
        end

        NodeHelpers.each_child(node) { |child| collect_local_definitions_from_root(child, current_namespace_path) }
      end

      # Records +location+ under the "::"-joined name of namespace plus constant.
      def add_definition(constant_name, current_namespace_path, location)
        qualified_name = ["", *current_namespace_path, constant_name].join("::")

        @local_definitions[qualified_name] = location
      end
    end
  end
end
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "stringio"
4
+ require "herb"
5
+
6
module ReferenceExtractor
  module Internal
    module Parsers
      # Parses ERB templates by extracting their Ruby code and handing it to
      # the Ruby parser.
      class Erb
        # @param parser [#call] receives the ERB source and returns Ruby source
        # @param ruby_parser [#call] receives `io:` and `file_path:` and parses
        #   the extracted Ruby
        def initialize(parser: Herb.method(:extract_ruby), ruby_parser: Ruby.new)
          @parser = parser
          @ruby_parser = ruby_parser
        end

        # @param io [#read] source of the ERB template
        # @param file_path [String] path passed on to the Ruby parser
        # @return whatever the Ruby parser returns for the extracted code
        def call(io:, file_path: "<unknown>")
          extracted_ruby = @parser.call(io.read)

          @ruby_parser.call(io: StringIO.new(extracted_ruby), file_path: file_path)
        end
      end
    end
  end
end
@@ -0,0 +1,48 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "singleton"
4
+
5
module ReferenceExtractor
  module Internal
    module Parsers
      # Hands out the parser matching a file path. Parser instances are created
      # on first use and reused afterwards.
      class Factory
        include Singleton

        RUBY_REGEX = %r{
          # Although not important for regex, these are ordered from most likely to match to least likely.
          \.(rb|rake|builder|gemspec|ru)\Z
          |
          (Gemfile|Rakefile)\Z
        }x
        private_constant :RUBY_REGEX

        ERB_REGEX = /\.erb\Z/
        private_constant :ERB_REGEX

        def initialize
          @ruby_parser = nil
          @erb_parser = nil
          @erb_parser_class = nil
        end

        # @param path the file path to pick a parser for
        # @return the Ruby parser, the ERB parser, or nil when the path matches
        #   neither pattern
        def for_path(path)
          case path
          when RUBY_REGEX then ruby_parser
          when ERB_REGEX then erb_parser
          end
        end

        # Class instantiated for ERB files; Erb unless one was assigned.
        def erb_parser_class
          @erb_parser_class ||= Erb
        end

        # Replaces the ERB parser class and drops the cached ERB parser so the
        # next lookup builds an instance of the new class.
        def erb_parser_class=(klass)
          @erb_parser = nil
          @erb_parser_class = klass
        end

        private

        # Ruby parser, built on first use.
        def ruby_parser
          @ruby_parser ||= Ruby.new
        end

        # ERB parser, built on first use from the configured class.
        def erb_parser
          @erb_parser ||= erb_parser_class.new
        end
      end
    end
  end
end
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
+ # require "parser/source/map"
4
+
5
module ReferenceExtractor
  module Internal
    module Parsers
      # Describes the outcome of parsing one file: the file, a message and,
      # optionally, where in the file the message applies.
      class ParseResult
        attr_reader :location, :file, :message

        # @param file [String] path of the parsed file
        # @param message [String] human-readable description
        # @param location [#line, #column, nil] position in the file, when known
        def initialize(file:, message:, location: nil)
          @file = file
          @message = message
          @location = location
        end

        # Renders two newline-terminated lines: the styled file name (followed
        # by ":line:column" when a location is present) and the message.
        #
        # @param style [#filename, #reset] supplies the text placed before and
        #   after the file name
        # @return [String]
        def to_s(style = OutputStyles::Plain.new)
          header = "#{style.filename}#{file}#{style.reset}"
          position = location
          header += ":#{position.line}:#{position.column}" if position

          "#{header}\n#{@message}\n"
        end
      end
    end
  end
end
@@ -0,0 +1,43 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "parser"
4
+ require "prism"
5
+
6
module ReferenceExtractor
  module Internal
    module Parsers
      # Parses Ruby source into an AST and reports failures as ParseError.
      class Ruby
        # Parser whose diagnostics engine treats every error as fatal.
        class RaiseExceptionsParser < Prism::Translation::Parser
          def initialize(builder)
            # Initialize the parent exactly once, then configure the
            # diagnostics engine it created through the inherited reader.
            super
            diagnostics.all_errors_are_fatal = true
          end
        end

        # Builder that returns a string token's raw value.
        # NOTE(review): judging by the name, this bypasses the base builder's
        # handling of invalid UTF-8 in string literals — confirm against the
        # parser gem's default builder.
        class TolerateInvalidUtf8Builder < Prism::Translation::Parser::Builder
          def string_value(token)
            value(token)
          end
        end

        # @param parser_class [Class] instantiated with the builder for every call
        def initialize(parser_class: RaiseExceptionsParser)
          @builder = TolerateInvalidUtf8Builder.new
          @parser_class = parser_class
        end

        # Reads the source from +io+ and parses it.
        #
        # @param io [#read] source of the Ruby code
        # @param file_path [String] name given to the source buffer and used in
        #   error results
        # @return the value returned by the parser's +parse+
        # @raise [ParseError] wrapping a ParseResult when the source has an
        #   encoding problem or a syntax error
        def call(io:, file_path: "<unknown>")
          buffer = Parser::Source::Buffer.new(file_path)
          buffer.source = io.read
          parser = @parser_class.new(@builder)
          parser.parse(buffer)
        rescue EncodingError => e
          result = ParseResult.new(file: file_path, message: e.message)
          raise ParseError, result
        rescue Parser::SyntaxError => e
          result = ParseResult.new(file: file_path, message: "Syntax error: #{e}")
          raise ParseError, result
        end
      end
    end
  end
end
@@ -0,0 +1,14 @@
1
+ # frozen_string_literal: true
2
+
3
module ReferenceExtractor
  module Internal
    # Namespace for the file parsers; each constant is registered for
    # autoloading rather than required eagerly.
    module Parsers
      extend ActiveSupport::Autoload

      %i[Erb Factory Ruby ParseResult].each { |const_name| autoload(const_name) }
    end
  end
end
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
module ReferenceExtractor
  module Internal
    # An unresolved reference from a file in one package to a constant that may be defined in a different package.
    # "Unresolved" means we know how the constant is referred to in the file and have enough context
    # (the surrounding namespace path) to work out its fully qualified name, so a Reference can be
    # produced in a separate pass; the name has not been fully qualified yet.
    #
    # Instances are built with keyword arguments only.
    UnresolvedReference = Struct.new(
      *%i[constant_name namespace_path relative_path source_location],
      keyword_init: true
    )
  end
end
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
module ReferenceExtractor
  # Namespace for the gem's implementation details; each constant is
  # registered for autoloading rather than required eagerly.
  module Internal
    extend ActiveSupport::Autoload

    %i[
      AstReferenceExtractor
      ConstNodeInspector
      ConstantDiscovery
      Extractor
      Node
      NodeHelpers
      ParsedConstantDefinitions
      Parsers
      UnresolvedReference
    ].each { |const_name| autoload(const_name) }
  end
end
@@ -0,0 +1,10 @@
1
+ module ReferenceExtractor
2
+ class ParseError < StandardError
3
+ attr_reader(:result)
4
+
5
+ def initialize(result)
6
+ super(result.message)
7
+ @result = result
8
+ end
9
+ end
10
+ end
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+
3
module ReferenceExtractor
  # A constant reference from one file to another.
  #
  # Instances are built with keyword arguments only.
  Reference = Struct.new(*%i[relative_path constant source_location], keyword_init: true)
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
module ReferenceExtractor
  # Version of the reference_extractor gem.
  VERSION = "0.1.0"
end
@@ -0,0 +1,35 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "active_support"
4
+ # Provides String#pluralize, ends_with?, and others
5
+ require "active_support/core_ext/string"
6
+
7
+ # ReferenceExtractor extracts constant references from Ruby code, giving you your implicit application structure as a neat graph.
8
+ #
9
+ # @example Extract references from a string snippet
10
+ # extractor = ReferenceExtractor::Extractor.new(
11
+ # autoloaders: Rails.autoloaders,
12
+ # root_path: Rails.root
13
+ # )
14
+ # references = extractor.references_from_string("Order.find(1)")
15
+ # # => [#<ReferenceExtractor::Reference constant=#<ReferenceExtractor::ConstantContext name="::Order" ...>>]
16
+ #
17
+ # @example Extract references from a file
18
+ # references = extractor.references_from_file("app/models/user.rb")
19
+ # # => [#<ReferenceExtractor::Reference ...>, ...]
20
module ReferenceExtractor
  extend ActiveSupport::Autoload

  # public API
  %i[ConstantContext Extractor ParseError Reference].each { |const_name| autoload(const_name) }

  # private API: registered for autoloading, then hidden from outside callers
  autoload :Internal
  private_constant :Internal
end
34
+
35
+ require "reference_extractor/version"