rigor-module-graph 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,445 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "prism"
4
+ require_relative "constant_name"
5
+ require_relative "edge"
6
+ require_relative "node"
7
+ require_relative "zeitwerk_resolver"
8
+ require_relative "inflector"
9
+
10
module Rigor
  module ModuleGraph
    # Per-node edge extractor. One instance per `node_rule`
    # invocation; the plugin builds it with the current path,
    # NodeContext, scope, and (optional) Zeitwerk resolver, then
    # asks for `*_edges(node)` (dependency edges) or
    # `*_metadata(node)` / `attribute_nodes(node)` (Node rows).
    #
    # Confidence ladder per edge:
    #
    # - `zeitwerk` when the owner's lexical name matches the
    #   path-inferred name (Phase 2). Only edges that would
    #   otherwise be `syntax` are upgraded; see `build_edge`.
    # - `rigor_type` when a mixin arg is a non-constant whose
    #   `scope.type_of` is a Singleton — we read its `class_name`
    #   instead of dropping the edge (Phase 3).
    # - `unresolved` when scope.type_of declines but we still want
    #   to record that *something* was referenced.
    # - `syntax` otherwise.
    class Analyzer
      # Bare calls that mix a module into the enclosing class/module.
      MIXIN_METHODS = %i[include prepend extend].freeze

      # attr_* macro name => value stored in the Node's +access+ field.
      ATTR_METHODS = {
        attr_reader: "read",
        attr_writer: "write",
        attr_accessor: "accessor"
      }.freeze

      # ActiveRecord association macro name => edge kind.
      ASSOCIATION_METHODS = {
        has_many: "has_many",
        belongs_to: "belongs_to",
        has_one: "has_one",
        has_and_belongs_to_many: "has_and_belongs_to_many"
      }.freeze

      # Bare visibility keywords. Not referenced inside this class.
      VISIBILITY_MARKERS = %i[public protected private].freeze

      attr_reader :path, :context, :scope, :zeitwerk, :visibility_map

      # path           - file being analysed; copied onto every
      #                  Edge / Node and handed to the Zeitwerk
      #                  resolver.
      # context        - NodeContext for the node being visited;
      #                  must respond to +ancestors+, and to
      #                  +enclosing_def+ for const_ref edges.
      # scope          - optional; consulted via +type_of+ for
      #                  non-constant mixin arguments.
      # zeitwerk       - optional resolver (+resolve+ / +matches?+).
      # visibility_map - optional; answers +visibility_for(node)+.
      def initialize(path:, context:, scope: nil, zeitwerk: nil, visibility_map: nil)
        @path = path
        @context = context
        @scope = scope
        @zeitwerk = zeitwerk
        @visibility_map = visibility_map
      end

      # Emits an `inherits` edge when the class declares a
      # superclass. The owner combines the lexical ancestor chain
      # with the class's own constant path (so `module A; class
      # B::C` resolves to `A::B::C`). Confidence is elevated to
      # `zeitwerk` when the path-inferred name matches.
      #
      # Returns an array with zero or one Edge: empty when either
      # the owner or the superclass name cannot be rendered.
      def class_edges(node)
        owner = owner_for_decl(node)
        return [] unless owner

        superclass_name = ConstantName.render(node.superclass)
        return [] unless superclass_name

        [build_edge(from: owner, to: superclass_name, kind: "inherits", node: node)]
      end

      # Modules don't introduce dependency edges by themselves —
      # the include/prepend/extend calls inside them do, and those
      # are caught by `Prism::CallNode`. Returns an empty array so
      # the plugin's `Prism::ModuleNode` rule can stay symmetric
      # with the class rule.
      def module_edges(_node)
        []
      end

      # Phase 5a — a Node row for the class declaration itself.
      # Used by the plugin's +rule: "node"+ diagnostic emitter so
      # downstream tooling can list classes by file / line.
      # Returns nil when no owner name can be determined.
      def class_node_metadata(node)
        owner = owner_for_decl(node)
        return nil unless owner

        Node.build(
          kind: "class", name: owner,
          path: path, line: line_of(node), column: column_of(node)
        )
      end

      # Phase 5a — a Node row for the module declaration.
      # Returns nil when no owner name can be determined.
      def module_node_metadata(node)
        owner = owner_for_decl(node)
        return nil unless owner

        Node.build(
          kind: "module", name: owner,
          path: path, line: line_of(node), column: column_of(node)
        )
      end

      # Phase 5a — a Node row for a +def+ / +def self.+. Reads
      # visibility from the +VisibilityMap+ when one is wired in;
      # defaults to +public+ otherwise (same fallback as
      # +attribute_nodes+). A def with any explicit receiver is
      # recorded as +class_method+. Returns nil when the def has
      # no lexical owner.
      def method_node_metadata(node)
        owner = ConstantName.lexical_owner(context)
        return nil unless owner

        Node.build(
          kind: node.receiver.nil? ? "instance_method" : "class_method",
          name: node.name.to_s,
          owner: owner,
          # Fall back explicitly so a missing map (or a map with no
          # answer for this node) never leaks a nil visibility.
          visibility: visibility_for(node) || "public",
          path: path, line: line_of(node), column: column_of(node)
        )
      end

      # Phase 5a — Node rows for +attr_reader+ / +attr_writer+ /
      # +attr_accessor+ calls. One Node per symbol (or string)
      # argument; other argument kinds are skipped. Returns [] for
      # calls that are not receiver-less attr_* macros or that
      # have no lexical owner.
      def attribute_nodes(node)
        access = ATTR_METHODS[node.name]
        return [] unless access
        return [] unless node.receiver.nil?

        owner = ConstantName.lexical_owner(context)
        return [] unless owner

        # attr_* calls are sibling statements in the class body, so
        # their visibility is whatever the visibility map reports
        # for the call node itself. When no map is wired in, or it
        # has no answer, we record +public+.
        attr_visibility = visibility_for(node) || "public"

        arguments_of(node).filter_map do |arg|
          name = symbol_name(arg)
          next unless name

          Node.build(
            kind: "attribute", name: name, owner: owner,
            visibility: attr_visibility, access: access,
            path: path, line: line_of(node), column: column_of(node)
          )
        end
      end

      # Phase 5b — Rails ActiveRecord association edges. For
      # +has_many :invoices+ we infer +Invoice+ via the bundled
      # Inflector; +class_name: "Foo"+ overrides win when present.
      #
      # One edge per symbol/string argument, with the raw
      # association name kept in +raw+. Edges start at +:syntax+
      # confidence, so `build_edge` may still lift them to
      # `zeitwerk`.
      def association_edges(node)
        kind = ASSOCIATION_METHODS[node.name]
        return [] unless kind
        return [] unless node.receiver.nil?

        owner = ConstantName.lexical_owner(context)
        return [] unless owner

        # The override depends only on the call, not on the
        # argument being visited — look it up once.
        explicit_class = class_name_from_options(node)

        arguments_of(node).filter_map do |arg|
          next unless (sym = symbol_name(arg))

          build_edge(
            from: owner,
            to: explicit_class || infer_associated_class(owner, sym),
            kind: kind, node: node,
            confidence: :syntax,
            raw: sym
          )
        end
      end

      # Rails resolves `has_many :invoices` inside `Billing::Customer`
      # to `Billing::Invoice`, not the top-level `Invoice`, because
      # `compute_type` walks the owner's namespace upwards before
      # falling back to the top level. We don't reproduce that
      # walk (we'd need every constant in scope), but defaulting to
      # the owner's namespace is the right approximation:
      #   - top-level owners (no enclosing namespace) keep the bare
      #     name
      #   - namespaced owners get the sibling resolution Rails does
      #     by default
      # The explicit `class_name:` override is handled by the
      # caller (`association_edges`), not here.
      def infer_associated_class(owner, sym)
        bare = Inflector.class_name_for(sym)
        namespace = owner.rpartition("::").first
        return bare if namespace.empty?

        "#{namespace}::#{bare}"
      end

      # Emits `include` / `prepend` / `extend` edges for a call
      # whose method name is one of `MIXIN_METHODS`. Skips the call
      # when no class/module encloses it (top-level `include` on
      # Object is rare and adds noise to the graph). Each argument
      # is handled independently by `build_mixin_edges`.
      def call_edges(node)
        return [] unless mixin_call?(node)

        owner = ConstantName.lexical_owner(context)
        return [] unless owner

        kind = node.name.to_s
        arguments_of(node).flat_map do |arg|
          build_mixin_edges(owner: owner, kind: kind, arg: arg, node: node)
        end
      end

      # Phase 2c: a `const_ref` edge for a bare constant read
      # inside a method body. The plugin gates on
      # `include_constant_refs`, so this method assumes the caller
      # already decided to look at constant nodes.
      def constant_read_edges(node)
        return [] unless emit_const_ref?(node)
        # The leftmost name of `Foo::Bar::Baz` is a
        # ConstantReadNode wrapped by the outer ConstantPathNode.
        # The path's own rule covers it, so we skip here.
        return [] if parent_is_constant_path?(node)

        owner = ConstantName.lexical_owner(context)
        return [] unless owner

        [build_edge(from: owner, to: node.name.to_s, kind: "const_ref", node: node)]
      end

      # Phase 2c: a `const_ref` edge for a `Foo::Bar` reference
      # inside a method body. We only fire on the outermost path —
      # for `Foo::Bar::Baz` Prism nests the `Foo::Bar` path node
      # inside the outer one, and we'd double-count if we emitted
      # from both.
      def constant_path_edges(node)
        return [] unless emit_const_ref?(node)
        return [] if parent_is_constant_path?(node)

        owner = ConstantName.lexical_owner(context)
        return [] unless owner

        target = ConstantName.render(node)
        return [] unless target

        [build_edge(from: owner, to: target, kind: "const_ref", node: node)]
      end

      # Builds the edge(s) for one mixin argument, trying in order:
      #   1. a renderable constant  -> default-confidence edge
      #   2. a scope-resolved type  -> `rigor_type` edge
      #   3. the argument's source  -> `unresolved` edge
      # Returns [] when not even the source text is available.
      def build_mixin_edges(owner:, kind:, arg:, node:)
        if (target = ConstantName.render(arg))
          return [build_edge(from: owner, to: target, kind: kind, node: node)]
        end

        if (resolved = resolve_via_scope(arg))
          return [build_edge(
            from: owner, to: resolved, kind: kind, node: node,
            confidence: :rigor_type, raw: arg_source(arg)
          )]
        end

        unresolved_label = arg_source(arg)
        return [] unless unresolved_label

        [build_edge(
          from: owner, to: unresolved_label, kind: kind, node: node,
          confidence: :unresolved, raw: unresolved_label
        )]
      end

      # Asks the scope for the type of +arg+ and returns its
      # +class_name+ when that type is a `Rigor::Type::Singleton`.
      # Best-effort by design: returns nil when there is no usable
      # scope, the type is unknown or of another kind, the
      # Singleton class is not loaded, or anything raises.
      def resolve_via_scope(arg)
        return nil unless scope.respond_to?(:type_of)

        type = scope.type_of(arg)
        return nil if type.nil?
        return nil unless defined?(::Rigor::Type::Singleton) && type.is_a?(::Rigor::Type::Singleton)

        type.class_name
      rescue StandardError
        nil
      end

      # Source text of +arg+ taken from its location, or nil when
      # the location is missing or slicing fails.
      def arg_source(arg)
        loc = arg.location
        return nil unless loc

        loc.slice
      rescue StandardError
        nil
      end

      # True for a receiver-less `include` / `prepend` / `extend`.
      def mixin_call?(node)
        MIXIN_METHODS.include?(node.name) && node.receiver.nil?
      end

      # Name carried by a symbol or string literal argument; nil
      # for any other node type.
      def symbol_name(arg)
        case arg
        when Prism::SymbolNode
          arg.value
        when Prism::StringNode
          arg.unescaped
        end
      end

      # Pulls +class_name: "Foo"+ (or +:Foo+) out of the keyword
      # arguments on an association call. Only the first hash-like
      # argument is inspected. Returns nil when the key is absent
      # or its value is neither a string nor a symbol literal.
      def class_name_from_options(node)
        keyword_hash = arguments_of(node).find do |candidate|
          candidate.is_a?(Prism::KeywordHashNode) || candidate.is_a?(Prism::HashNode)
        end
        return nil unless keyword_hash

        keyword_hash.elements.each do |elem|
          next unless elem.is_a?(Prism::AssocNode)

          key = elem.key
          next unless key.is_a?(Prism::SymbolNode) && key.value == "class_name"

          value = elem.value
          return value.unescaped if value.is_a?(Prism::StringNode)
          return value.value.to_s if value.is_a?(Prism::SymbolNode)
        end
        nil
      end

      # Visibility recorded for +node+ by the visibility map, or
      # nil when no map was supplied.
      def visibility_for(node)
        return nil unless visibility_map

        visibility_map.visibility_for(node)
      end

      # Positional argument nodes of a call; [] when the call has
      # no argument list.
      def arguments_of(node)
        node.arguments&.arguments || []
      end

      # Fully qualified name for a class/module declaration: the
      # declaration's own constant path combined with the lexical
      # nesting by `ConstantName.lexical_owner_with`.
      def owner_for_decl(node)
        own = ConstantName.render(node.constant_path)
        ConstantName.lexical_owner_with(context, own)
      end

      # Start line of +node+, or nil when it has no location.
      def line_of(node)
        node.location&.start_line
      end

      # 1-based start column of +node+, or nil when it has no
      # location.
      def column_of(node)
        # Prism returns 0-based start_column; downstream tooling
        # and diagnostic JSON expect 1-based columns to match how
        # editors render positions.
        col = node.location&.start_column
        col.nil? ? nil : col + 1
      end

      # Assembles an Edge anchored at +node+'s position in +path+.
      def build_edge(from:, to:, kind:, node:, confidence: :syntax, raw: nil)
        # Caller's confidence is the floor — we may bump :syntax up
        # when Zeitwerk agrees with the owner's lexical name. Any
        # other confidence passes through untouched; we never
        # demote.
        effective = confidence == :syntax ? zeitwerk_confidence(from) : confidence
        Edge.build(
          from: from,
          to: to,
          kind: kind,
          path: path,
          line: line_of(node),
          column: column_of(node),
          confidence: effective.to_s,
          raw: raw
        )
      end

      # Returns :zeitwerk when the path-inferred constant for the
      # current file matches the lexical owner, :syntax otherwise.
      # The resolver is optional — when no Zeitwerk config is in
      # play (or there is no path) we just stay at :syntax.
      def zeitwerk_confidence(owner)
        return :syntax unless zeitwerk
        return :syntax unless path

        inferred = zeitwerk.resolve(path)
        zeitwerk.matches?(owner, inferred) ? :zeitwerk : :syntax
      end

      # Whether a constant node should produce a `const_ref` edge:
      # only inside a method definition, and never for constants
      # that sit in a class/module header or in mixin arguments
      # (those are covered by `inherits` / mixin edges).
      def emit_const_ref?(node)
        return false unless context.respond_to?(:enclosing_def)
        return false if context.enclosing_def.nil?
        return false if inside_class_header?(node)
        return false if inside_mixin_args?(node)

        true
      end

      # Inside `class Foo < Bar; …`, Bar's ConstantReadNode is a
      # child of the ClassNode itself (constant_path / superclass
      # slots). We are walked AFTER `context.ancestors` has been
      # pushed, so the immediate parent here is the ClassNode.
      # Matching is by object identity.
      def inside_class_header?(node)
        parent = context.ancestors.last
        return false unless parent.is_a?(Prism::ClassNode) ||
                            parent.is_a?(Prism::ModuleNode)

        parent.constant_path.equal?(node) ||
          (parent.respond_to?(:superclass) && parent.superclass.equal?(node))
      end

      # `include Foo` / `prepend Foo` / `extend Foo` — Foo's
      # ConstantReadNode is reached after the include CallNode is
      # on the ancestor stack. Walk up looking for a recent mixin
      # CallNode where this node sits inside its arguments.
      def inside_mixin_args?(node)
        context.ancestors.reverse_each do |ancestor|
          if ancestor.is_a?(Prism::CallNode) && mixin_call?(ancestor)
            return true if arguments_of(ancestor).any? { |arg| contains_node?(arg, node) }
          end
          # Stop at the first class / module / def boundary so we
          # don't accidentally bleed into a containing decl.
          break if ancestor.is_a?(Prism::ClassNode) ||
                   ancestor.is_a?(Prism::ModuleNode) ||
                   ancestor.is_a?(Prism::DefNode)
        end
        false
      end

      # Depth-first identity search: true when +needle+ is
      # +haystack+ itself or one of its descendants.
      def contains_node?(haystack, needle)
        return true if haystack.equal?(needle)
        return false unless haystack.is_a?(Prism::Node)

        haystack.compact_child_nodes.any? { |child| contains_node?(child, needle) }
      end

      # True when +node+ is the left-hand (`parent`) part of the
      # ConstantPathNode directly above it, i.e. it is an inner
      # segment of a longer `A::B` path.
      def parent_is_constant_path?(node)
        parent = context.ancestors.last
        parent.is_a?(Prism::ConstantPathNode) && parent.parent.equal?(node)
      end
    end
  end
end