interlinked-mapper 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,822 @@
1
+ """CodeGraph — a NetworkX-backed directed multigraph of Python symbols."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any, Iterator
6
+
7
+ import networkx as nx
8
+
9
+ from interlinked.models import (
10
+ NodeData, EdgeData, EdgeType, SymbolType,
11
+ GraphSnapshot, ViewState, ColorScheme,
12
+ )
13
+
14
+
15
+ class CodeGraph:
16
+ """The core graph structure representing an entire Python project.
17
+
18
+ Wraps a NetworkX MultiDiGraph with typed node/edge accessors
19
+ and query methods used by the commander layer.
20
+ """
21
+
22
def __init__(self) -> None:
    """Create an empty graph holding no real or proposed symbols."""
    # Hypothetical ("proposed") symbols are tracked separately from the
    # real ones so they can be listed and discarded independently.
    self._proposed_edges: list[EdgeData] = []
    self._proposed_nodes: dict[str, NodeData] = {}
    # Real symbols, keyed by node id.
    self._node_data: dict[str, NodeData] = {}
    # Backing NetworkX structure; parallel edges are allowed.
    self._g: nx.MultiDiGraph = nx.MultiDiGraph()
27
+
28
+ # ── Construction ─────────────────────────────────────────────────
29
+
30
def add_node(self, node: NodeData) -> None:
    """Store *node* in the matching registry and mirror it into the graph.

    Proposed nodes go to the proposed registry, all others to the real
    one. The node's dumped fields become the graph node's attributes.
    """
    if node.is_proposed:
        self._proposed_nodes[node.id] = node
    else:
        self._node_data[node.id] = node
    attributes = node.model_dump()
    self._g.add_node(node.id, **attributes)
34
+
35
def add_edge(self, edge: EdgeData) -> None:
    """Insert *edge* into the graph, remembering it if it is proposed.

    The edge type's value is used as the multigraph key, so a second edge
    of the same type between the same two nodes reuses that key.
    """
    if edge.is_proposed:
        self._proposed_edges.append(edge)
    attributes = edge.model_dump()
    type_key = edge.edge_type.value
    self._g.add_edge(edge.source, edge.target, key=type_key, **attributes)
43
+
44
def build_from(self, nodes: list[NodeData], edges: list[EdgeData]) -> None:
    """Populate from parser output, resolving short names to qualified IDs.

    Every node is added first. A lookup is then built from each node's
    short name and from every dotted suffix of its qualified name (e.g.
    ``"graph.CodeGraph"`` for ``"analyzer.graph.CodeGraph"``) to the ids
    carrying that name. Each edge is passed through ``_resolve_edge``
    with that lookup before being added.

    Args:
        nodes: Parsed symbols.
        edges: Parsed relations; endpoints may be unqualified names.
    """
    for n in nodes:
        self.add_node(n)

    name_index: dict[str, list[str]] = {}

    def _index(key: str, node_id: str) -> None:
        # The last suffix of a qualified name normally equals the short
        # name. Without this check the same id would be listed twice and
        # a unique name would look ambiguous to the resolver.
        bucket = name_index.setdefault(key, [])
        if node_id not in bucket:
            bucket.append(node_id)

    for n in nodes:
        _index(n.name, n.id)
        parts = n.qualified_name.split(".")
        for i in range(1, len(parts)):
            _index(".".join(parts[i:]), n.id)

    node_ids = {n.id for n in nodes}

    for e in edges:
        self.add_edge(self._resolve_edge(e, node_ids, name_index))
65
+
66
+ @staticmethod
67
+ def _resolve_edge(
68
+ edge: EdgeData,
69
+ node_ids: set[str],
70
+ name_index: dict[str, list[str]],
71
+ ) -> EdgeData:
72
+ """Try to resolve unqualified source/target names to known node IDs."""
73
+ source = edge.source
74
+ target = edge.target
75
+
76
+ if source not in node_ids:
77
+ candidates = name_index.get(source, [])
78
+ if len(candidates) == 1:
79
+ source = candidates[0]
80
+
81
+ if target not in node_ids:
82
+ candidates = name_index.get(target, [])
83
+ if len(candidates) == 1:
84
+ target = candidates[0]
85
+ elif len(candidates) > 1:
86
+ # Prefer a candidate in the same module as the source
87
+ src_module = source.rsplit(".", 1)[0] if "." in source else source
88
+ for c in candidates:
89
+ if c.startswith(src_module):
90
+ target = c
91
+ break
92
+ else:
93
+ target = candidates[0]
94
+
95
+ if source == edge.source and target == edge.target:
96
+ return edge
97
+
98
+ return EdgeData(
99
+ source=source,
100
+ target=target,
101
+ edge_type=edge.edge_type,
102
+ is_dead=edge.is_dead,
103
+ is_proposed=edge.is_proposed,
104
+ line=edge.line,
105
+ metadata=edge.metadata,
106
+ )
107
+
108
+ # ── Node access ──────────────────────────────────────────────────
109
+
110
def get_node(self, node_id: str) -> NodeData | None:
    """Look up a node by id, checking real nodes before proposed ones."""
    real = self._node_data.get(node_id)
    if real:
        return real
    return self._proposed_nodes.get(node_id)
112
+
113
def all_nodes(self, include_proposed: bool = True) -> list[NodeData]:
    """Return a new list of real nodes, followed by proposed ones if asked."""
    real = self._node_data.values()
    if not include_proposed:
        return list(real)
    return [*real, *self._proposed_nodes.values()]
118
+
119
def nodes_by_type(self, sym_type: SymbolType) -> list[NodeData]:
    """Return the real (non-proposed) nodes whose symbol type is *sym_type*."""
    matching: list[NodeData] = []
    for candidate in self._node_data.values():
        if candidate.symbol_type == sym_type:
            matching.append(candidate)
    return matching
121
+
122
+ @property
123
def node_count(self) -> int:
    """Total number of registered nodes, real and proposed together."""
    registries = (self._node_data, self._proposed_nodes)
    return sum(len(registry) for registry in registries)
125
+
126
+ @property
127
def edge_count(self) -> int:
    """Number of edges currently held by the backing graph."""
    total = self._g.number_of_edges()
    return total
129
+
130
+ # ── Edge access ──────────────────────────────────────────────────
131
+
132
def all_edges(self, include_proposed: bool = True) -> list[EdgeData]:
    """Rebuild ``EdgeData`` objects from the attributes stored on graph edges.

    Each ``(source, target, edge_type)`` triple is emitted at most once.
    Proposed edges are dropped when ``include_proposed`` is false.
    """
    collected: list[EdgeData] = []
    emitted = set()
    for src, dst, attrs in self._g.edges(data=True):
        identity = (src, dst, attrs.get("edge_type"))
        if identity in emitted:
            continue
        emitted.add(identity)
        # Only pass through attributes that are actual EdgeData fields.
        payload = {name: attrs[name] for name in EdgeData.model_fields if name in attrs}
        record = EdgeData(**payload)
        if include_proposed or not record.is_proposed:
            collected.append(record)
    return collected
144
+
145
def edges_from(self, node_id: str, edge_type: EdgeType | None = None) -> list[EdgeData]:
    """Return the outgoing edges of *node_id*, optionally of one type only.

    An id that is not in the graph yields an empty list.
    """
    if node_id not in self._g:
        return []
    return [
        EdgeData(**{name: attrs[name] for name in EdgeData.model_fields if name in attrs})
        for _src, _dst, attrs in self._g.out_edges(node_id, data=True)
        if not edge_type or attrs.get("edge_type") == edge_type.value
    ]
154
+
155
def edges_to(self, node_id: str, edge_type: EdgeType | None = None) -> list[EdgeData]:
    """Return the incoming edges of *node_id*, optionally of one type only.

    An id that is not in the graph yields an empty list.
    """
    if node_id not in self._g:
        return []
    return [
        EdgeData(**{name: attrs[name] for name in EdgeData.model_fields if name in attrs})
        for _src, _dst, attrs in self._g.in_edges(node_id, data=True)
        if not edge_type or attrs.get("edge_type") == edge_type.value
    ]
164
+
165
+ # ── Queries ──────────────────────────────────────────────────────
166
+
167
def callers_of(self, node_id: str) -> list[NodeData]:
    """Return the real nodes that have a CALLS edge into *node_id*.

    Callers that are not registered as real nodes are skipped.
    """
    registry = self._node_data
    callers: list[NodeData] = []
    for call in self.edges_to(node_id, EdgeType.CALLS):
        if call.source in registry:
            callers.append(registry[call.source])
    return callers
174
+
175
def callees_of(self, node_id: str) -> list[NodeData]:
    """Return the real nodes that *node_id* has a CALLS edge to.

    Callees that are not registered as real nodes are skipped.
    """
    registry = self._node_data
    callees: list[NodeData] = []
    for call in self.edges_from(node_id, EdgeType.CALLS):
        if call.target in registry:
            callees.append(registry[call.target])
    return callees
182
+
183
def subgraph_around(
    self, node_id: str, depth: int = 2, edge_types: list[EdgeType] | None = None
) -> tuple[list[NodeData], list[EdgeData]]:
    """Collect the neighbourhood of *node_id* within ``depth`` hops.

    Edges are followed in both directions. When ``edge_types`` is given
    and non-empty, only edges of those types are followed.

    Returns:
        ``(nodes, edges)``: the registered nodes that were reached, and
        every edge whose two endpoints were both reached. Both lists are
        empty when *node_id* is not in the graph.
    """
    if node_id not in self._g:
        return [], []

    # None means "follow every edge type".
    allowed = [et.value for et in edge_types] if edge_types else None

    def _neighbours(nid: str) -> Iterator[str]:
        for _, dst, attrs in self._g.out_edges(nid, data=True):
            if allowed is None or attrs.get("edge_type") in allowed:
                yield dst
        for src, _, attrs in self._g.in_edges(nid, data=True):
            if allowed is None or attrs.get("edge_type") in allowed:
                yield src

    reached: set[str] = set()
    wave: set[str] = {node_id}

    for _ in range(depth):
        upcoming: set[str] = set()
        for nid in wave:
            if nid in reached:
                continue
            reached.add(nid)
            upcoming.update(_neighbours(nid))
        wave = upcoming - reached

    # The last wave was discovered but never expanded; it still belongs
    # to the result.
    reached |= wave

    nodes = []
    for nid in reached:
        node = self.get_node(nid)
        if node:
            nodes.append(node)
    edges = [
        e for e in self.all_edges()
        if e.source in reached and e.target in reached
    ]
    return nodes, edges
216
+
217
def trace_variable(self, var_name: str, origin: str | None = None) -> tuple[list[NodeData], list[EdgeData], dict[str, str], dict[str, str]]:
    """Trace which symbols read and write a variable and how they connect.

    A READS/WRITES edge matches when the last dotted component of its
    target equals ``var_name``. When ``origin`` is given, only edges whose
    source id starts with ``origin`` are considered. Every writer/reader
    pair is then linked with ``nx.all_simple_paths`` (cutoff 5, both
    directions) over the graph without ``contains``/``inherits`` edges,
    and the nodes on those paths join the trace.

    Args:
        var_name: Short name of the variable.
        origin: Optional id prefix that the reading/writing symbol must have.

    Returns:
        ``(nodes, edges, node_roles, edge_roles)``. ``node_roles`` maps a
        node id to ``"origin"``, ``"mutator"``, ``"destination"`` or
        ``"passthrough"``. ``edge_roles`` maps ``"src_id|tgt_id"`` to
        ``"write"``, ``"read"`` or ``"flow"``. All four are empty when no
        READS/WRITES edge matches.
    """
    # Materialise the EdgeData list once; steps 1 and 3 both scan it and
    # the graph is not modified in between.
    graph_edges = self.all_edges()

    writers: set[str] = set()
    readers: set[str] = set()
    var_targets: set[str] = set()

    # 1. Find all symbols that read/write this variable
    for e in graph_edges:
        if e.edge_type not in (EdgeType.READS, EdgeType.WRITES):
            continue
        if e.target.rsplit(".", 1)[-1] != var_name:
            continue
        if origin and not e.source.startswith(origin):
            continue
        var_targets.add(e.target)
        if e.edge_type == EdgeType.WRITES:
            writers.add(e.source)
        else:
            readers.add(e.source)

    trace_func_ids = writers | readers
    if not trace_func_ids:
        return [], [], {}, {}

    # 2. Add the intermediate nodes on paths between writers and readers.
    # No separate ancestors/descendants pass is needed: intersected with
    # the readers/writers it could only yield ids already in path_nodes.
    path_nodes: set[str] = trace_func_ids | var_targets

    # Flow-only view (no CONTAINS/INHERITS) for pathfinding
    flow_graph = self._g.edge_subgraph(
        [(u, v, k) for u, v, k in self._g.edges(keys=True)
         if k not in ("contains", "inherits")]
    )

    for w in writers:
        for r in readers:
            if w == r:
                continue
            # A node without any flow edge is absent from the view.
            if w not in flow_graph or r not in flow_graph:
                continue
            for src, tgt in ((w, r), (r, w)):
                try:
                    for path in nx.all_simple_paths(flow_graph, src, tgt, cutoff=5):
                        path_nodes.update(path)
                except nx.NetworkXError:
                    # Best effort: a failed search just contributes nothing.
                    pass

    # 3. Collect ALL non-containment edges between path participants
    relevant_edges: list[EdgeData] = []
    edge_roles: dict[str, str] = {}

    for e in graph_edges:
        if e.edge_type == EdgeType.CONTAINS:
            continue
        src_in = e.source in path_nodes
        tgt_in = e.target in path_nodes
        key = f"{e.source}|{e.target}"

        if e.edge_type in (EdgeType.READS, EdgeType.WRITES) and src_in and e.target in var_targets:
            # A direct access to the traced variable always sets the role.
            relevant_edges.append(e)
            edge_roles[key] = "write" if e.edge_type == EdgeType.WRITES else "read"
        elif src_in and tgt_in:
            relevant_edges.append(e)
            if key not in edge_roles:
                if e.edge_type == EdgeType.WRITES:
                    edge_roles[key] = "write"
                elif e.edge_type == EdgeType.READS:
                    edge_roles[key] = "read"
                else:
                    edge_roles[key] = "flow"

    # 4. Classify node roles
    node_roles: dict[str, str] = {vid: "origin" for vid in var_targets}

    # Lowest line number of any WRITES edge per writer. A missing or zero
    # line counts as 9999, as does a writer with no relevant WRITES edge.
    first_write_line: dict[str, int] = {}
    for e in relevant_edges:
        if e.edge_type == EdgeType.WRITES:
            line = e.line or 9999
            known = first_write_line.get(e.source)
            if known is None or line < known:
                first_write_line[e.source] = line
    earliest_writer = min(
        writers, key=lambda w: first_write_line.get(w, 9999), default=None
    )

    for nid in path_nodes - var_targets:
        if nid in writers and nid in readers:
            node_roles[nid] = "mutator"
        elif nid in writers:
            node_roles[nid] = "origin" if nid == earliest_writer else "mutator"
        elif nid in readers:
            node_roles[nid] = "destination"
        else:
            # Intermediate node on a path (not a direct reader/writer)
            node_roles[nid] = "passthrough"

    # Upgrade pure readers to passthrough if they call other trace nodes
    for nid in readers - writers:
        if nid not in self._g:
            continue
        calls_into_trace = any(
            e.target in path_nodes and e.target != nid
            for e in self.edges_from(nid, EdgeType.CALLS)
        )
        if calls_into_trace:
            node_roles[nid] = "passthrough"

    nodes = []
    for nid in path_nodes:
        node = self.get_node(nid)
        if node:
            nodes.append(node)
    return nodes, relevant_edges, node_roles, edge_roles
349
+
350
+ # ── Tracing & Pathfinding (Phase 1c) ─────────────────────────────
351
+
352
def trace_function(self, node_id: str) -> tuple[list[NodeData], list[EdgeData], dict[str, str], dict[str, str]]:
    """Trace a function's call chain: its transitive callers and callees.

    Uses ``nx.ancestors`` and ``nx.descendants`` on the sub-graph made of
    ``calls`` edges only.

    Returns:
        ``(nodes, edges, node_roles, edge_roles)``. The traced node gets
        role ``"origin"``, nodes that are both upstream and downstream
        ``"mutator"``, pure callers ``"passthrough"`` and pure callees
        ``"destination"``. ``edge_roles`` is keyed by ``"src_id|tgt_id"``.
        All four are empty when *node_id* is not in the graph.
    """
    if node_id not in self._g:
        return [], [], {}, {}

    calls_graph = self._g.edge_subgraph(
        [(u, v, k) for u, v, k in self._g.edges(keys=True) if k == "calls"]
    )

    # Compute each direction once. A node without call edges is absent
    # from the calls-only view and the lookup raises NetworkXError; treat
    # that as "nothing in this direction".
    try:
        upstream = nx.ancestors(calls_graph, node_id)
    except nx.NetworkXError:
        upstream = set()
    try:
        downstream = nx.descendants(calls_graph, node_id)
    except nx.NetworkXError:
        downstream = set()

    path_nodes: set[str] = {node_id} | upstream | downstream

    # Collect non-containment edges between participants
    relevant_edges: list[EdgeData] = []
    edge_roles: dict[str, str] = {}
    for e in self.all_edges():
        if e.edge_type == EdgeType.CONTAINS:
            continue
        if e.source not in path_nodes or e.target not in path_nodes:
            continue
        relevant_edges.append(e)
        key = f"{e.source}|{e.target}"
        if e.edge_type == EdgeType.CALLS:
            edge_roles[key] = "flow"
        elif e.edge_type == EdgeType.READS:
            edge_roles[key] = "read"
        elif e.edge_type == EdgeType.WRITES:
            edge_roles[key] = "write"
        else:
            # Other edge types never override a role that is already set.
            edge_roles.setdefault(key, "flow")

    # Every path node is the target itself, upstream, or downstream.
    node_roles: dict[str, str] = {}
    for nid in path_nodes:
        if nid == node_id:
            node_roles[nid] = "origin"
        elif nid in upstream:
            # Upstream and downstream at once means a call cycle through
            # the traced function.
            node_roles[nid] = "mutator" if nid in downstream else "passthrough"
        else:
            node_roles[nid] = "destination"

    nodes = []
    for nid in path_nodes:
        node = self.get_node(nid)
        if node:
            nodes.append(node)
    return nodes, relevant_edges, node_roles, edge_roles
422
+
423
def trace_call_chain(self, source: str, target: str, max_depth: int = 8) -> tuple[list[NodeData], list[EdgeData], dict[str, str], dict[str, str]]:
    """Find all call paths from source to target.

    Uses ``nx.all_simple_paths`` on the calls-only sub-graph. The reverse
    direction (target to source) is tried only when the forward search
    finds nothing.

    Args:
        source: Id of the starting node.
        target: Id of the node to reach.
        max_depth: Path length cutoff passed to the path search.

    Returns:
        ``(nodes, edges, node_roles, edge_roles)``. ``source`` gets role
        ``"origin"``, ``target`` ``"destination"`` and every other path
        node ``"passthrough"``. All collected edges have role ``"flow"``,
        keyed by ``"src_id|tgt_id"``. All four are empty when either
        endpoint is unknown or no path exists.
    """
    if source not in self._g or target not in self._g:
        return [], [], {}, {}

    calls_graph = self._g.edge_subgraph(
        [(u, v, k) for u, v, k in self._g.edges(keys=True) if k == "calls"]
    )

    path_nodes: set[str] = set()
    # An endpoint with no call edge at all is absent from the calls-only
    # view. Searching from or to it raises nx.NodeNotFound, which is not
    # a NetworkXError, so guard the lookup and catch both.
    if source in calls_graph and target in calls_graph:
        for start, end in ((source, target), (target, source)):
            try:
                for path in nx.all_simple_paths(calls_graph, start, end, cutoff=max_depth):
                    path_nodes.update(path)
            except (nx.NetworkXError, nx.NodeNotFound):
                pass
            if path_nodes:
                # Forward paths found: do not mix in the reverse direction.
                break

    if not path_nodes:
        return [], [], {}, {}

    relevant_edges: list[EdgeData] = []
    edge_roles: dict[str, str] = {}
    for e in self.all_edges():
        if e.edge_type == EdgeType.CONTAINS:
            continue
        if e.source in path_nodes and e.target in path_nodes:
            relevant_edges.append(e)
            edge_roles.setdefault(f"{e.source}|{e.target}", "flow")

    node_roles: dict[str, str] = {}
    for nid in path_nodes:
        if nid == source:
            node_roles[nid] = "origin"
        elif nid == target:
            node_roles[nid] = "destination"
        else:
            node_roles[nid] = "passthrough"

    nodes = []
    for nid in path_nodes:
        node = self.get_node(nid)
        if node:
            nodes.append(node)
    return nodes, relevant_edges, node_roles, edge_roles
474
+
475
+ # ── Impact & Dependency (Phase 2) ────────────────────────────────
476
+
477
def impact_of(self, node_id: str) -> set[str]:
    """Return every node reachable downstream of *node_id*.

    Reachability is computed on the sub-graph that leaves out
    ``contains`` and ``inherits`` edges. An id that is unknown, or that
    has no edge in that sub-graph, yields an empty set.
    """
    if node_id not in self._g:
        return set()
    structural_keys = ("contains", "inherits")
    flow_edges = [
        (src, dst, key)
        for src, dst, key in self._g.edges(keys=True)
        if key not in structural_keys
    ]
    flow_graph = self._g.edge_subgraph(flow_edges)
    return nx.descendants(flow_graph, node_id) if node_id in flow_graph else set()
492
+
493
def feeds_into(self, node_id: str) -> set[str]:
    """Return every node upstream of *node_id*, or an empty set if unknown.

    Ancestors are taken on the full graph, with every edge type included.
    """
    return nx.ancestors(self._g, node_id) if node_id in self._g else set()
498
+
499
def path_between(self, source: str, target: str) -> list[str]:
    """Return a shortest path between two nodes, trying both directions.

    The source-to-target direction is tried first, then the reverse. An
    empty list means neither direction has a path or a node is unknown.
    """
    for start, end in ((source, target), (target, source)):
        try:
            return nx.shortest_path(self._g, start, end)
        except (nx.NetworkXNoPath, nx.NodeNotFound):
            continue
    return []
509
+
510
def all_paths_between(self, source: str, target: str, max_depth: int = 8) -> list[list[str]]:
    """Return every simple path between two nodes up to ``max_depth``.

    Paths from source to target are collected first. The reverse
    direction is searched only if that produced nothing. Lookup errors
    are swallowed, so an unknown node yields an empty list.
    """
    found: list[list[str]] = []
    for start, end in ((source, target), (target, source)):
        try:
            found.extend(nx.all_simple_paths(self._g, start, end, cutoff=max_depth))
        except (nx.NetworkXError, nx.NodeNotFound):
            pass
        if found:
            break
    return found
523
+
524
def are_connected(self, a: str, b: str) -> bool:
    """Report whether a directed path exists from a to b or from b to a.

    Returns False when either node is missing from the graph.
    """
    try:
        if nx.has_path(self._g, a, b):
            return True
        return nx.has_path(self._g, b, a)
    except nx.NodeNotFound:
        return False
530
+
531
+ # ── Architecture Health (Phase 3) ────────────────────────────────
532
+
533
def find_cycles(self) -> list[list[str]]:
    """List the simple cycles formed by ``calls`` and ``imports`` edges.

    Returns an empty list if NetworkX reports an error.
    """
    dependency_keys = ("calls", "imports")
    selected = [
        (src, dst, key)
        for src, dst, key in self._g.edges(keys=True)
        if key in dependency_keys
    ]
    dependency_graph = self._g.edge_subgraph(selected)
    try:
        cycles = list(nx.simple_cycles(dependency_graph))
    except nx.NetworkXError:
        return []
    return cycles
543
+
544
def has_circular_deps(self) -> bool:
    """Report whether ``calls``/``imports`` edges contain at least one cycle."""
    dependency_keys = ("calls", "imports")
    selected = [
        (src, dst, key)
        for src, dst, key in self._g.edges(keys=True)
        if key in dependency_keys
    ]
    dependency_graph = self._g.edge_subgraph(selected)
    is_acyclic = nx.is_directed_acyclic_graph(dependency_graph)
    return not is_acyclic
551
+
552
def critical_nodes(self, top_n: int = 20) -> list[tuple[str, float]]:
    """Rank nodes by PageRank and return the ``top_n`` highest.

    This is a best-effort metric: if PageRank cannot be computed the
    result is an empty list rather than an exception.

    Returns:
        ``(node_id, score)`` pairs, highest score first.
    """
    try:
        scores = nx.pagerank(self._g)
    except (nx.NetworkXException, ImportError):
        # NetworkXException is the root of NetworkX's hierarchy, so it
        # also covers PowerIterationFailedConvergence, which is not a
        # NetworkXError. ImportError already includes ModuleNotFoundError.
        return []
    return sorted(scores.items(), key=lambda x: -x[1])[:top_n]
559
+
560
def bottlenecks(self, top_n: int = 20) -> list[tuple[str, float]]:
    """Rank nodes by betweenness centrality and return the ``top_n`` highest.

    Returns ``(node_id, score)`` pairs, highest score first, or an empty
    list when the centrality computation fails.
    """
    try:
        centrality = nx.betweenness_centrality(self._g)
    except (nx.NetworkXError, ImportError, ModuleNotFoundError):
        return []
    ranked = sorted(centrality.items(), key=lambda item: -item[1])
    return ranked[:top_n]
567
+
568
def most_coupled(self, top_n: int = 20) -> list[tuple[str, int]]:
    """Return the ``top_n`` nodes with the largest in-degree plus out-degree.

    Pairs are ``(node_id, total_degree)``, largest first. Ties keep the
    graph's node order.
    """
    totals = [
        (nid, self._g.in_degree(nid) + self._g.out_degree(nid))
        for nid in self._g.nodes()
    ]
    # A stable descending sort keeps tied nodes in their original order.
    totals.sort(key=lambda pair: pair[1], reverse=True)
    return totals[:top_n]
574
+
575
def find_clusters(self) -> list[set[str]]:
    """Return the weakly connected components of the graph as node-id sets."""
    components = nx.weakly_connected_components(self._g)
    return list(components)
578
+
579
def circular_clusters(self) -> list[set[str]]:
    """Return the strongly connected components that have more than one node."""
    clusters: list[set[str]] = []
    for component in nx.strongly_connected_components(self._g):
        if len(component) > 1:
            clusters.append(component)
    return clusters
582
+
583
def dependency_layers(self) -> list[list[str]]:
    """Group node ids into topological generations.

    Cycles are collapsed first with ``nx.condensation``, so mutually
    dependent nodes land in the same layer.

    Returns:
        One list of node ids per generation, in topological order, or an
        empty list if NetworkX reports an error.
    """
    try:
        condensed = nx.condensation(self._g)
        layers: list[list[str]] = []
        for generation in nx.topological_generations(condensed):
            layer: list[str] = []
            for component in generation:
                # Condensation nodes are integer component labels. The
                # original node ids are in the "members" attribute;
                # sorted because that attribute is a set.
                layer.extend(sorted(condensed.nodes[component]["members"]))
            layers.append(layer)
        return layers
    except nx.NetworkXError:
        return []
590
+
591
def coupling_between(self, module_a: str, module_b: str) -> dict[str, Any]:
    """List the non-containment edges that cross between two id prefixes.

    A node belongs to a side when its id starts with that side's prefix.

    Returns:
        ``{"edge_count": int, "edges": [...]}`` where each edge entry has
        ``"source"``, ``"target"`` and ``"type"`` keys.
    """
    graph_ids = list(self._g.nodes())
    side_a = {nid for nid in graph_ids if nid.startswith(module_a)}
    side_b = {nid for nid in graph_ids if nid.startswith(module_b)}

    crossing: list[dict[str, str]] = []
    for e in self.all_edges():
        if e.edge_type == EdgeType.CONTAINS:
            continue
        forwards = e.source in side_a and e.target in side_b
        backwards = e.source in side_b and e.target in side_a
        if not (forwards or backwards):
            continue
        crossing.append({"source": e.source, "target": e.target, "type": e.edge_type.value})
    return {"edge_count": len(crossing), "edges": crossing}
604
+
605
+ # ── Enhanced Dead Code (Phase 4) ─────────────────────────────────
606
+
607
def truly_dead(self, entry_points: list[str] | None = None) -> list[str]:
    """Return ids of real nodes that no entry point can reach.

    Args:
        entry_points: Node ids to start from. Defaults to every node
            whose symbol type is MODULE. Ids not in the graph are ignored.
    """
    if entry_points is None:
        entry_points = [n.id for n in self.all_nodes() if n.symbol_type == SymbolType.MODULE]

    live: set[str] = set()
    for ep in entry_points:
        if ep not in self._g:
            continue
        live.add(ep)
        live.update(nx.descendants(self._g, ep))

    dead: list[str] = []
    for n in self.all_nodes(include_proposed=False):
        if n.id not in live:
            dead.append(n.id)
    return dead
616
+
617
def functions_returning(self, type_hint: str) -> list[NodeData]:
    """Find real functions/methods whose return annotation matches a string.

    A node matches when any of the following holds:

    * its signature contains ``"-> <type_hint>"``;
    * its ``return_annotation`` metadata equals ``type_hint``;
    * it has a signature and its ``return_annotation`` metadata contains
      ``"-> <type_hint>"`` (the original check, kept for compatibility).

    Each matching node is returned once.
    """
    arrow = f"-> {type_hint}"
    results = []
    for n in self._node_data.values():
        if n.symbol_type not in (SymbolType.FUNCTION, SymbolType.METHOD):
            continue
        signature = n.signature or ""
        raw_annotation = n.metadata.get("return_annotation")
        # A missing or non-string annotation must not break the
        # substring test.
        annotation_text = raw_annotation if isinstance(raw_annotation, str) else ""
        if (
            arrow in signature
            or raw_annotation == type_hint
            or (signature and arrow in annotation_text)
        ):
            results.append(n)
    return results
628
+
629
def unreachable_from(self, entry_point: str) -> list[NodeData]:
    """Return real functions/methods that *entry_point* cannot reach.

    Reachability follows every edge type in the graph. An entry point
    that is not in the graph yields an empty list.
    """
    if entry_point not in self._g:
        return []
    reachable = {entry_point, *nx.descendants(self._g, entry_point)}
    callable_kinds = (SymbolType.FUNCTION, SymbolType.METHOD)
    return [
        candidate
        for candidate in self._node_data.values()
        if candidate.symbol_type in callable_kinds and candidate.id not in reachable
    ]
639
+
640
+ # ── Proposed / Hypothetical ──────────────────────────────────────
641
+
642
def propose_function(
    self,
    name: str,
    module: str,
    calls: list[str] | None = None,
    called_by: list[str] | None = None,
    signature: str | None = None,
) -> NodeData:
    """Add a hypothetical function and its call edges to the graph.

    Args:
        name: Short function name.
        module: Dotted module path; the node id is ``"<module>.<name>"``.
        calls: Ids the new function would call.
        called_by: Ids that would call the new function.
        signature: Display signature; defaults to ``"def <name>(...)"``.

    Returns:
        The proposed node that was added.
    """
    qname = f"{module}.{name}"
    display_signature = signature or f"def {name}(...)"
    node = NodeData(
        id=qname,
        name=name,
        qualified_name=qname,
        symbol_type=SymbolType.FUNCTION,
        is_proposed=True,
        signature=display_signature,
    )
    self.add_node(node)

    # Outgoing calls first, then incoming ones: same order as before.
    endpoints = [(qname, callee) for callee in (calls or [])]
    endpoints += [(caller, qname) for caller in (called_by or [])]
    for src, dst in endpoints:
        self.add_edge(
            EdgeData(
                source=src, target=dst,
                edge_type=EdgeType.CALLS, is_proposed=True,
            )
        )

    return node
677
+
678
def clear_proposed(self) -> None:
    """Remove all hypothetical nodes and edges.

    Proposed edges are removed explicitly, because an edge proposed
    between two real nodes would otherwise stay in the graph. Dropping
    the proposed nodes alone only removes edges that touch them.
    """
    for edge in self._proposed_edges:
        key = edge.edge_type.value
        # NOTE(review): edges are keyed by type, so a proposed edge added
        # over an existing real edge of the same type replaced it; this
        # removal then drops that edge too. Confirm that is acceptable.
        if self._g.has_edge(edge.source, edge.target, key=key):
            self._g.remove_edge(edge.source, edge.target, key=key)

    for nid in list(self._proposed_nodes):
        self._g.remove_node(nid)

    self._proposed_nodes.clear()
    self._proposed_edges.clear()
684
+
685
+ # ── Snapshot for frontend ────────────────────────────────────────
686
+
687
def snapshot(self, view: ViewState | None = None) -> GraphSnapshot:
    """Build the filtered snapshot described by *view*.

    Nodes and edges are filtered through ``_filter_nodes`` and
    ``_filter_edges``. If the view highlights nodes, highlights and trace
    roles that point at hidden nodes are moved to their nearest visible
    ancestor, and the returned snapshot carries an updated copy of the
    view.
    """
    view = ViewState() if view is None else view

    nodes = self._filter_nodes(view)
    node_ids = {n.id for n in nodes}
    edges = self._filter_edges(view, node_ids)

    if not view.highlighted_node_ids:
        return GraphSnapshot(nodes=nodes, edges=edges, view=view)

    # When several hidden children collapse into one ancestor, the role
    # with the lowest number wins.
    role_priority = {"origin": 0, "mutator": 1, "passthrough": 2, "destination": 3}

    shown_highlights: set[str] = set()
    node_roles: dict[str, str] = {}
    for hid in view.highlighted_node_ids:
        if hid in node_ids:
            anchor = hid
        else:
            anchor = self._ancestor_at_zoom(hid, node_ids)
            if not anchor:
                # Nothing visible stands in for this highlight.
                continue
        shown_highlights.add(anchor)
        if hid in view.trace_node_roles:
            self._merge_role(node_roles, anchor, view.trace_node_roles[hid], role_priority)

    edge_roles: dict[str, str] = {}
    for ekey, role in view.trace_edge_roles.items():
        src, tgt = ekey.split("|", 1)
        # An endpoint with no visible ancestor keeps its original id.
        if src not in node_ids:
            src = self._ancestor_at_zoom(src, node_ids) or src
        if tgt not in node_ids:
            tgt = self._ancestor_at_zoom(tgt, node_ids) or tgt
        merged_key = f"{src}|{tgt}"
        # "write" always overrides; any other role only fills a gap.
        if role == "write" or merged_key not in edge_roles:
            edge_roles[merged_key] = role

    view = view.model_copy(update={
        "highlighted_node_ids": list(shown_highlights),
        "trace_node_roles": node_roles,
        "trace_edge_roles": edge_roles,
    })

    return GraphSnapshot(nodes=nodes, edges=edges, view=view)
733
+
734
+ @staticmethod
735
+ def _merge_role(roles: dict[str, str], node_id: str, role: str, priority: dict[str, int]) -> None:
736
+ """Merge a role into the dict, keeping the highest-priority (lowest number) role."""
737
+ if node_id not in roles or priority.get(role, 99) < priority.get(roles[node_id], 99):
738
+ roles[node_id] = role
739
+
740
def _filter_nodes(self, view: ViewState) -> list[NodeData]:
    """Select the nodes visible under *view*.

    Filters are applied in order: symbol types for the zoom level, dead
    nodes (unless ``show_dead``), the neighbourhood of ``focus_node``,
    and finally the explicit ``visible_node_ids`` list.
    """
    # Each zoom level shows ONLY its symbol types. Levels are compared
    # with == one by one; an unknown level falls back to
    # functions/methods.
    zoom_table = (
        ("module", {SymbolType.MODULE}),
        ("class", {SymbolType.CLASS}),
        ("function", {SymbolType.FUNCTION, SymbolType.METHOD}),
        ("variable", {SymbolType.VARIABLE, SymbolType.PARAMETER}),
        ("all", set(SymbolType)),
    )
    type_filter: set[SymbolType] = {SymbolType.FUNCTION, SymbolType.METHOD}
    for level, symbol_types in zoom_table:
        if view.zoom_level == level:
            type_filter = symbol_types
            break

    nodes = [
        n
        for n in self.all_nodes(include_proposed=view.show_proposed)
        if n.symbol_type in type_filter and (view.show_dead or not n.is_dead)
    ]

    # Focus node: keep only what lies in the sub-graph around it.
    if view.focus_node:
        around, _ = self.subgraph_around(view.focus_node, depth=view.focus_depth)
        focus_ids = {n.id for n in around}
        nodes = [n for n in nodes if n.id in focus_ids]

    # An explicit visible list narrows the result further.
    if view.visible_node_ids:
        allowed = set(view.visible_node_ids)
        nodes = [n for n in nodes if n.id in allowed]

    return nodes
777
+
778
+ def _ancestor_at_zoom(self, node_id: str, visible_ids: set[str]) -> str | None:
779
+ """Walk up the containment hierarchy to find a visible ancestor."""
780
+ current = node_id
781
+ while current:
782
+ if current in visible_ids:
783
+ return current
784
+ # Go up one level: "a.b.c.d" -> "a.b.c"
785
+ if "." in current:
786
+ current = current.rsplit(".", 1)[0]
787
+ else:
788
+ break
789
+ return None
790
+
791
+ def _filter_edges(self, view: ViewState, node_ids: set[str]) -> list[EdgeData]:
792
+ all_edges = self.all_edges(include_proposed=view.show_proposed)
793
+
794
+ if not view.show_dead:
795
+ all_edges = [e for e in all_edges if not e.is_dead]
796
+
797
+ # At coarser zoom levels, aggregate edges up to visible ancestors
798
+ aggregated: dict[tuple[str, str, str], EdgeData] = {}
799
+ for e in all_edges:
800
+ if e.edge_type not in view.visible_edge_types:
801
+ continue
802
+ # Skip containment edges at module/class zoom — they're implicit
803
+ if e.edge_type == EdgeType.CONTAINS:
804
+ if e.source in node_ids and e.target in node_ids:
805
+ aggregated[(e.source, e.target, e.edge_type.value)] = e
806
+ continue
807
+
808
+ src = e.source if e.source in node_ids else self._ancestor_at_zoom(e.source, node_ids)
809
+ tgt = e.target if e.target in node_ids else self._ancestor_at_zoom(e.target, node_ids)
810
+
811
+ if src and tgt and src != tgt:
812
+ key = (src, tgt, e.edge_type.value)
813
+ if key not in aggregated:
814
+ aggregated[key] = EdgeData(
815
+ source=src,
816
+ target=tgt,
817
+ edge_type=e.edge_type,
818
+ is_dead=e.is_dead,
819
+ is_proposed=e.is_proposed,
820
+ )
821
+
822
+ return list(aggregated.values())