interlinked-mapper 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- interlinked/__init__.py +3 -0
- interlinked/analyzer/__init__.py +7 -0
- interlinked/analyzer/dead_code.py +137 -0
- interlinked/analyzer/graph.py +822 -0
- interlinked/analyzer/parser.py +1141 -0
- interlinked/analyzer/similarity.py +486 -0
- interlinked/cli.py +136 -0
- interlinked/commander/__init__.py +6 -0
- interlinked/commander/llm.py +304 -0
- interlinked/commander/query.py +966 -0
- interlinked/commander/repl.py +50 -0
- interlinked/mcp_server.py +324 -0
- interlinked/models.py +107 -0
- interlinked/visualizer/__init__.py +1 -0
- interlinked/visualizer/layouts.py +181 -0
- interlinked/visualizer/server.py +428 -0
- interlinked_mapper-0.1.0.dist-info/METADATA +26 -0
- interlinked_mapper-0.1.0.dist-info/RECORD +21 -0
- interlinked_mapper-0.1.0.dist-info/WHEEL +5 -0
- interlinked_mapper-0.1.0.dist-info/entry_points.txt +2 -0
- interlinked_mapper-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,822 @@
|
|
|
1
|
+
"""CodeGraph — a NetworkX-backed directed multigraph of Python symbols."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any, Iterator
|
|
6
|
+
|
|
7
|
+
import networkx as nx
|
|
8
|
+
|
|
9
|
+
from interlinked.models import (
|
|
10
|
+
NodeData, EdgeData, EdgeType, SymbolType,
|
|
11
|
+
GraphSnapshot, ViewState, ColorScheme,
|
|
12
|
+
)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class CodeGraph:
|
|
16
|
+
"""The core graph structure representing an entire Python project.
|
|
17
|
+
|
|
18
|
+
Wraps a NetworkX MultiDiGraph with typed node/edge accessors
|
|
19
|
+
and query methods used by the commander layer.
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
def __init__(self) -> None:
    """Create an empty graph with no parsed or proposed symbols."""
    # Hypothetical ("proposed") symbols are tracked apart from parsed ones.
    self._proposed_edges: list[EdgeData] = []
    self._proposed_nodes: dict[str, NodeData] = {}
    # Parsed nodes, keyed by node ID.
    self._node_data: dict[str, NodeData] = {}
    # Backing NetworkX multigraph that stores every node and edge.
    self._g: nx.MultiDiGraph = nx.MultiDiGraph()
|
|
27
|
+
|
|
28
|
+
# ── Construction ─────────────────────────────────────────────────
|
|
29
|
+
|
|
30
|
+
def add_node(self, node: NodeData) -> None:
    """Register ``node`` in the matching bucket and in the backing graph.

    Proposed nodes are stored in the proposed bucket, all others in the
    regular one. In both cases the node's dumped fields become the
    attributes of the graph node identified by ``node.id``.
    """
    if node.is_proposed:
        self._proposed_nodes[node.id] = node
    else:
        self._node_data[node.id] = node
    attrs = node.model_dump()
    self._g.add_node(node.id, **attrs)
|
|
34
|
+
|
|
35
|
+
def add_edge(self, edge: EdgeData) -> None:
    """Insert ``edge`` into the backing graph, keyed by its edge-type value.

    Proposed edges are additionally remembered in the proposed-edge list.
    The edge's dumped fields become the graph edge's attributes.
    """
    if edge.is_proposed:
        self._proposed_edges.append(edge)
    edge_key = edge.edge_type.value
    attrs = edge.model_dump()
    self._g.add_edge(edge.source, edge.target, key=edge_key, **attrs)
|
|
43
|
+
|
|
44
|
+
def build_from(self, nodes: list[NodeData], edges: list[EdgeData]) -> None:
    """Populate from parser output, resolving short names to qualified IDs.

    All nodes are added first. A lookup from short names and dotted
    qualified-name suffixes to node IDs is then built and used to resolve
    each edge's endpoints before the edge is added.

    Args:
        nodes: Parsed symbols to add.
        edges: Parsed relationships whose source/target may be unqualified.
    """
    for n in nodes:
        self.add_node(n)

    # Lookup: short name / qualified-name suffix -> list of qualified IDs,
    # e.g. "CodeGraph" and "graph.CodeGraph" for "analyzer.graph.CodeGraph".
    name_index: dict[str, list[str]] = {}
    for n in nodes:
        parts = n.qualified_name.split(".")
        lookup_keys = [n.name]
        lookup_keys.extend(".".join(parts[i:]) for i in range(1, len(parts)))
        for lookup_key in lookup_keys:
            ids = name_index.setdefault(lookup_key, [])
            # The short name normally equals the last qualified-name
            # fragment; record each ID once so an unambiguous name is not
            # mistaken for an ambiguous one during edge resolution.
            if n.id not in ids:
                ids.append(n.id)

    node_ids = {n.id for n in nodes}

    for e in edges:
        self.add_edge(self._resolve_edge(e, node_ids, name_index))
|
|
65
|
+
|
|
66
|
+
@staticmethod
|
|
67
|
+
def _resolve_edge(
|
|
68
|
+
edge: EdgeData,
|
|
69
|
+
node_ids: set[str],
|
|
70
|
+
name_index: dict[str, list[str]],
|
|
71
|
+
) -> EdgeData:
|
|
72
|
+
"""Try to resolve unqualified source/target names to known node IDs."""
|
|
73
|
+
source = edge.source
|
|
74
|
+
target = edge.target
|
|
75
|
+
|
|
76
|
+
if source not in node_ids:
|
|
77
|
+
candidates = name_index.get(source, [])
|
|
78
|
+
if len(candidates) == 1:
|
|
79
|
+
source = candidates[0]
|
|
80
|
+
|
|
81
|
+
if target not in node_ids:
|
|
82
|
+
candidates = name_index.get(target, [])
|
|
83
|
+
if len(candidates) == 1:
|
|
84
|
+
target = candidates[0]
|
|
85
|
+
elif len(candidates) > 1:
|
|
86
|
+
# Prefer a candidate in the same module as the source
|
|
87
|
+
src_module = source.rsplit(".", 1)[0] if "." in source else source
|
|
88
|
+
for c in candidates:
|
|
89
|
+
if c.startswith(src_module):
|
|
90
|
+
target = c
|
|
91
|
+
break
|
|
92
|
+
else:
|
|
93
|
+
target = candidates[0]
|
|
94
|
+
|
|
95
|
+
if source == edge.source and target == edge.target:
|
|
96
|
+
return edge
|
|
97
|
+
|
|
98
|
+
return EdgeData(
|
|
99
|
+
source=source,
|
|
100
|
+
target=target,
|
|
101
|
+
edge_type=edge.edge_type,
|
|
102
|
+
is_dead=edge.is_dead,
|
|
103
|
+
is_proposed=edge.is_proposed,
|
|
104
|
+
line=edge.line,
|
|
105
|
+
metadata=edge.metadata,
|
|
106
|
+
)
|
|
107
|
+
|
|
108
|
+
# ── Node access ──────────────────────────────────────────────────
|
|
109
|
+
|
|
110
|
+
def get_node(self, node_id: str) -> NodeData | None:
|
|
111
|
+
return self._node_data.get(node_id) or self._proposed_nodes.get(node_id)
|
|
112
|
+
|
|
113
|
+
def all_nodes(self, include_proposed: bool = True) -> list[NodeData]:
|
|
114
|
+
nodes = list(self._node_data.values())
|
|
115
|
+
if include_proposed:
|
|
116
|
+
nodes.extend(self._proposed_nodes.values())
|
|
117
|
+
return nodes
|
|
118
|
+
|
|
119
|
+
def nodes_by_type(self, sym_type: SymbolType) -> list[NodeData]:
|
|
120
|
+
return [n for n in self._node_data.values() if n.symbol_type == sym_type]
|
|
121
|
+
|
|
122
|
+
@property
|
|
123
|
+
def node_count(self) -> int:
|
|
124
|
+
return len(self._node_data) + len(self._proposed_nodes)
|
|
125
|
+
|
|
126
|
+
@property
def edge_count(self) -> int:
    """Number of edges currently stored in the backing graph."""
    total = self._g.number_of_edges()
    return total
|
|
129
|
+
|
|
130
|
+
# ── Edge access ──────────────────────────────────────────────────
|
|
131
|
+
|
|
132
|
+
def all_edges(self, include_proposed: bool = True) -> list[EdgeData]:
    """Rebuild ``EdgeData`` records from the backing graph's edge attributes.

    Each (source, target, edge_type) combination is reported once. Proposed
    edges are omitted when ``include_proposed`` is false.
    """
    field_names = EdgeData.model_fields
    collected: list[EdgeData] = []
    visited_keys = set()
    for src, dst, attrs in self._g.edges(data=True):
        dedup_key = (src, dst, attrs.get("edge_type"))
        if dedup_key in visited_keys:
            continue
        visited_keys.add(dedup_key)
        # Only pass the attributes that are actual EdgeData fields.
        record = EdgeData(**{f: attrs[f] for f in field_names if f in attrs})
        if include_proposed or not record.is_proposed:
            collected.append(record)
    return collected
|
|
144
|
+
|
|
145
|
+
def edges_from(self, node_id: str, edge_type: EdgeType | None = None) -> list[EdgeData]:
|
|
146
|
+
if node_id not in self._g:
|
|
147
|
+
return []
|
|
148
|
+
result = []
|
|
149
|
+
for _, v, data in self._g.out_edges(node_id, data=True):
|
|
150
|
+
if edge_type and data.get("edge_type") != edge_type.value:
|
|
151
|
+
continue
|
|
152
|
+
result.append(EdgeData(**{k: data[k] for k in EdgeData.model_fields if k in data}))
|
|
153
|
+
return result
|
|
154
|
+
|
|
155
|
+
def edges_to(self, node_id: str, edge_type: EdgeType | None = None) -> list[EdgeData]:
|
|
156
|
+
if node_id not in self._g:
|
|
157
|
+
return []
|
|
158
|
+
result = []
|
|
159
|
+
for u, _, data in self._g.in_edges(node_id, data=True):
|
|
160
|
+
if edge_type and data.get("edge_type") != edge_type.value:
|
|
161
|
+
continue
|
|
162
|
+
result.append(EdgeData(**{k: data[k] for k in EdgeData.model_fields if k in data}))
|
|
163
|
+
return result
|
|
164
|
+
|
|
165
|
+
# ── Queries ──────────────────────────────────────────────────────
|
|
166
|
+
|
|
167
|
+
def callers_of(self, node_id: str) -> list[NodeData]:
    """Return the parsed nodes that have a CALLS edge into ``node_id``.

    Callers that are not parsed nodes (e.g. proposed ones) are skipped.
    """
    known = self._node_data
    callers: list[NodeData] = []
    for call in self.edges_to(node_id, EdgeType.CALLS):
        caller_id = call.source
        if caller_id in known:
            callers.append(known[caller_id])
    return callers
|
|
174
|
+
|
|
175
|
+
def callees_of(self, node_id: str) -> list[NodeData]:
    """Return the parsed nodes that ``node_id`` has a CALLS edge to.

    Callees that are not parsed nodes (e.g. proposed ones) are skipped.
    """
    known = self._node_data
    callees: list[NodeData] = []
    for call in self.edges_from(node_id, EdgeType.CALLS):
        callee_id = call.target
        if callee_id in known:
            callees.append(known[callee_id])
    return callees
|
|
182
|
+
|
|
183
|
+
def subgraph_around(
|
|
184
|
+
self, node_id: str, depth: int = 2, edge_types: list[EdgeType] | None = None
|
|
185
|
+
) -> tuple[list[NodeData], list[EdgeData]]:
|
|
186
|
+
"""BFS expansion around a node up to `depth` hops."""
|
|
187
|
+
if node_id not in self._g:
|
|
188
|
+
return [], []
|
|
189
|
+
|
|
190
|
+
visited: set[str] = set()
|
|
191
|
+
frontier: set[str] = {node_id}
|
|
192
|
+
|
|
193
|
+
for _ in range(depth):
|
|
194
|
+
next_frontier: set[str] = set()
|
|
195
|
+
for nid in frontier:
|
|
196
|
+
if nid in visited:
|
|
197
|
+
continue
|
|
198
|
+
visited.add(nid)
|
|
199
|
+
for _, v, data in self._g.out_edges(nid, data=True):
|
|
200
|
+
if edge_types and data.get("edge_type") not in [et.value for et in edge_types]:
|
|
201
|
+
continue
|
|
202
|
+
next_frontier.add(v)
|
|
203
|
+
for u, _, data in self._g.in_edges(nid, data=True):
|
|
204
|
+
if edge_types and data.get("edge_type") not in [et.value for et in edge_types]:
|
|
205
|
+
continue
|
|
206
|
+
next_frontier.add(u)
|
|
207
|
+
frontier = next_frontier - visited
|
|
208
|
+
|
|
209
|
+
visited |= frontier
|
|
210
|
+
nodes = [self.get_node(nid) for nid in visited if self.get_node(nid)]
|
|
211
|
+
edges = [
|
|
212
|
+
e for e in self.all_edges()
|
|
213
|
+
if e.source in visited and e.target in visited
|
|
214
|
+
]
|
|
215
|
+
return nodes, edges # type: ignore[return-value]
|
|
216
|
+
|
|
217
|
+
def trace_variable(self, var_name: str, origin: str | None = None) -> tuple[list[NodeData], list[EdgeData], dict[str, str], dict[str, str]]:
    """Trace a variable's read/write path using real graph pathfinding.

    Collects every READS/WRITES edge whose target's last dotted component
    equals ``var_name`` (restricted to sources whose ID starts with
    ``origin`` when given), then uses nx.all_simple_paths on a flow-only
    subgraph to add the intermediate nodes that connect writers and readers.

    Returns (nodes, edges, node_roles, edge_roles) where edge_roles
    are keyed by ``"src_id|tgt_id"`` using real node IDs. All four are
    empty when nothing reads or writes the variable.
    """
    writers: set[str] = set()
    readers: set[str] = set()
    var_targets: set[str] = set()

    # The edge list is scanned twice below; materialise it once.
    graph_edges = self.all_edges()

    # 1. Find all functions that read/write this variable
    for e in graph_edges:
        if e.edge_type in (EdgeType.READS, EdgeType.WRITES):
            target_name = e.target.split(".")[-1] if "." in e.target else e.target
            if target_name == var_name:
                if origin and not e.source.startswith(origin):
                    continue
                var_targets.add(e.target)
                if e.edge_type == EdgeType.WRITES:
                    writers.add(e.source)
                else:
                    readers.add(e.source)

    trace_func_ids = writers | readers
    if not trace_func_ids:
        return [], [], {}, {}

    # 2. Use NetworkX to find paths between writers and readers
    #    This gives us the actual intermediate nodes in the call chain
    path_nodes: set[str] = set(trace_func_ids)
    path_nodes |= var_targets

    # Use flow-only subgraph (no CONTAINS/INHERITS) for pathfinding — much faster
    flow_graph = self._g.edge_subgraph(
        [(u, v, k) for u, v, k in self._g.edges(keys=True)
         if k not in ("contains", "inherits")]
    )

    for w in writers:
        for r in readers:
            if w == r:
                continue
            # Try both directions between each writer/reader pair.
            for src, tgt in [(w, r), (r, w)]:
                if src in flow_graph and tgt in flow_graph:
                    try:
                        for path in nx.all_simple_paths(flow_graph, src, tgt, cutoff=5):
                            path_nodes.update(path)
                    except nx.NetworkXError:
                        # Best effort: a failed search adds no nodes.
                        pass

    # 3. Collect ALL edges between path participants
    relevant_edges: list[EdgeData] = []
    edge_roles: dict[str, str] = {}

    for e in graph_edges:
        if e.edge_type == EdgeType.CONTAINS:
            continue
        src_in = e.source in path_nodes
        tgt_in = e.target in path_nodes
        tgt_is_var = e.target in var_targets

        if e.edge_type in (EdgeType.READS, EdgeType.WRITES) and src_in and tgt_is_var:
            # Direct access to the traced variable always sets the role.
            relevant_edges.append(e)
            key = f"{e.source}|{e.target}"
            edge_roles[key] = "write" if e.edge_type == EdgeType.WRITES else "read"
        elif src_in and tgt_in:
            relevant_edges.append(e)
            key = f"{e.source}|{e.target}"
            # Other edges only set a role when none was recorded yet.
            if key not in edge_roles:
                if e.edge_type == EdgeType.WRITES:
                    edge_roles[key] = "write"
                elif e.edge_type == EdgeType.READS:
                    edge_roles[key] = "read"
                else:
                    edge_roles[key] = "flow"

    # 4. Classify node roles
    node_roles: dict[str, str] = {}
    for vid in var_targets:
        node_roles[vid] = "origin"

    # Writers ordered by the line of their earliest write edge (9999 when
    # the line is unknown); the first one is treated as the origin.
    writer_list = sorted(writers, key=lambda w: min(
        (e.line or 9999 for e in relevant_edges
         if e.source == w and e.edge_type == EdgeType.WRITES), default=9999
    ))

    for nid in path_nodes - var_targets:
        if nid in writers and nid in readers:
            node_roles[nid] = "mutator"
        elif nid in writers:
            if writer_list and nid == writer_list[0]:
                node_roles[nid] = "origin"
            else:
                node_roles[nid] = "mutator"
        elif nid in readers:
            node_roles[nid] = "destination"
        else:
            # Intermediate node on a path (not a direct reader/writer)
            node_roles[nid] = "passthrough"

    # Upgrade pure readers to passthrough if they connect to other trace nodes
    for nid in readers - writers:
        if nid not in self._g:
            continue
        has_outgoing = any(
            e.target in path_nodes and e.target != nid
            for e in self.edges_from(nid, EdgeType.CALLS)
        )
        if has_outgoing:
            node_roles[nid] = "passthrough"

    nodes = [found for found in map(self.get_node, path_nodes) if found]
    return nodes, relevant_edges, node_roles, edge_roles
|
|
349
|
+
|
|
350
|
+
# ── Tracing & Pathfinding (Phase 1c) ─────────────────────────────
|
|
351
|
+
|
|
352
|
+
def trace_function(self, node_id: str) -> tuple[list[NodeData], list[EdgeData], dict[str, str], dict[str, str]]:
|
|
353
|
+
"""Trace a function's call chain — everything that calls it and everything it calls.
|
|
354
|
+
|
|
355
|
+
Uses nx.ancestors and nx.descendants on calls-only subgraph.
|
|
356
|
+
"""
|
|
357
|
+
if node_id not in self._g:
|
|
358
|
+
return [], [], {}, {}
|
|
359
|
+
|
|
360
|
+
# Build a calls-only view
|
|
361
|
+
calls_edges = {
|
|
362
|
+
(e.source, e.target) for e in self.all_edges()
|
|
363
|
+
if e.edge_type == EdgeType.CALLS
|
|
364
|
+
}
|
|
365
|
+
calls_graph = self._g.edge_subgraph(
|
|
366
|
+
[(u, v, k) for u, v, k in self._g.edges(keys=True) if k == "calls"]
|
|
367
|
+
)
|
|
368
|
+
|
|
369
|
+
path_nodes: set[str] = {node_id}
|
|
370
|
+
try:
|
|
371
|
+
path_nodes |= nx.ancestors(calls_graph, node_id)
|
|
372
|
+
except nx.NetworkXError:
|
|
373
|
+
pass
|
|
374
|
+
try:
|
|
375
|
+
path_nodes |= nx.descendants(calls_graph, node_id)
|
|
376
|
+
except nx.NetworkXError:
|
|
377
|
+
pass
|
|
378
|
+
|
|
379
|
+
# Collect edges between participants
|
|
380
|
+
relevant_edges: list[EdgeData] = []
|
|
381
|
+
edge_roles: dict[str, str] = {}
|
|
382
|
+
for e in self.all_edges():
|
|
383
|
+
if e.edge_type == EdgeType.CONTAINS:
|
|
384
|
+
continue
|
|
385
|
+
if e.source in path_nodes and e.target in path_nodes:
|
|
386
|
+
relevant_edges.append(e)
|
|
387
|
+
key = f"{e.source}|{e.target}"
|
|
388
|
+
if e.edge_type == EdgeType.CALLS:
|
|
389
|
+
edge_roles[key] = "flow"
|
|
390
|
+
elif e.edge_type == EdgeType.READS:
|
|
391
|
+
edge_roles[key] = "read"
|
|
392
|
+
elif e.edge_type == EdgeType.WRITES:
|
|
393
|
+
edge_roles[key] = "write"
|
|
394
|
+
else:
|
|
395
|
+
edge_roles.setdefault(key, "flow")
|
|
396
|
+
|
|
397
|
+
# Classify roles: the target is origin, callers are upstream, callees are downstream
|
|
398
|
+
node_roles: dict[str, str] = {}
|
|
399
|
+
try:
|
|
400
|
+
upstream = nx.ancestors(calls_graph, node_id)
|
|
401
|
+
except nx.NetworkXError:
|
|
402
|
+
upstream = set()
|
|
403
|
+
try:
|
|
404
|
+
downstream = nx.descendants(calls_graph, node_id)
|
|
405
|
+
except nx.NetworkXError:
|
|
406
|
+
downstream = set()
|
|
407
|
+
|
|
408
|
+
for nid in path_nodes:
|
|
409
|
+
if nid == node_id:
|
|
410
|
+
node_roles[nid] = "origin"
|
|
411
|
+
elif nid in upstream and nid in downstream:
|
|
412
|
+
node_roles[nid] = "mutator" # in a cycle with target
|
|
413
|
+
elif nid in upstream:
|
|
414
|
+
node_roles[nid] = "passthrough" # callers
|
|
415
|
+
elif nid in downstream:
|
|
416
|
+
node_roles[nid] = "destination" # callees
|
|
417
|
+
else:
|
|
418
|
+
node_roles[nid] = "passthrough"
|
|
419
|
+
|
|
420
|
+
nodes = [self.get_node(nid) for nid in path_nodes if self.get_node(nid)]
|
|
421
|
+
return nodes, relevant_edges, node_roles, edge_roles # type: ignore[return-value]
|
|
422
|
+
|
|
423
|
+
def trace_call_chain(self, source: str, target: str, max_depth: int = 8) -> tuple[list[NodeData], list[EdgeData], dict[str, str], dict[str, str]]:
|
|
424
|
+
"""Find all call paths from source to target.
|
|
425
|
+
|
|
426
|
+
Uses nx.all_simple_paths on calls-only subgraph.
|
|
427
|
+
"""
|
|
428
|
+
if source not in self._g or target not in self._g:
|
|
429
|
+
return [], [], {}, {}
|
|
430
|
+
|
|
431
|
+
calls_graph = self._g.edge_subgraph(
|
|
432
|
+
[(u, v, k) for u, v, k in self._g.edges(keys=True) if k == "calls"]
|
|
433
|
+
)
|
|
434
|
+
|
|
435
|
+
path_nodes: set[str] = set()
|
|
436
|
+
try:
|
|
437
|
+
for path in nx.all_simple_paths(calls_graph, source, target, cutoff=max_depth):
|
|
438
|
+
path_nodes.update(path)
|
|
439
|
+
except nx.NetworkXError:
|
|
440
|
+
pass
|
|
441
|
+
|
|
442
|
+
if not path_nodes:
|
|
443
|
+
# Try reverse direction
|
|
444
|
+
try:
|
|
445
|
+
for path in nx.all_simple_paths(calls_graph, target, source, cutoff=max_depth):
|
|
446
|
+
path_nodes.update(path)
|
|
447
|
+
except nx.NetworkXError:
|
|
448
|
+
pass
|
|
449
|
+
|
|
450
|
+
if not path_nodes:
|
|
451
|
+
return [], [], {}, {}
|
|
452
|
+
|
|
453
|
+
relevant_edges: list[EdgeData] = []
|
|
454
|
+
edge_roles: dict[str, str] = {}
|
|
455
|
+
for e in self.all_edges():
|
|
456
|
+
if e.edge_type == EdgeType.CONTAINS:
|
|
457
|
+
continue
|
|
458
|
+
if e.source in path_nodes and e.target in path_nodes:
|
|
459
|
+
relevant_edges.append(e)
|
|
460
|
+
key = f"{e.source}|{e.target}"
|
|
461
|
+
edge_roles.setdefault(key, "flow")
|
|
462
|
+
|
|
463
|
+
node_roles: dict[str, str] = {}
|
|
464
|
+
for nid in path_nodes:
|
|
465
|
+
if nid == source:
|
|
466
|
+
node_roles[nid] = "origin"
|
|
467
|
+
elif nid == target:
|
|
468
|
+
node_roles[nid] = "destination"
|
|
469
|
+
else:
|
|
470
|
+
node_roles[nid] = "passthrough"
|
|
471
|
+
|
|
472
|
+
nodes = [self.get_node(nid) for nid in path_nodes if self.get_node(nid)]
|
|
473
|
+
return nodes, relevant_edges, node_roles, edge_roles # type: ignore[return-value]
|
|
474
|
+
|
|
475
|
+
# ── Impact & Dependency (Phase 2) ────────────────────────────────
|
|
476
|
+
|
|
477
|
+
def impact_of(self, node_id: str) -> set[str]:
|
|
478
|
+
"""Everything downstream — if I change this node, what's affected?
|
|
479
|
+
|
|
480
|
+
Uses edge-subgraph excluding CONTAINS so we only follow
|
|
481
|
+
real data/control flow (calls, reads, writes, returns).
|
|
482
|
+
"""
|
|
483
|
+
if node_id not in self._g:
|
|
484
|
+
return set()
|
|
485
|
+
flow_graph = self._g.edge_subgraph(
|
|
486
|
+
[(u, v, k) for u, v, k in self._g.edges(keys=True)
|
|
487
|
+
if k not in ("contains", "inherits")]
|
|
488
|
+
)
|
|
489
|
+
if node_id not in flow_graph:
|
|
490
|
+
return set()
|
|
491
|
+
return nx.descendants(flow_graph, node_id)
|
|
492
|
+
|
|
493
|
+
def feeds_into(self, node_id: str) -> set[str]:
|
|
494
|
+
"""Everything upstream — what does this node depend on?"""
|
|
495
|
+
if node_id not in self._g:
|
|
496
|
+
return set()
|
|
497
|
+
return nx.ancestors(self._g, node_id)
|
|
498
|
+
|
|
499
|
+
def path_between(self, source: str, target: str) -> list[str]:
    """Shortest dependency chain from source to target.

    Falls back to the reverse direction when no forward path exists, and to
    an empty list when neither direction yields a path.
    """
    for start, end in ((source, target), (target, source)):
        try:
            return nx.shortest_path(self._g, start, end)
        except (nx.NetworkXNoPath, nx.NodeNotFound):
            continue
    return []
|
|
509
|
+
|
|
510
|
+
def all_paths_between(self, source: str, target: str, max_depth: int = 8) -> list[list[str]]:
    """Every simple route from source to target, up to ``max_depth`` hops.

    The reverse direction is tried only when the forward search finds nothing.
    """
    routes: list[list[str]] = []
    for start, end in ((source, target), (target, source)):
        try:
            routes.extend(nx.all_simple_paths(self._g, start, end, cutoff=max_depth))
        except (nx.NetworkXError, nx.NodeNotFound):
            pass
        if routes:
            break
    return routes
|
|
523
|
+
|
|
524
|
+
def are_connected(self, a: str, b: str) -> bool:
    """Report whether a directed path exists from a to b or from b to a.

    Unknown nodes count as not connected.
    """
    graph = self._g
    try:
        if nx.has_path(graph, a, b):
            return True
        return nx.has_path(graph, b, a)
    except nx.NodeNotFound:
        return False
|
|
530
|
+
|
|
531
|
+
# ── Architecture Health (Phase 3) ────────────────────────────────
|
|
532
|
+
|
|
533
|
+
def find_cycles(self) -> list[list[str]]:
    """Find circular dependencies (via calls and imports edges only)."""
    dependency_edges = [
        (u, v, k)
        for u, v, k in self._g.edges(keys=True)
        if k in ("calls", "imports")
    ]
    dependency_graph = self._g.edge_subgraph(dependency_edges)
    try:
        cycles = list(nx.simple_cycles(dependency_graph))
    except nx.NetworkXError:
        cycles = []
    return cycles
|
|
543
|
+
|
|
544
|
+
def has_circular_deps(self) -> bool:
    """Quick check: does the calls/imports subgraph contain any cycle?"""
    dependency_edges = [
        (u, v, k)
        for u, v, k in self._g.edges(keys=True)
        if k in ("calls", "imports")
    ]
    acyclic = nx.is_directed_acyclic_graph(self._g.edge_subgraph(dependency_edges))
    return not acyclic
|
|
551
|
+
|
|
552
|
+
def critical_nodes(self, top_n: int = 20) -> list[tuple[str, float]]:
    """Return the ``top_n`` nodes by PageRank score, highest first.

    Returns an empty list when PageRank cannot be computed.
    """
    try:
        ranked = sorted(
            nx.pagerank(self._g).items(),
            key=lambda pair: pair[1],
            reverse=True,
        )
    except (nx.NetworkXError, ImportError, ModuleNotFoundError):
        return []
    return ranked[:top_n]
|
|
559
|
+
|
|
560
|
+
def bottlenecks(self, top_n: int = 20) -> list[tuple[str, float]]:
    """Return the ``top_n`` nodes by betweenness centrality, highest first.

    Returns an empty list when the centrality cannot be computed.
    """
    try:
        ranked = sorted(
            nx.betweenness_centrality(self._g).items(),
            key=lambda pair: pair[1],
            reverse=True,
        )
    except (nx.NetworkXError, ImportError, ModuleNotFoundError):
        return []
    return ranked[:top_n]
|
|
567
|
+
|
|
568
|
+
def most_coupled(self, top_n: int = 20) -> list[tuple[str, int]]:
    """Return the ``top_n`` nodes with the highest fan-in + fan-out."""
    graph = self._g
    fan_totals = [
        (nid, graph.in_degree(nid) + graph.out_degree(nid))
        for nid in graph.nodes()
    ]
    fan_totals.sort(key=lambda pair: pair[1], reverse=True)
    return fan_totals[:top_n]
|
|
574
|
+
|
|
575
|
+
def find_clusters(self) -> list[set[str]]:
    """Isolated groups of code — the graph's weakly connected components."""
    components = nx.weakly_connected_components(self._g)
    return list(components)
|
|
578
|
+
|
|
579
|
+
def circular_clusters(self) -> list[set[str]]:
    """Groups of mutually dependent code — strongly connected components > 1."""
    clusters: list[set[str]] = []
    for component in nx.strongly_connected_components(self._g):
        if len(component) > 1:
            clusters.append(component)
    return clusters
|
|
582
|
+
|
|
583
|
+
def dependency_layers(self) -> list[list[str]]:
    """Topological ordering — natural dependency layers.

    The graph is condensed so that each strongly connected component
    becomes one node, and the condensation is split into topological
    generations. Each returned layer lists the IDs of the original nodes
    belonging to that generation's components.

    Returns:
        One list of node IDs per layer, or an empty list when NetworkX
        reports an error.
    """
    try:
        condensed = nx.condensation(self._g)
        layers: list[list[str]] = []
        for generation in nx.topological_generations(condensed):
            layer: list[str] = []
            for component in generation:
                # Condensation nodes are integer component indices; the
                # original node IDs live in the "members" node attribute.
                layer.extend(sorted(condensed.nodes[component]["members"]))
            layers.append(layer)
        return layers
    except nx.NetworkXError:
        return []
|
|
590
|
+
|
|
591
|
+
def coupling_between(self, module_a: str, module_b: str) -> dict[str, Any]:
    """How tightly coupled are two modules? Counts cross-module edges.

    A node belongs to a module when its ID starts with the module string.
    CONTAINS edges are ignored; edges in either direction are counted.

    Returns:
        ``{"edge_count": int, "edges": [{"source", "target", "type"}, ...]}``.
    """
    graph_ids = list(self._g.nodes())
    a_nodes = {nid for nid in graph_ids if nid.startswith(module_a)}
    b_nodes = {nid for nid in graph_ids if nid.startswith(module_b)}
    cross: list[dict[str, str]] = []
    for e in self.all_edges():
        if e.edge_type == EdgeType.CONTAINS:
            continue
        forward = e.source in a_nodes and e.target in b_nodes
        backward = e.source in b_nodes and e.target in a_nodes
        if not (forward or backward):
            continue
        cross.append({"source": e.source, "target": e.target, "type": e.edge_type.value})
    return {"edge_count": len(cross), "edges": cross}
|
|
604
|
+
|
|
605
|
+
# ── Enhanced Dead Code (Phase 4) ─────────────────────────────────
|
|
606
|
+
|
|
607
|
+
def truly_dead(self, entry_points: list[str] | None = None) -> list[str]:
    """Find code unreachable from any entry point via graph reachability.

    When ``entry_points`` is omitted, every MODULE node (proposed ones
    included) is used as an entry point. Only non-proposed nodes are
    reported.
    """
    if entry_points is None:
        entry_points = [
            n.id for n in self.all_nodes() if n.symbol_type == SymbolType.MODULE
        ]
    reachable: set[str] = set()
    for ep in entry_points:
        if ep not in self._g:
            continue
        reachable.add(ep)
        reachable.update(nx.descendants(self._g, ep))
    dead: list[str] = []
    for n in self.all_nodes(include_proposed=False):
        if n.id not in reachable:
            dead.append(n.id)
    return dead
|
|
616
|
+
|
|
617
|
+
def functions_returning(self, type_hint: str) -> list[NodeData]:
    """Find functions whose return annotation matches a string.

    Scans the parsed (non-proposed) FUNCTION and METHOD nodes and compares
    ``type_hint`` against each node's ``return_annotation`` metadata entry.

    NOTE(review): the nesting of the fallback check under the symbol-type
    test is reconstructed from a source view without indentation — confirm
    against the original file.
    """
    results = []
    for n in self._node_data.values():
        if n.symbol_type in (SymbolType.FUNCTION, SymbolType.METHOD):
            # Substring match: needs a signature, and "-> <type_hint>" must
            # occur inside the return_annotation metadata string.
            if n.signature and f"-> {type_hint}" in (n.metadata.get("return_annotation", "")):
                results.append(n)
            # Fallback: exact match of the stored return annotation
            if n.metadata.get("return_annotation") == type_hint:
                results.append(n)
    return results
|
|
628
|
+
|
|
629
|
+
def unreachable_from(self, entry_point: str) -> list[NodeData]:
|
|
630
|
+
"""Find all nodes NOT reachable from a given entry point via calls."""
|
|
631
|
+
if entry_point not in self._g:
|
|
632
|
+
return []
|
|
633
|
+
reachable = set(nx.descendants(self._g, entry_point)) | {entry_point}
|
|
634
|
+
return [
|
|
635
|
+
n for n in self._node_data.values()
|
|
636
|
+
if n.id not in reachable
|
|
637
|
+
and n.symbol_type in (SymbolType.FUNCTION, SymbolType.METHOD)
|
|
638
|
+
]
|
|
639
|
+
|
|
640
|
+
# ── Proposed / Hypothetical ──────────────────────────────────────
|
|
641
|
+
|
|
642
|
+
def propose_function(
    self,
    name: str,
    module: str,
    calls: list[str] | None = None,
    called_by: list[str] | None = None,
    signature: str | None = None,
) -> NodeData:
    """Add a hypothetical function to the graph.

    The node's ID and qualified name are ``"<module>.<name>"``. Proposed
    CALLS edges are added to each entry of ``calls`` and from each entry of
    ``called_by``.

    Returns:
        The newly created proposed node.
    """
    qname = f"{module}.{name}"
    shown_signature = signature or f"def {name}(...)"
    node = NodeData(
        id=qname,
        name=name,
        qualified_name=qname,
        symbol_type=SymbolType.FUNCTION,
        is_proposed=True,
        signature=shown_signature,
    )
    self.add_node(node)

    # Outgoing call edges first, then incoming ones.
    endpoints = [(qname, callee) for callee in (calls or [])]
    endpoints += [(caller, qname) for caller in (called_by or [])]
    for src, dst in endpoints:
        self.add_edge(
            EdgeData(
                source=src, target=dst,
                edge_type=EdgeType.CALLS, is_proposed=True,
            )
        )

    return node
|
|
677
|
+
|
|
678
|
+
def clear_proposed(self) -> None:
|
|
679
|
+
"""Remove all hypothetical nodes and edges."""
|
|
680
|
+
for nid in list(self._proposed_nodes.keys()):
|
|
681
|
+
self._g.remove_node(nid)
|
|
682
|
+
self._proposed_nodes.clear()
|
|
683
|
+
self._proposed_edges.clear()
|
|
684
|
+
|
|
685
|
+
# ── Snapshot for frontend ────────────────────────────────────────
|
|
686
|
+
|
|
687
|
+
def snapshot(self, view: ViewState | None = None) -> GraphSnapshot:
    """Generate a filtered snapshot based on the given ViewState.

    A default ``ViewState`` is used when none is passed. When the view has
    highlighted nodes, highlights and trace roles that point at nodes
    hidden at the current zoom level are remapped to their visible
    ancestors.
    """
    if view is None:
        view = ViewState()

    nodes = self._filter_nodes(view)
    node_ids = {n.id for n in nodes}
    edges = self._filter_edges(view, node_ids)

    if not view.highlighted_node_ids:
        return GraphSnapshot(nodes=nodes, edges=edges, view=view)

    # Priority for role merging when multiple children collapse into one ancestor
    role_priority = {"origin": 0, "mutator": 1, "passthrough": 2, "destination": 3}
    highlighted: set[str] = set()
    node_roles: dict[str, str] = {}
    edge_roles: dict[str, str] = {}

    for hid in view.highlighted_node_ids:
        if hid in node_ids:
            shown = hid
        else:
            shown = self._ancestor_at_zoom(hid, node_ids)
            if not shown:
                # No visible ancestor: the highlight is dropped.
                continue
        highlighted.add(shown)
        if hid in view.trace_node_roles:
            self._merge_role(node_roles, shown, view.trace_node_roles[hid], role_priority)

    # Remap edge roles to ancestor edges; endpoints without a visible
    # ancestor keep their original ID.
    for ekey, role in view.trace_edge_roles.items():
        src, tgt = ekey.split("|", 1)
        if src not in node_ids:
            src = self._ancestor_at_zoom(src, node_ids) or src
        if tgt not in node_ids:
            tgt = self._ancestor_at_zoom(tgt, node_ids) or tgt
        new_key = f"{src}|{tgt}"
        # Write takes priority over read
        if role == "write" or new_key not in edge_roles:
            edge_roles[new_key] = role

    remapped_view = view.model_copy(update={
        "highlighted_node_ids": list(highlighted),
        "trace_node_roles": node_roles,
        "trace_edge_roles": edge_roles,
    })
    return GraphSnapshot(nodes=nodes, edges=edges, view=remapped_view)
|
|
733
|
+
|
|
734
|
+
@staticmethod
|
|
735
|
+
def _merge_role(roles: dict[str, str], node_id: str, role: str, priority: dict[str, int]) -> None:
|
|
736
|
+
"""Merge a role into the dict, keeping the highest-priority (lowest number) role."""
|
|
737
|
+
if node_id not in roles or priority.get(role, 99) < priority.get(roles[node_id], 99):
|
|
738
|
+
roles[node_id] = role
|
|
739
|
+
|
|
740
|
+
def _filter_nodes(self, view: ViewState) -> list[NodeData]:
    """Return the nodes that ``view`` allows to be displayed.

    Filters are applied in this order: symbol types for the zoom level,
    dead-node hiding, restriction to the nodes returned by
    ``subgraph_around`` for the focus node, and finally the explicit
    visible-id list. An empty ``visible_node_ids`` means "no restriction".
    """
    candidates = self.all_nodes(include_proposed=view.show_proposed)

    # Each zoom level shows ONLY its own symbol types; an unrecognised
    # level falls back to the function/method view.
    callables = {SymbolType.FUNCTION, SymbolType.METHOD}
    types_by_zoom: dict[str, set[SymbolType]] = {
        "module": {SymbolType.MODULE},
        "class": {SymbolType.CLASS},
        "function": callables,
        "variable": {SymbolType.VARIABLE, SymbolType.PARAMETER},
        "all": set(SymbolType),
    }
    allowed_types = types_by_zoom.get(view.zoom_level, callables)
    kept = [node for node in candidates if node.symbol_type in allowed_types]

    if not view.show_dead:
        kept = [node for node in kept if not node.is_dead]

    # Focus node: keep only what lies in the subgraph around it.
    if view.focus_node:
        neighbourhood, _ = self.subgraph_around(view.focus_node, depth=view.focus_depth)
        focus_ids = {node.id for node in neighbourhood}
        kept = [node for node in kept if node.id in focus_ids]

    # An explicit visible list narrows the result further.
    if view.visible_node_ids:
        explicit_ids = set(view.visible_node_ids)
        kept = [node for node in kept if node.id in explicit_ids]

    return kept
|
|
777
|
+
|
|
778
|
+
def _ancestor_at_zoom(self, node_id: str, visible_ids: set[str]) -> str | None:
|
|
779
|
+
"""Walk up the containment hierarchy to find a visible ancestor."""
|
|
780
|
+
current = node_id
|
|
781
|
+
while current:
|
|
782
|
+
if current in visible_ids:
|
|
783
|
+
return current
|
|
784
|
+
# Go up one level: "a.b.c.d" -> "a.b.c"
|
|
785
|
+
if "." in current:
|
|
786
|
+
current = current.rsplit(".", 1)[0]
|
|
787
|
+
else:
|
|
788
|
+
break
|
|
789
|
+
return None
|
|
790
|
+
|
|
791
|
+
def _filter_edges(self, view: ViewState, node_ids: set[str]) -> list[EdgeData]:
    """Return the edges to draw between the visible nodes ``node_ids``.

    Edges whose type is not in ``view.visible_edge_types`` are dropped, as
    are dead edges when ``view.show_dead`` is false. Containment edges are
    kept unchanged, and only when both endpoints are visible. Every other
    edge has each hidden endpoint replaced by its nearest visible ancestor;
    edges that end up with the same (source, target, type) are merged into
    one, and edges that collapse into a self-loop or have an endpoint with
    no visible ancestor are discarded.

    A merged edge is flagged dead (or proposed) only when every edge merged
    into it is dead (or proposed), so the flags do not depend on the order
    in which the edges are visited.
    """
    all_edges = self.all_edges(include_proposed=view.show_proposed)

    if not view.show_dead:
        all_edges = [e for e in all_edges if not e.is_dead]

    # Many edges share endpoints, so remember each ancestor lookup for the
    # duration of this call.
    ancestor_cache: dict[str, str | None] = {}

    def visible_endpoint(node_id: str) -> str | None:
        if node_id in node_ids:
            return node_id
        if node_id not in ancestor_cache:
            ancestor_cache[node_id] = self._ancestor_at_zoom(node_id, node_ids)
        return ancestor_cache[node_id]

    # At coarser zoom levels, aggregate edges up to visible ancestors.
    aggregated: dict[tuple[str, str, str], EdgeData] = {}
    for e in all_edges:
        if e.edge_type not in view.visible_edge_types:
            continue

        # Containment edges are never lifted to ancestors (the hierarchy is
        # implicit there); keep one only when both endpoints are visible.
        if e.edge_type == EdgeType.CONTAINS:
            if e.source in node_ids and e.target in node_ids:
                aggregated[(e.source, e.target, e.edge_type.value)] = e
            continue

        src = visible_endpoint(e.source)
        tgt = visible_endpoint(e.target)
        if not (src and tgt) or src == tgt:
            continue

        key = (src, tgt, e.edge_type.value)
        existing = aggregated.get(key)
        if existing is None:
            aggregated[key] = EdgeData(
                source=src,
                target=tgt,
                edge_type=e.edge_type,
                is_dead=e.is_dead,
                is_proposed=e.is_proposed,
            )
        elif (existing.is_dead and not e.is_dead) or (
            existing.is_proposed and not e.is_proposed
        ):
            # A live / non-proposed edge joins the group: clear the flag.
            # Re-assigning an existing key keeps its position in the dict,
            # so the output order is still first-seen order.
            aggregated[key] = EdgeData(
                source=src,
                target=tgt,
                edge_type=existing.edge_type,
                is_dead=existing.is_dead and e.is_dead,
                is_proposed=existing.is_proposed and e.is_proposed,
            )

    return list(aggregated.values())
|