interlinked-mapper 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- interlinked/__init__.py +3 -0
- interlinked/analyzer/__init__.py +7 -0
- interlinked/analyzer/dead_code.py +137 -0
- interlinked/analyzer/graph.py +822 -0
- interlinked/analyzer/parser.py +1141 -0
- interlinked/analyzer/similarity.py +486 -0
- interlinked/cli.py +136 -0
- interlinked/commander/__init__.py +6 -0
- interlinked/commander/llm.py +304 -0
- interlinked/commander/query.py +966 -0
- interlinked/commander/repl.py +50 -0
- interlinked/mcp_server.py +324 -0
- interlinked/models.py +107 -0
- interlinked/visualizer/__init__.py +1 -0
- interlinked/visualizer/layouts.py +181 -0
- interlinked/visualizer/server.py +428 -0
- interlinked_mapper-0.1.0.dist-info/METADATA +26 -0
- interlinked_mapper-0.1.0.dist-info/RECORD +21 -0
- interlinked_mapper-0.1.0.dist-info/WHEEL +5 -0
- interlinked_mapper-0.1.0.dist-info/entry_points.txt +2 -0
- interlinked_mapper-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,1141 @@
|
|
|
1
|
+
"""AST-based parser that extracts symbols and relationships from Python source.
|
|
2
|
+
|
|
3
|
+
Architecture:
|
|
4
|
+
Pass 1 (_SymbolVisitor) -- Extracts all nodes and **raw** edges. Edge targets
|
|
5
|
+
are the literal names from the AST (e.g. "self.state.zoom_level", "n.id",
|
|
6
|
+
"graph"). No resolution happens here.
|
|
7
|
+
Pass 2 (_TypeInferencer) -- Collects type annotations from the AST, then
|
|
8
|
+
resolves every raw edge target using the full type map. Handles self/cls,
|
|
9
|
+
dotted attribute chains, typed loop variables, assignment propagation.
|
|
10
|
+
Pass 3 (Structural inference) -- For any unresolved dotted name like "n.id",
|
|
11
|
+
builds a reverse index (field_name -> classes), intersects all field
|
|
12
|
+
accesses for a variable, and infers its type from the unique match.
|
|
13
|
+
Pass 4 (Progressive truncation + drop) -- For any remaining unresolved edge,
|
|
14
|
+
progressively strips attrs from the right until a known node is hit.
|
|
15
|
+
If nothing resolves, the edge is external and is dropped.
|
|
16
|
+
Pass 5 (CodeGraph.build_from) -- Final short-name to qualified-name resolution
|
|
17
|
+
for bare names (cross-module calls).
|
|
18
|
+
|
|
19
|
+
No hardcoded external module or builtin method lists. Resolution is entirely
|
|
20
|
+
dynamic: if a target resolves to a project node, it's kept; otherwise it's
|
|
21
|
+
progressively truncated or dropped.
|
|
22
|
+
|
|
23
|
+
Extracts:
|
|
24
|
+
Nodes -- modules, classes, functions/methods, variables (module/class/instance
|
|
25
|
+
scope), parameters, local variables (function scope).
|
|
26
|
+
Edges -- contains, calls, imports, inherits, reads, writes, returns.
|
|
27
|
+
"""
|
|
28
|
+
|
|
29
|
+
from __future__ import annotations
|
|
30
|
+
|
|
31
|
+
import ast
|
|
32
|
+
import builtins
|
|
33
|
+
from pathlib import Path
|
|
34
|
+
from typing import Any
|
|
35
|
+
|
|
36
|
+
from interlinked.models import NodeData, EdgeData, SymbolType, EdgeType
|
|
37
|
+
|
|
38
|
+
# Names that must never become graph nodes or edge targets: everything the
# ``builtins`` module exposes, plus constants and module-level dunders.
_BUILTINS: frozenset[str] = frozenset(dir(builtins)).union((
    "None", "True", "False",
    "__name__", "__file__", "__doc__", "__all__",
    "__spec__", "__loader__", "__package__", "__builtins__",
))
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def parse_project(root: str | Path) -> tuple[list[NodeData], list[EdgeData]]:
    """Walk a Python project directory and extract all symbols and edges.

    Every ``*.py`` file under *root* is parsed (in sorted path order). Files
    that cannot be read or parsed are skipped rather than aborting the scan.

    Args:
        root: Project directory; resolved to an absolute path before walking.

    Returns:
        ``(nodes, edges)`` where *nodes* is every symbol found by pass 1 and
        *edges* is the edge list after resolution / truncation / dropping.
    """
    root = Path(root).resolve()
    nodes: list[NodeData] = []
    edges: list[EdgeData] = []

    # Pass 1: extract all symbols and raw (unresolved) edges
    trees: list[tuple[ast.Module, str, str]] = []
    for py_file in sorted(root.rglob("*.py")):
        try:
            source = py_file.read_text(encoding="utf-8", errors="replace")
            tree = ast.parse(source, filename=str(py_file))
        except (OSError, SyntaxError, ValueError):
            # OSError: unreadable entry (directory named *.py, dangling
            # symlink, permissions). ValueError: source containing null
            # bytes. One bad file must not abort the whole project scan.
            continue

        module_qname = _path_to_module(py_file.relative_to(root))
        trees.append((tree, module_qname, str(py_file)))

        file_nodes, file_edges = _extract_from_module(
            tree, source, module_qname, str(py_file)
        )
        nodes.extend(file_nodes)
        edges.extend(file_edges)

    # Pass 2: type inference from annotations
    node_ids = {n.id for n in nodes}

    # Class/module short name (and every dotted suffix) -> qualified id.
    # The short name is assigned unconditionally (last definition wins);
    # suffixes only fill gaps (first definition wins).
    type_index: dict[str, str] = {}
    for n in nodes:
        if n.symbol_type not in (SymbolType.CLASS, SymbolType.MODULE):
            continue
        type_index[n.name] = n.id
        parts = n.qualified_name.split(".")
        for i in range(len(parts)):
            type_index.setdefault(".".join(parts[i:]), n.id)

    inferencer = _TypeInferencer(type_index, node_ids)
    for tree, module_qname, _fp in trees:
        inferencer.collect_types(tree, module_qname)

    # Pass 3: structural type inference -- infer types from field access patterns
    inferencer.infer_structural_types(edges)

    # name_index: short name and every proper dotted suffix -> node ids.
    # Used below only for membership tests on bare names.
    name_index: dict[str, list[str]] = {}
    for n in nodes:
        name_index.setdefault(n.name, []).append(n.id)
        parts = n.qualified_name.split(".")
        for i in range(1, len(parts)):
            name_index.setdefault(".".join(parts[i:]), []).append(n.id)

    # Pass 4: resolve data-flow edges, progressive truncation, drop external.
    #
    # Edge type handling:
    #   CALLS                    -- kept even when unresolved (external calls
    #                               stay visible), except calls whose root
    #                               name is a builtin, which are dropped.
    #   READS / WRITES / RETURNS -- resolve, truncate to a known node, or drop.
    #   everything else          -- passed through unchanged.
    data_flow = (EdgeType.READS, EdgeType.WRITES, EdgeType.CALLS, EdgeType.RETURNS)
    resolved_edges: list[EdgeData] = []
    for e in edges:
        if e.edge_type not in data_flow:
            resolved_edges.append(e)
            continue

        raw_target = e.target

        if e.edge_type == EdgeType.CALLS:
            # Filter builtin function calls (len, str, isinstance, etc.)
            if raw_target.split(".")[0] in _BUILTINS:
                continue
            resolved = inferencer.resolve(raw_target, e.source)
            if resolved and resolved in node_ids:
                resolved_edges.append(_retarget_edge(e, resolved))
            else:
                # Keep the raw call target so external library calls remain visible
                resolved_edges.append(e)
            continue

        # READS / WRITES / RETURNS
        resolved = inferencer.resolve(raw_target, e.source)
        if resolved is None:
            continue  # resolver filtered the target out

        if resolved in node_ids:
            # Reuse the original edge object when resolution changed nothing.
            resolved_edges.append(
                e if resolved == raw_target else _retarget_edge(e, resolved)
            )
            continue

        if "." in resolved:
            # Progressive truncation: strip attrs from the right until a
            # known node is hit. The full name is already known to be absent,
            # so start one component shorter.
            parts = resolved.split(".")
            for i in range(len(parts) - 1, 0, -1):
                candidate = ".".join(parts[:i])
                if candidate in node_ids:
                    resolved_edges.append(_retarget_edge(e, candidate))
                    break
            # No prefix matched: external dotted access, dropped.
            continue

        # Bare name: keep only if it matches a project symbol's short name.
        if resolved in name_index:
            resolved_edges.append(_retarget_edge(e, resolved))

    return nodes, resolved_edges


def _retarget_edge(edge: EdgeData, target: str) -> EdgeData:
    """Return a new edge identical to *edge* except that it points at *target*."""
    return EdgeData(
        source=edge.source, target=target,
        edge_type=edge.edge_type, is_dead=edge.is_dead,
        is_proposed=edge.is_proposed, line=edge.line,
        metadata=edge.metadata,
    )
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
def _path_to_module(rel_path: Path) -> str:
    """Convert a relative file path to a dotted module name.

    ``pkg/sub/mod.py`` becomes ``pkg.sub.mod`` and ``pkg/__init__.py`` becomes
    ``pkg``. A path with no components left (a top-level ``__init__.py`` or an
    empty path such as ``Path(".")``) maps to the placeholder ``"__root__"``.
    """
    parts = list(rel_path.parts)
    if not parts:
        # Empty relative path: nothing to index into, use the root placeholder.
        return "__root__"
    if parts[-1] == "__init__.py":
        parts.pop()
    else:
        parts[-1] = parts[-1].removesuffix(".py")
    return ".".join(parts) if parts else "__root__"
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def _extract_from_module(
    tree: ast.Module,
    source: str,
    module_qname: str,
    file_path: str,
) -> tuple[list[NodeData], list[EdgeData]]:
    """Build the node and raw-edge lists for one already-parsed module.

    The module itself becomes the first node (spanning line 1 to the number
    of lines in *source*); a ``_SymbolVisitor`` then appends everything it
    finds in *tree* to the same lists.
    """
    module_node = NodeData(
        id=module_qname,
        name=module_qname.rsplit(".", 1)[-1],
        qualified_name=module_qname,
        symbol_type=SymbolType.MODULE,
        file_path=file_path,
        line_start=1,
        line_end=len(source.splitlines()),
        docstring=ast.get_docstring(tree),
    )
    collected_nodes: list[NodeData] = [module_node]
    collected_edges: list[EdgeData] = []

    # The visitor mutates both lists in place.
    _SymbolVisitor(module_qname, file_path, collected_nodes, collected_edges).visit(tree)

    return collected_nodes, collected_edges
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
# ---------------------------------------------------------------------------
|
|
229
|
+
# Pass 1: AST visitor -- node creation + raw edge emission
|
|
230
|
+
# ---------------------------------------------------------------------------
|
|
231
|
+
|
|
232
|
+
class _SymbolVisitor(ast.NodeVisitor):
    """Walks the AST, creates graph nodes, and emits raw (unresolved) edges.

    Edge targets are the literal AST names: "self.state", "n.id", "graph".
    Resolution is deferred entirely to pass 2 (_TypeInferencer).

    Nodes and edges are appended to the lists handed to the constructor, so
    the caller sees the results in those same list objects.
    """

    def __init__(
        self,
        module_qname: str,
        file_path: str,
        nodes: list[NodeData],
        edges: list[EdgeData],
    ):
        self._module = module_qname
        self._file = file_path
        self._nodes = nodes
        self._edges = edges
        # Innermost enclosing scope is the last element.
        self._scope_stack: list[str] = [module_qname]
        # Ids of nodes added by this visitor (used for de-duplication).
        self._node_ids: set[str] = set()
        # Ids of CLASS nodes in ``nodes``; kept in sync by _add_node so class
        # membership checks do not rescan the whole node list on every call.
        self._class_ids: set[str] = {
            n.id for n in nodes if n.symbol_type == SymbolType.CLASS
        }

    @property
    def _current_scope(self) -> str:
        """Qualified name of the innermost scope being visited."""
        return self._scope_stack[-1]

    def _add_node(self, node: NodeData) -> None:
        """Append *node* unless this visitor already added one with the same id."""
        if node.id in self._node_ids:
            return
        self._nodes.append(node)
        self._node_ids.add(node.id)
        if node.symbol_type == SymbolType.CLASS:
            self._class_ids.add(node.id)

    def _add_contains(self, parent: str, child: str, line: int | None) -> None:
        """Emit a CONTAINS edge from *parent* to *child*."""
        self._edges.append(EdgeData(
            source=parent, target=child,
            edge_type=EdgeType.CONTAINS, line=line,
        ))

    def _is_inside_class(self) -> bool:
        """True when the current scope is a recorded CLASS node."""
        return self._current_scope in self._class_ids

    def _class_scope(self) -> str | None:
        """Return the nearest enclosing scope that is a CLASS node, if any."""
        for scope in reversed(self._scope_stack):
            if scope in self._class_ids:
                return scope
        return None

    # -- Classes -----------------------------------------------------------

    def visit_ClassDef(self, node: ast.ClassDef) -> None:
        """Record a class node, its CONTAINS/INHERITS edges, then visit its body."""
        qname = f"{self._current_scope}.{node.name}"
        self._add_node(NodeData(
            id=qname, name=node.name, qualified_name=qname,
            symbol_type=SymbolType.CLASS, file_path=self._file,
            line_start=node.lineno, line_end=node.end_lineno,
            docstring=ast.get_docstring(node),
        ))
        self._add_contains(self._current_scope, qname, node.lineno)
        for base in node.bases:
            base_name = _name_from_node(base)
            if base_name:
                self._edges.append(EdgeData(
                    source=qname, target=base_name,
                    edge_type=EdgeType.INHERITS, line=node.lineno,
                ))
        self._scope_stack.append(qname)
        self.generic_visit(node)
        self._scope_stack.pop()

    # -- Functions / Methods -----------------------------------------------

    def visit_FunctionDef(self, node: ast.FunctionDef) -> None:
        self._handle_funcdef(node)

    def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef) -> None:
        self._handle_funcdef(node)

    def _handle_funcdef(self, node: ast.FunctionDef | ast.AsyncFunctionDef) -> None:
        """Record a function/method node and everything extracted from its body."""
        qname = f"{self._current_scope}.{node.name}"
        # Must be evaluated before the function's own scope is pushed.
        is_method = self._is_inside_class()
        sym_type = SymbolType.METHOD if is_method else SymbolType.FUNCTION

        self._add_node(NodeData(
            id=qname, name=node.name, qualified_name=qname,
            symbol_type=sym_type, file_path=self._file,
            line_start=node.lineno, line_end=node.end_lineno,
            docstring=ast.get_docstring(node),
            signature=_signature_from_funcdef(node),
        ))
        self._add_contains(self._current_scope, qname, node.lineno)

        self._extract_parameters(node, qname)

        if node.name == "__init__" and is_method:
            self._extract_instance_attrs(node)

        self._scope_stack.append(qname)
        self._extract_calls(node, qname)
        self._extract_variable_access(node, qname)
        self._extract_returns(node, qname)
        self.generic_visit(node)
        self._scope_stack.pop()

    # -- Imports -----------------------------------------------------------

    def visit_Import(self, node: ast.Import) -> None:
        """Emit one IMPORTS edge per imported module, sourced at the module."""
        for alias in node.names:
            self._edges.append(EdgeData(
                source=self._module, target=alias.name,
                edge_type=EdgeType.IMPORTS, line=node.lineno,
            ))

    def visit_ImportFrom(self, node: ast.ImportFrom) -> None:
        """Emit one IMPORTS edge per imported name, as ``<module>.<name>``.

        The relative-import level is not consulted; only ``node.module`` is
        used as the prefix (or no prefix when it is empty).
        """
        base = node.module or ""
        for alias in node.names:
            target = f"{base}.{alias.name}" if base else alias.name
            self._edges.append(EdgeData(
                source=self._module, target=target,
                edge_type=EdgeType.IMPORTS, line=node.lineno,
            ))

    # -- Assignments at module / class scope --------------------------------

    def _at_module_or_class_scope(self) -> bool:
        """True when the current scope is the module itself or a class body."""
        return self._current_scope == self._module or self._is_inside_class()

    def _add_scope_variable(self, name: str, node: ast.stmt) -> None:
        """Record VARIABLE *name* in the current scope plus its CONTAINS edge."""
        qname = f"{self._current_scope}.{name}"
        self._add_node(NodeData(
            id=qname, name=name, qualified_name=qname,
            symbol_type=SymbolType.VARIABLE, file_path=self._file,
            line_start=node.lineno, line_end=node.end_lineno,
        ))
        self._add_contains(self._current_scope, qname, node.lineno)

    def visit_Assign(self, node: ast.Assign) -> None:
        """Create variable nodes for module-level / class-level assignments."""
        if self._at_module_or_class_scope():
            for target in node.targets:
                for name in _assigned_names(target):
                    self._add_scope_variable(name, node)
        self.generic_visit(node)

    def visit_AnnAssign(self, node: ast.AnnAssign) -> None:
        """Create a variable node for a module/class-level annotated name."""
        if self._at_module_or_class_scope() and node.target:
            name = _name_from_node(node.target)
            # Dotted targets (e.g. "obj.attr") are not scope variables.
            if name and "." not in name:
                self._add_scope_variable(name, node)
        self.generic_visit(node)

    # -- Internal helpers --------------------------------------------------

    def _extract_instance_attrs(self, init_node: ast.FunctionDef | ast.AsyncFunctionDef) -> None:
        """Create VARIABLE nodes for self.X = ... and self.X: T = ... in __init__."""
        class_scope = self._class_scope()
        if not class_scope:
            return
        for node in ast.walk(init_node):
            targets: list[ast.AST] = []
            if isinstance(node, ast.Assign):
                targets = node.targets
            elif isinstance(node, ast.AnnAssign) and node.target:
                targets = [node.target]
            for target in targets:
                # Only direct ``self.<attr>`` targets count.
                if not (isinstance(target, ast.Attribute)
                        and isinstance(target.value, ast.Name)
                        and target.value.id == "self"):
                    continue
                attr_qname = f"{class_scope}.{target.attr}"
                self._add_node(NodeData(
                    id=attr_qname, name=target.attr,
                    qualified_name=attr_qname,
                    symbol_type=SymbolType.VARIABLE,
                    file_path=self._file,
                    line_start=node.lineno,
                    line_end=getattr(node, "end_lineno", None),
                ))
                self._add_contains(class_scope, attr_qname, node.lineno)

    def _extract_parameters(self, func_node: ast.FunctionDef | ast.AsyncFunctionDef, func_qname: str) -> None:
        """Create PARAMETER nodes (skipping self/cls) contained by the function."""
        args = func_node.args
        # Concatenation builds a fresh list, so the AST's own lists are untouched.
        all_args: list[ast.arg] = (
            args.posonlyargs + args.args + args.kwonlyargs
        )
        if args.vararg:
            all_args.append(args.vararg)
        if args.kwarg:
            all_args.append(args.kwarg)
        for arg in all_args:
            if arg.arg in ("self", "cls"):
                continue
            param_qname = f"{func_qname}.{arg.arg}"
            self._add_node(NodeData(
                id=param_qname, name=arg.arg, qualified_name=param_qname,
                symbol_type=SymbolType.PARAMETER, file_path=self._file,
                line_start=func_node.lineno,
            ))
            self._add_contains(func_qname, param_qname, func_node.lineno)

    def _extract_calls(self, func_node: ast.AST, caller_qname: str) -> None:
        """Emit raw CALLS edges. Targets are unresolved (e.g. 'self.add_node').

        Edge metadata carries ``"args"`` (names of positional arguments) and
        ``"kwargs"`` (keyword -> argument name) when any are nameable.
        """
        for node in ast.walk(func_node):
            if not isinstance(node, ast.Call):
                continue
            callee = _name_from_node(node.func)
            if not callee:
                continue

            arg_names: list[str] = []
            for arg in node.args:
                aname = _name_from_node(arg)
                if aname:
                    arg_names.append(aname)
            kwarg_names: dict[str, str] = {}
            for kw in node.keywords:
                if kw.arg:  # ``**mapping`` has no keyword name
                    vname = _name_from_node(kw.value)
                    if vname:
                        kwarg_names[kw.arg] = vname

            metadata: dict[str, Any] = {}
            if arg_names:
                metadata["args"] = arg_names
            if kwarg_names:
                metadata["kwargs"] = kwarg_names

            self._edges.append(EdgeData(
                source=caller_qname, target=callee,
                edge_type=EdgeType.CALLS,
                line=getattr(node, "lineno", None),
                metadata=metadata,
            ))

    @staticmethod
    def _is_local_candidate(name: str | None) -> bool:
        """True for a bare, non-builtin name other than self/cls."""
        return bool(
            name
            and name not in ("self", "cls")
            and name not in _BUILTINS
            and "." not in name
        )

    @staticmethod
    def _loop_target_names(target: ast.AST) -> list[str]:
        """Names bound by a loop target: a Name, or Names directly in a tuple/list."""
        if isinstance(target, ast.Name):
            return [target.id]
        if isinstance(target, (ast.Tuple, ast.List)):
            return [elt.id for elt in target.elts if isinstance(elt, ast.Name)]
        return []

    def _extract_variable_access(self, func_node: ast.AST, scope_qname: str) -> None:
        """Emit raw READS/WRITES edges. Targets are unresolved.

        Also creates VARIABLE nodes for names bound inside the function
        (assignments, for-loop targets, comprehension variables) that are not
        parameters.
        """
        param_names: set[str] = set()
        local_names: set[str] = set()

        if isinstance(func_node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            fargs = func_node.args
            for arg in fargs.posonlyargs + fargs.args + fargs.kwonlyargs:
                if arg.arg not in ("self", "cls"):
                    param_names.add(arg.arg)
            if fargs.vararg:
                param_names.add(fargs.vararg.arg)
            if fargs.kwarg:
                param_names.add(fargs.kwarg.arg)

        # Collect local assignment targets
        for node in ast.walk(func_node):
            if isinstance(node, ast.Assign):
                for t in node.targets:
                    for name in _assigned_names(t):
                        if name not in ("self", "cls") and name not in _BUILTINS:
                            local_names.add(name)
            elif isinstance(node, ast.AugAssign):
                name = _name_from_node(node.target)
                if self._is_local_candidate(name):
                    local_names.add(name)
            elif isinstance(node, ast.AnnAssign) and node.value and node.target:
                name = _name_from_node(node.target)
                if self._is_local_candidate(name):
                    local_names.add(name)
            elif isinstance(node, ast.For):
                local_names.update(self._loop_target_names(node.target))
            # Comprehension loop variables (listcomp, setcomp, genexpr, dictcomp)
            elif isinstance(node, (ast.ListComp, ast.SetComp, ast.GeneratorExp, ast.DictComp)):
                for gen in node.generators:
                    local_names.update(self._loop_target_names(gen.target))

        # Create local variable nodes (not params -- those already exist).
        # Sorted so node/edge order does not depend on string-hash randomization.
        func_line = getattr(func_node, "lineno", None)
        for lname in sorted(local_names):
            if lname in param_names:
                continue
            lvar_qname = f"{scope_qname}.{lname}"
            self._add_node(NodeData(
                id=lvar_qname, name=lname, qualified_name=lvar_qname,
                symbol_type=SymbolType.VARIABLE, file_path=self._file,
                line_start=func_line,
            ))
            self._add_contains(scope_qname, lvar_qname, func_line)

        known_locals = param_names | local_names

        # Emit raw reads/writes
        for node in ast.walk(func_node):
            if isinstance(node, ast.Assign):
                for target in node.targets:
                    self._emit_raw_write(target, scope_qname, known_locals, node.lineno)
            elif isinstance(node, ast.AugAssign):
                self._emit_raw_write(node.target, scope_qname, known_locals, node.lineno)
                # AugAssign also reads its target
                name = _name_from_node(node.target)
                if name:
                    raw = self._raw_target(name, scope_qname, known_locals)
                    if raw:
                        self._emit_access(scope_qname, raw, EdgeType.READS, node.lineno)
            elif isinstance(node, ast.AnnAssign) and node.value and node.target:
                self._emit_raw_write(node.target, scope_qname, known_locals, node.lineno)

            elif isinstance(node, ast.Name) and isinstance(node.ctx, ast.Load):
                if node.id in _BUILTINS or node.id in ("self", "cls"):
                    continue
                raw = self._raw_target(node.id, scope_qname, known_locals)
                if raw:
                    self._emit_access(scope_qname, raw, EdgeType.READS, node.lineno)

            elif isinstance(node, ast.Attribute) and isinstance(node.ctx, ast.Load):
                dotted = _name_from_node(node)
                if dotted and dotted.split(".")[0] not in _BUILTINS:
                    self._emit_access(scope_qname, dotted, EdgeType.READS, node.lineno)

    def _emit_access(self, source: str, target: str, edge_type: EdgeType, line: int | None) -> None:
        """Append a READS/WRITES-style edge with no metadata."""
        self._edges.append(EdgeData(
            source=source, target=target,
            edge_type=edge_type, line=line,
        ))

    def _emit_raw_write(self, target: ast.AST, scope_qname: str, known_locals: set[str], lineno: int) -> None:
        """Emit WRITES edges for an assignment target (recursing into tuples/lists)."""
        if isinstance(target, ast.Name):
            if target.id in _BUILTINS or target.id in ("self", "cls"):
                return
            raw = self._raw_target(target.id, scope_qname, known_locals)
            if raw:
                self._emit_access(scope_qname, raw, EdgeType.WRITES, lineno)
        elif isinstance(target, ast.Attribute):
            dotted = _name_from_node(target)
            if dotted and dotted.split(".")[0] not in _BUILTINS:
                self._emit_access(scope_qname, dotted, EdgeType.WRITES, lineno)
        elif isinstance(target, (ast.Tuple, ast.List)):
            for elt in target.elts:
                self._emit_raw_write(elt, scope_qname, known_locals, lineno)

    @staticmethod
    def _raw_target(name: str, scope_qname: str, known_locals: set[str]) -> str | None:
        """Return the raw edge target for a bare name.

        Known locals/params get scope-qualified so they match their node IDs.
        Builtins yield ``None``. Everything else is left bare for the resolver.
        """
        if name in _BUILTINS:
            return None
        if name in known_locals:
            return f"{scope_qname}.{name}"
        return name

    @staticmethod
    def _walk_own_scope(func_node: ast.AST):
        """Yield *func_node* and its descendants in breadth-first order,
        without entering nested function, lambda or class definitions."""
        from collections import deque

        nested_scopes = (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef, ast.Lambda)
        queue = deque([func_node])
        while queue:
            current = queue.popleft()
            yield current
            for child in ast.iter_child_nodes(current):
                if not isinstance(child, nested_scopes):
                    queue.append(child)

    def _extract_returns(self, func_node: ast.AST, func_qname: str) -> None:
        """Emit raw RETURNS edges for ``return <name>`` statements of this function.

        Nested definitions are not entered: a ``return`` inside an inner
        ``def`` belongs to that inner function, which gets its own edges when
        the visitor reaches it.
        """
        for node in self._walk_own_scope(func_node):
            if isinstance(node, ast.Return) and node.value:
                ret_name = _name_from_node(node.value)
                if ret_name:
                    self._edges.append(EdgeData(
                        source=func_qname, target=ret_name,
                        edge_type=EdgeType.RETURNS, line=node.lineno,
                    ))
|
|
623
|
+
|
|
624
|
+
|
|
625
|
+
|
|
626
|
+
# ---------------------------------------------------------------------------
|
|
627
|
+
# Pass 2: Type inference and unified resolution
|
|
628
|
+
# ---------------------------------------------------------------------------
|
|
629
|
+
|
|
630
|
+
class _TypeInferencer:
|
|
631
|
+
"""Collects type annotations and resolves ALL edge targets.
|
|
632
|
+
|
|
633
|
+
Single point of resolution for every name pattern:
|
|
634
|
+
- "self.X" -> Class.X (via scope)
|
|
635
|
+
- "self.X.Y" -> resolve X's type, then Type.Y
|
|
636
|
+
- "n.id" -> look up n's type, then NodeData.id
|
|
637
|
+
- "graph.all_nodes" -> look up graph's type, then CodeGraph.all_nodes
|
|
638
|
+
- bare "result" -> already scope-qualified by visitor
|
|
639
|
+
- "ast.Name" -> filtered (external)
|
|
640
|
+
"""
|
|
641
|
+
|
|
642
|
+
def __init__(self, type_index: dict[str, str], node_ids: set[str]) -> None:
|
|
643
|
+
self._type_index = type_index
|
|
644
|
+
self._node_ids = node_ids
|
|
645
|
+
# (func_qname, var_name) -> class_qname
|
|
646
|
+
self._var_types: dict[tuple[str, str], str] = {}
|
|
647
|
+
# func_qname -> return type annotation AST
|
|
648
|
+
self._return_types: dict[str, ast.AST] = {}
|
|
649
|
+
|
|
650
|
+
# -- Annotation helpers ------------------------------------------------
|
|
651
|
+
|
|
652
|
+
def _resolve_annotation(self, ann: ast.AST) -> str | None:
|
|
653
|
+
"""Extract the resolved class qname from a type annotation."""
|
|
654
|
+
if isinstance(ann, ast.Name):
|
|
655
|
+
return self._type_index.get(ann.id)
|
|
656
|
+
if isinstance(ann, ast.Attribute):
|
|
657
|
+
dotted = _name_from_node(ann)
|
|
658
|
+
return self._type_index.get(dotted) if dotted else None
|
|
659
|
+
if isinstance(ann, ast.Subscript):
|
|
660
|
+
return self._resolve_annotation(ann.value)
|
|
661
|
+
if isinstance(ann, ast.BinOp) and isinstance(ann.op, ast.BitOr):
|
|
662
|
+
return self._resolve_annotation(ann.left) or self._resolve_annotation(ann.right)
|
|
663
|
+
if isinstance(ann, ast.Constant) and isinstance(ann.value, str):
|
|
664
|
+
return self._type_index.get(ann.value)
|
|
665
|
+
return None
|
|
666
|
+
|
|
667
|
+
def _resolve_subscript_inner(self, ann: ast.AST) -> str | None:
|
|
668
|
+
"""For list[NodeData] or set[X], resolve the element type."""
|
|
669
|
+
if isinstance(ann, ast.Subscript):
|
|
670
|
+
sl = ann.slice
|
|
671
|
+
if isinstance(sl, ast.Name):
|
|
672
|
+
return self._type_index.get(sl.id)
|
|
673
|
+
if isinstance(sl, ast.Attribute):
|
|
674
|
+
dotted = _name_from_node(sl)
|
|
675
|
+
return self._type_index.get(dotted) if dotted else None
|
|
676
|
+
# dict[K, V] -- return V for .values() iteration
|
|
677
|
+
if isinstance(sl, ast.Tuple) and len(sl.elts) >= 2:
|
|
678
|
+
return self._resolve_annotation(sl.elts[-1])
|
|
679
|
+
# Handle X | None wrapping
|
|
680
|
+
if isinstance(ann, ast.BinOp) and isinstance(ann.op, ast.BitOr):
|
|
681
|
+
return self._resolve_subscript_inner(ann.left) or self._resolve_subscript_inner(ann.right)
|
|
682
|
+
return None
|
|
683
|
+
|
|
684
|
+
# -- Type collection ---------------------------------------------------
|
|
685
|
+
|
|
686
|
+
def collect_types(self, tree: ast.Module, module_qname: str) -> None:
    """Walk an AST and collect all type information.

    Two sub-passes per module:
      1. Collect ALL return type annotations (so method call assignments
         can look up return types regardless of definition order).
      2. Collect param types, local types, assignments, for-loop types.

    Results are written into ``self._return_types`` (function qname -> raw
    return annotation node) and ``self._var_types`` ((scope qname, variable
    or attribute name) -> resolved class qname). Nothing is returned.

    Args:
        tree: Parsed module to scan.
        module_qname: Qualified name of the module; used as the prefix for
            every function qname built for this tree.
    """
    # Precompute func node id -> qualified name in one walk (O(n) not O(n²))
    func_qnames = self._build_func_qname_map(tree, module_qname)

    # Sub-pass 1: return types for every function in this module
    for node in ast.walk(tree):
        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            fq = func_qnames.get(id(node))
            if fq and node.returns:
                # Store the raw annotation node; it is resolved lazily so
                # generic returns (e.g. list[X]) keep their element type.
                self._return_types[fq] = node.returns

    # Sub-pass 2: everything else
    for node in ast.walk(tree):
        if not isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            continue

        func_qname = func_qnames.get(id(node))
        if not func_qname:
            continue

        # Parameter annotations
        # (*args / **kwargs are not included in this scan.)
        param_annotations: dict[str, ast.AST] = {}
        for arg in (node.args.posonlyargs + node.args.args + node.args.kwonlyargs):
            if arg.arg in ("self", "cls") or not arg.annotation:
                continue
            param_annotations[arg.arg] = arg.annotation
            resolved = self._resolve_annotation(arg.annotation)
            if resolved:
                self._var_types[(func_qname, arg.arg)] = resolved

        # Local annotations and constructor assignments
        # NOTE: ast.walk(node) yields every descendant, including the bodies
        # of nested functions, so their statements are also recorded under
        # this function's scope.
        local_annotations: dict[str, ast.AST] = {}
        for child in ast.walk(node):
            if isinstance(child, ast.AnnAssign) and child.target:
                name = _name_from_node(child.target)
                if not name or name in ("self", "cls"):
                    continue
                if name.startswith("self."):
                    # self.attr: T -> keyed by the attribute name (without
                    # the "self." prefix) in the assigning function's scope.
                    attr = name.split(".", 1)[1]
                    resolved = self._resolve_annotation(child.annotation)
                    if resolved:
                        self._var_types[(func_qname, attr)] = resolved
                elif "." not in name:
                    # Plain local: keep the raw annotation for element-type
                    # extraction in the loop pass below.
                    local_annotations[name] = child.annotation
                    resolved = self._resolve_annotation(child.annotation)
                    if resolved:
                        self._var_types[(func_qname, name)] = resolved

            # Only single-target assignments are considered (no a = b = ...).
            if isinstance(child, ast.Assign) and len(child.targets) == 1:
                target = child.targets[0]
                if isinstance(child.value, ast.Call):
                    callee = _name_from_node(child.value.func)
                    if callee:
                        # Case A: constructor call — x = CodeGraph()
                        resolved = self._type_index.get(callee)
                        if resolved:
                            if isinstance(target, ast.Name):
                                self._var_types[(func_qname, target.id)] = resolved
                            elif isinstance(target, ast.Attribute) and isinstance(target.value, ast.Name):
                                if target.value.id in ("self", "cls"):
                                    self._var_types[(func_qname, target.attr)] = resolved

                        # Case B: method call — x = obj.method()
                        elif "." in callee and isinstance(target, ast.Name):
                            obj_name, method = callee.rsplit(".", 1)
                            cls = self._resolve_var_type(obj_name, func_qname)
                            if cls:
                                method_qname = f"{cls}.{method}"
                                ret_ann = self._return_types.get(method_qname)
                                if ret_ann:
                                    ret_type = self._resolve_annotation(ret_ann)
                                    if ret_type:
                                        self._var_types[(func_qname, target.id)] = ret_type
                                    # Keep the raw return annotation so a later
                                    # loop over this variable can extract its
                                    # element type.
                                    local_annotations[target.id] = ret_ann

                # Case C: assignment type propagation
                # self.x = param  or  x = other_typed_var
                elif isinstance(child.value, ast.Name):
                    rhs_name = child.value.id
                    rhs_type = self._resolve_var_type(rhs_name, func_qname)
                    if not rhs_type:
                        # Check param annotations directly
                        rhs_ann = param_annotations.get(rhs_name)
                        if rhs_ann:
                            rhs_type = self._resolve_annotation(rhs_ann)
                            # Also store raw annotation for subscript extraction
                            if isinstance(target, ast.Name):
                                local_annotations[target.id] = rhs_ann
                    if rhs_type:
                        if isinstance(target, ast.Name):
                            self._var_types[(func_qname, target.id)] = rhs_type
                        elif isinstance(target, ast.Attribute) and isinstance(target.value, ast.Name):
                            if target.value.id in ("self", "cls"):
                                self._var_types[(func_qname, target.attr)] = rhs_type

        # For-loop and comprehension element type inference
        # Runs as a second walk so every annotation gathered above is
        # available regardless of statement order. Only simple-name loop
        # targets are typed (tuple unpacking is skipped).
        for child in ast.walk(node):
            if isinstance(child, ast.For) and isinstance(child.target, ast.Name):
                elem_type = self._infer_iter_element_type(
                    child.iter, func_qname, local_annotations, param_annotations,
                )
                if elem_type:
                    self._var_types[(func_qname, child.target.id)] = elem_type

            elif isinstance(child, (ast.ListComp, ast.SetComp, ast.GeneratorExp, ast.DictComp)):
                for gen in child.generators:
                    if isinstance(gen.target, ast.Name):
                        elem_type = self._infer_iter_element_type(
                            gen.iter, func_qname, local_annotations, param_annotations,
                        )
                        if elem_type:
                            self._var_types[(func_qname, gen.target.id)] = elem_type
|
|
805
|
+
|
|
806
|
+
def _infer_iter_element_type(
|
|
807
|
+
self,
|
|
808
|
+
it: ast.AST,
|
|
809
|
+
func_qname: str,
|
|
810
|
+
local_annotations: dict[str, ast.AST],
|
|
811
|
+
param_annotations: dict[str, ast.AST],
|
|
812
|
+
) -> str | None:
|
|
813
|
+
"""Infer element type from an iterator expression."""
|
|
814
|
+
|
|
815
|
+
# Case 1: for x in local_var -- check annotations for list[X]
|
|
816
|
+
if isinstance(it, ast.Name):
|
|
817
|
+
for ann_map in (local_annotations, param_annotations):
|
|
818
|
+
ann = ann_map.get(it.id)
|
|
819
|
+
if ann:
|
|
820
|
+
# Strip X | None
|
|
821
|
+
actual = ann
|
|
822
|
+
if isinstance(ann, ast.BinOp) and isinstance(ann.op, ast.BitOr):
|
|
823
|
+
actual = ann.left
|
|
824
|
+
inner = self._resolve_subscript_inner(actual)
|
|
825
|
+
if inner:
|
|
826
|
+
return inner
|
|
827
|
+
|
|
828
|
+
# Case 2: for x in obj.method() -- resolve obj type, look up method return
|
|
829
|
+
if isinstance(it, ast.Call):
|
|
830
|
+
callee = _name_from_node(it.func)
|
|
831
|
+
if callee and "." in callee:
|
|
832
|
+
obj_name, method = callee.rsplit(".", 1)
|
|
833
|
+
cls = self._resolve_var_type(obj_name, func_qname)
|
|
834
|
+
if cls:
|
|
835
|
+
method_qname = f"{cls}.{method}"
|
|
836
|
+
ret_ann = self._return_types.get(method_qname)
|
|
837
|
+
if ret_ann:
|
|
838
|
+
inner = self._resolve_subscript_inner(ret_ann)
|
|
839
|
+
if inner:
|
|
840
|
+
return inner
|
|
841
|
+
|
|
842
|
+
# Case 3: for x in obj.values() on dict[K, V]
|
|
843
|
+
if isinstance(it, ast.Call) and isinstance(it.func, ast.Attribute):
|
|
844
|
+
if it.func.attr == "values":
|
|
845
|
+
obj = _name_from_node(it.func.value)
|
|
846
|
+
if obj:
|
|
847
|
+
for ann_map in (local_annotations, param_annotations):
|
|
848
|
+
ann = ann_map.get(obj)
|
|
849
|
+
if ann and isinstance(ann, ast.Subscript):
|
|
850
|
+
sl = ann.slice
|
|
851
|
+
if isinstance(sl, ast.Tuple) and len(sl.elts) >= 2:
|
|
852
|
+
return self._resolve_annotation(sl.elts[-1])
|
|
853
|
+
|
|
854
|
+
return None
|
|
855
|
+
|
|
856
|
+
def _resolve_var_type(self, name: str, func_qname: str) -> str | None:
|
|
857
|
+
"""Look up a variable's type, handling 'self'/'cls', dotted chains, and scope walking.
|
|
858
|
+
|
|
859
|
+
Handles:
|
|
860
|
+
- 'self' / 'cls' -> enclosing class
|
|
861
|
+
- 'graph' -> param/local type lookup
|
|
862
|
+
- 'self.graph' -> resolve self to class, then look up 'graph' attr type
|
|
863
|
+
"""
|
|
864
|
+
if name in ("self", "cls"):
|
|
865
|
+
parts = func_qname.split(".")
|
|
866
|
+
for i in range(len(parts) - 1, 0, -1):
|
|
867
|
+
candidate = ".".join(parts[:i])
|
|
868
|
+
if candidate in self._type_index.values():
|
|
869
|
+
return candidate
|
|
870
|
+
return None
|
|
871
|
+
|
|
872
|
+
# Handle dotted names: self.graph, self._node_data, etc.
|
|
873
|
+
if "." in name:
|
|
874
|
+
parts = name.split(".")
|
|
875
|
+
root_type = self._resolve_var_type(parts[0], func_qname)
|
|
876
|
+
if root_type:
|
|
877
|
+
# Walk the chain resolving each attribute's type
|
|
878
|
+
current_type = root_type
|
|
879
|
+
for attr in parts[1:]:
|
|
880
|
+
attr_type = self._find_attr_type(current_type, attr)
|
|
881
|
+
if attr_type:
|
|
882
|
+
current_type = attr_type
|
|
883
|
+
else:
|
|
884
|
+
return None
|
|
885
|
+
return current_type
|
|
886
|
+
return None
|
|
887
|
+
|
|
888
|
+
scope = func_qname
|
|
889
|
+
while scope:
|
|
890
|
+
key = (scope, name)
|
|
891
|
+
if key in self._var_types:
|
|
892
|
+
return self._var_types[key]
|
|
893
|
+
if "." in scope:
|
|
894
|
+
scope = scope.rsplit(".", 1)[0]
|
|
895
|
+
else:
|
|
896
|
+
break
|
|
897
|
+
return None
|
|
898
|
+
|
|
899
|
+
# -- Structural type inference -----------------------------------------
|
|
900
|
+
|
|
901
|
+
def infer_structural_types(self, edges: list[EdgeData]) -> None:
    """Type untyped variables from the fields they are seen accessing.

    A reverse index ``field name -> {parent node ids}`` is built from the
    known node ids. Then, for every READS/WRITES edge whose target looks
    like ``var.field...`` (root not ``self``/``cls``, variable not already
    typed in the edge's source scope), the first accessed field is recorded.
    A variable is typed when exactly one parent owns ALL fields recorded for
    it; the result is stored in ``self._var_types[(scope, var)]``.
    """
    # Reverse index: member name -> ids of known nodes that contain it.
    owners_by_field: dict[str, set[str]] = {}
    for node_id in self._node_ids:
        owner, sep, member = node_id.rpartition(".")
        # Keep only members whose parent is itself a known node.
        if sep and owner in self._node_ids:
            owners_by_field.setdefault(member, set()).add(owner)

    # (scope, variable) -> first-level fields accessed on that variable.
    accessed: dict[tuple[str, str], set[str]] = {}
    for edge in edges:
        if edge.edge_type not in (EdgeType.READS, EdgeType.WRITES):
            continue
        segments = edge.target.split(".")
        if len(segments) < 2 or segments[0] in ("self", "cls"):
            continue
        key = (edge.source, segments[0])
        # Variables that already have a type are left alone.
        if key not in self._var_types:
            accessed.setdefault(key, set()).add(segments[1])

    # A variable gets a type only when a single owner has every field.
    for key, fields in accessed.items():
        if not fields:
            continue
        owner_sets = [owners_by_field.get(field) for field in fields]
        if any(owners is None for owners in owner_sets):
            # Some accessed field exists on no known node: no match possible.
            continue
        common = set.intersection(*owner_sets)
        if len(common) == 1:
            self._var_types[key] = next(iter(common))
|
|
959
|
+
|
|
960
|
+
# -- Unified resolution ------------------------------------------------
|
|
961
|
+
|
|
962
|
+
def resolve(self, target: str, source: str) -> str | None:
|
|
963
|
+
"""Resolve a raw edge target to a qualified node ID.
|
|
964
|
+
|
|
965
|
+
Returns None if the target should be filtered out (builtin).
|
|
966
|
+
Returns the resolved target (may be progressively truncated).
|
|
967
|
+
"""
|
|
968
|
+
# Already a known node
|
|
969
|
+
if target in self._node_ids:
|
|
970
|
+
return target
|
|
971
|
+
|
|
972
|
+
# Filter builtins
|
|
973
|
+
root = target.split(".")[0]
|
|
974
|
+
if root in _BUILTINS:
|
|
975
|
+
return None
|
|
976
|
+
|
|
977
|
+
# Bare name (no dots) -- leave for build_from
|
|
978
|
+
if "." not in target:
|
|
979
|
+
return target
|
|
980
|
+
|
|
981
|
+
# Dotted name -- resolve through type system
|
|
982
|
+
parts = target.split(".")
|
|
983
|
+
first = parts[0]
|
|
984
|
+
|
|
985
|
+
# Handle self.X / cls.X chains
|
|
986
|
+
if first in ("self", "cls"):
|
|
987
|
+
cls_qname = self._resolve_var_type(first, source)
|
|
988
|
+
if cls_qname:
|
|
989
|
+
resolved = self._resolve_attr_chain(cls_qname, parts[1:])
|
|
990
|
+
if resolved:
|
|
991
|
+
return resolved
|
|
992
|
+
return target
|
|
993
|
+
|
|
994
|
+
# Handle typed variable chains: graph.all_nodes, n.id, etc.
|
|
995
|
+
var_type = self._resolve_var_type(first, source)
|
|
996
|
+
if var_type:
|
|
997
|
+
resolved = self._resolve_attr_chain(var_type, parts[1:])
|
|
998
|
+
if resolved:
|
|
999
|
+
return resolved
|
|
1000
|
+
|
|
1001
|
+
# Untyped root -- return raw for progressive truncation later
|
|
1002
|
+
return target
|
|
1003
|
+
|
|
1004
|
+
def _resolve_attr_chain(self, class_qname: str, attrs: list[str]) -> str | None:
|
|
1005
|
+
"""Resolve an attribute chain against a known class.
|
|
1006
|
+
|
|
1007
|
+
Returns the deepest resolvable node ID, or None if nothing resolves.
|
|
1008
|
+
Uses progressive truncation: tries full chain first, then strips from
|
|
1009
|
+
the right until a known node is found.
|
|
1010
|
+
|
|
1011
|
+
Example: class_qname='CodeGraph', attrs=['_edges', 'append']
|
|
1012
|
+
1. Try 'CodeGraph._edges.append' -- not a node
|
|
1013
|
+
2. Try 'CodeGraph._edges' -- IS a node -> return it
|
|
1014
|
+
"""
|
|
1015
|
+
if not attrs:
|
|
1016
|
+
return class_qname if class_qname in self._node_ids else None
|
|
1017
|
+
|
|
1018
|
+
# Try full chain first
|
|
1019
|
+
full = class_qname + "." + ".".join(attrs)
|
|
1020
|
+
if full in self._node_ids:
|
|
1021
|
+
return full
|
|
1022
|
+
|
|
1023
|
+
# Try type-based resolution for deeper chains
|
|
1024
|
+
first = attrs[0]
|
|
1025
|
+
first_resolved = f"{class_qname}.{first}"
|
|
1026
|
+
|
|
1027
|
+
if len(attrs) > 1:
|
|
1028
|
+
attr_type = self._find_attr_type(class_qname, first)
|
|
1029
|
+
if attr_type:
|
|
1030
|
+
deeper = self._resolve_attr_chain(attr_type, attrs[1:])
|
|
1031
|
+
if deeper and deeper in self._node_ids:
|
|
1032
|
+
return deeper
|
|
1033
|
+
|
|
1034
|
+
# Progressive truncation: strip from right until we hit a node
|
|
1035
|
+
for i in range(len(attrs), 0, -1):
|
|
1036
|
+
candidate = class_qname + "." + ".".join(attrs[:i])
|
|
1037
|
+
if candidate in self._node_ids:
|
|
1038
|
+
return candidate
|
|
1039
|
+
|
|
1040
|
+
# The class itself is a node
|
|
1041
|
+
if class_qname in self._node_ids:
|
|
1042
|
+
return class_qname
|
|
1043
|
+
|
|
1044
|
+
return first_resolved # best-effort
|
|
1045
|
+
|
|
1046
|
+
def _find_attr_type(self, class_qname: str, attr_name: str) -> str | None:
|
|
1047
|
+
"""Find the type of a class attribute via __init__ or class-level scope."""
|
|
1048
|
+
for scope in (f"{class_qname}.__init__", class_qname):
|
|
1049
|
+
key = (scope, attr_name)
|
|
1050
|
+
if key in self._var_types:
|
|
1051
|
+
return self._var_types[key]
|
|
1052
|
+
return None
|
|
1053
|
+
|
|
1054
|
+
@staticmethod
|
|
1055
|
+
def _build_func_qname_map(tree: ast.Module, module_qname: str) -> dict[int, str]:
|
|
1056
|
+
"""Single-pass precomputation: map func node id() -> qualified name.
|
|
1057
|
+
|
|
1058
|
+
Replaces the O(n²) _find_func_qname which did a full AST walk per function.
|
|
1059
|
+
"""
|
|
1060
|
+
result: dict[int, str] = {}
|
|
1061
|
+
class _Mapper(ast.NodeVisitor):
|
|
1062
|
+
def __init__(self):
|
|
1063
|
+
self.stack = [module_qname]
|
|
1064
|
+
def visit_ClassDef(self, node):
|
|
1065
|
+
self.stack.append(f"{self.stack[-1]}.{node.name}")
|
|
1066
|
+
self.generic_visit(node)
|
|
1067
|
+
self.stack.pop()
|
|
1068
|
+
def visit_FunctionDef(self, node):
|
|
1069
|
+
qname = f"{self.stack[-1]}.{node.name}"
|
|
1070
|
+
result[id(node)] = qname
|
|
1071
|
+
self.stack.append(qname)
|
|
1072
|
+
self.generic_visit(node)
|
|
1073
|
+
self.stack.pop()
|
|
1074
|
+
def visit_AsyncFunctionDef(self, node):
|
|
1075
|
+
self.visit_FunctionDef(node)
|
|
1076
|
+
_Mapper().visit(tree)
|
|
1077
|
+
return result
|
|
1078
|
+
|
|
1079
|
+
|
|
1080
|
+
# ---------------------------------------------------------------------------
|
|
1081
|
+
# Utility functions
|
|
1082
|
+
# ---------------------------------------------------------------------------
|
|
1083
|
+
|
|
1084
|
+
def _name_from_node(node: ast.AST) -> str | None:
|
|
1085
|
+
"""Extract a dotted name from an AST node."""
|
|
1086
|
+
if isinstance(node, ast.Name):
|
|
1087
|
+
return node.id
|
|
1088
|
+
if isinstance(node, ast.Attribute):
|
|
1089
|
+
parent = _name_from_node(node.value)
|
|
1090
|
+
if parent:
|
|
1091
|
+
return f"{parent}.{node.attr}"
|
|
1092
|
+
return None
|
|
1093
|
+
|
|
1094
|
+
|
|
1095
|
+
def _assigned_names(target: ast.AST) -> list[str]:
|
|
1096
|
+
"""Get flat list of names from an assignment target."""
|
|
1097
|
+
if isinstance(target, ast.Name):
|
|
1098
|
+
return [target.id]
|
|
1099
|
+
if isinstance(target, (ast.Tuple, ast.List)):
|
|
1100
|
+
names = []
|
|
1101
|
+
for elt in target.elts:
|
|
1102
|
+
names.extend(_assigned_names(elt))
|
|
1103
|
+
return names
|
|
1104
|
+
return []
|
|
1105
|
+
|
|
1106
|
+
|
|
1107
|
+
def _signature_from_funcdef(node: ast.FunctionDef | ast.AsyncFunctionDef) -> str:
|
|
1108
|
+
"""Reconstruct a human-readable signature string."""
|
|
1109
|
+
args = node.args
|
|
1110
|
+
parts: list[str] = []
|
|
1111
|
+
|
|
1112
|
+
for a in args.posonlyargs:
|
|
1113
|
+
parts.append(a.arg)
|
|
1114
|
+
if args.posonlyargs:
|
|
1115
|
+
parts.append("/")
|
|
1116
|
+
|
|
1117
|
+
n_defaults = len(args.defaults)
|
|
1118
|
+
n_regular = len(args.args)
|
|
1119
|
+
for i, a in enumerate(args.args):
|
|
1120
|
+
default_idx = i - (n_regular - n_defaults)
|
|
1121
|
+
if default_idx >= 0:
|
|
1122
|
+
parts.append(f"{a.arg}=...")
|
|
1123
|
+
else:
|
|
1124
|
+
parts.append(a.arg)
|
|
1125
|
+
|
|
1126
|
+
if args.vararg:
|
|
1127
|
+
parts.append(f"*{args.vararg.arg}")
|
|
1128
|
+
elif args.kwonlyargs:
|
|
1129
|
+
parts.append("*")
|
|
1130
|
+
|
|
1131
|
+
for i, a in enumerate(args.kwonlyargs):
|
|
1132
|
+
if i < len(args.kw_defaults) and args.kw_defaults[i] is not None:
|
|
1133
|
+
parts.append(f"{a.arg}=...")
|
|
1134
|
+
else:
|
|
1135
|
+
parts.append(a.arg)
|
|
1136
|
+
|
|
1137
|
+
if args.kwarg:
|
|
1138
|
+
parts.append(f"**{args.kwarg.arg}")
|
|
1139
|
+
|
|
1140
|
+
prefix = "async def" if isinstance(node, ast.AsyncFunctionDef) else "def"
|
|
1141
|
+
return f"{prefix} {node.name}({', '.join(parts)})"
|