interlinked-mapper 0.3.7__tar.gz → 0.3.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. {interlinked_mapper-0.3.7 → interlinked_mapper-0.3.9}/PKG-INFO +1 -1
  2. {interlinked_mapper-0.3.7 → interlinked_mapper-0.3.9}/interlinked/analyzer/dead_code.py +46 -3
  3. {interlinked_mapper-0.3.7 → interlinked_mapper-0.3.9}/interlinked/analyzer/graph.py +70 -23
  4. {interlinked_mapper-0.3.7 → interlinked_mapper-0.3.9}/interlinked/analyzer/parser.py +227 -18
  5. {interlinked_mapper-0.3.7 → interlinked_mapper-0.3.9}/interlinked/analyzer/similarity.py +7 -2
  6. {interlinked_mapper-0.3.7 → interlinked_mapper-0.3.9}/interlinked/commander/query.py +34 -4
  7. {interlinked_mapper-0.3.7 → interlinked_mapper-0.3.9}/interlinked_mapper.egg-info/PKG-INFO +1 -1
  8. {interlinked_mapper-0.3.7 → interlinked_mapper-0.3.9}/interlinked_mapper.egg-info/SOURCES.txt +2 -1
  9. {interlinked_mapper-0.3.7 → interlinked_mapper-0.3.9}/pyproject.toml +1 -1
  10. interlinked_mapper-0.3.9/tests/test_accuracy.py +969 -0
  11. {interlinked_mapper-0.3.7 → interlinked_mapper-0.3.9}/interlinked/__init__.py +0 -0
  12. {interlinked_mapper-0.3.7 → interlinked_mapper-0.3.9}/interlinked/analyzer/__init__.py +0 -0
  13. {interlinked_mapper-0.3.7 → interlinked_mapper-0.3.9}/interlinked/analyzer/embeddings.py +0 -0
  14. {interlinked_mapper-0.3.7 → interlinked_mapper-0.3.9}/interlinked/cli.py +0 -0
  15. {interlinked_mapper-0.3.7 → interlinked_mapper-0.3.9}/interlinked/commander/__init__.py +0 -0
  16. {interlinked_mapper-0.3.7 → interlinked_mapper-0.3.9}/interlinked/commander/llm.py +0 -0
  17. {interlinked_mapper-0.3.7 → interlinked_mapper-0.3.9}/interlinked/commander/repl.py +0 -0
  18. {interlinked_mapper-0.3.7 → interlinked_mapper-0.3.9}/interlinked/mcp_server.py +0 -0
  19. {interlinked_mapper-0.3.7 → interlinked_mapper-0.3.9}/interlinked/models.py +0 -0
  20. {interlinked_mapper-0.3.7 → interlinked_mapper-0.3.9}/interlinked/visualizer/__init__.py +0 -0
  21. {interlinked_mapper-0.3.7 → interlinked_mapper-0.3.9}/interlinked/visualizer/frontend/dist/assets/index-CyhrxsQU.css +0 -0
  22. {interlinked_mapper-0.3.7 → interlinked_mapper-0.3.9}/interlinked/visualizer/frontend/dist/assets/index-Dh01aXoE.js +0 -0
  23. {interlinked_mapper-0.3.7 → interlinked_mapper-0.3.9}/interlinked/visualizer/frontend/dist/index.html +0 -0
  24. {interlinked_mapper-0.3.7 → interlinked_mapper-0.3.9}/interlinked/visualizer/frontend/index.html +0 -0
  25. {interlinked_mapper-0.3.7 → interlinked_mapper-0.3.9}/interlinked/visualizer/frontend/index.html.d3-legacy +0 -0
  26. {interlinked_mapper-0.3.7 → interlinked_mapper-0.3.9}/interlinked/visualizer/frontend/package-lock.json +0 -0
  27. {interlinked_mapper-0.3.7 → interlinked_mapper-0.3.9}/interlinked/visualizer/frontend/package.json +0 -0
  28. {interlinked_mapper-0.3.7 → interlinked_mapper-0.3.9}/interlinked/visualizer/frontend/src/App.tsx +0 -0
  29. {interlinked_mapper-0.3.7 → interlinked_mapper-0.3.9}/interlinked/visualizer/frontend/src/graph/GraphCanvas.tsx +0 -0
  30. {interlinked_mapper-0.3.7 → interlinked_mapper-0.3.9}/interlinked/visualizer/frontend/src/graph/nodePrograms.ts +0 -0
  31. {interlinked_mapper-0.3.7 → interlinked_mapper-0.3.9}/interlinked/visualizer/frontend/src/index.css +0 -0
  32. {interlinked_mapper-0.3.7 → interlinked_mapper-0.3.9}/interlinked/visualizer/frontend/src/main.tsx +0 -0
  33. {interlinked_mapper-0.3.7 → interlinked_mapper-0.3.9}/interlinked/visualizer/frontend/src/state/graphStore.ts +0 -0
  34. {interlinked_mapper-0.3.7 → interlinked_mapper-0.3.9}/interlinked/visualizer/frontend/src/state/sseClient.ts +0 -0
  35. {interlinked_mapper-0.3.7 → interlinked_mapper-0.3.9}/interlinked/visualizer/frontend/src/theme.ts +0 -0
  36. {interlinked_mapper-0.3.7 → interlinked_mapper-0.3.9}/interlinked/visualizer/frontend/src/types.ts +0 -0
  37. {interlinked_mapper-0.3.7 → interlinked_mapper-0.3.9}/interlinked/visualizer/frontend/src/vite-env.d.ts +0 -0
  38. {interlinked_mapper-0.3.7 → interlinked_mapper-0.3.9}/interlinked/visualizer/frontend/tsconfig.json +0 -0
  39. {interlinked_mapper-0.3.7 → interlinked_mapper-0.3.9}/interlinked/visualizer/frontend/vite.config.ts +0 -0
  40. {interlinked_mapper-0.3.7 → interlinked_mapper-0.3.9}/interlinked/visualizer/layouts.py +0 -0
  41. {interlinked_mapper-0.3.7 → interlinked_mapper-0.3.9}/interlinked/visualizer/server.py +0 -0
  42. {interlinked_mapper-0.3.7 → interlinked_mapper-0.3.9}/interlinked_mapper.egg-info/dependency_links.txt +0 -0
  43. {interlinked_mapper-0.3.7 → interlinked_mapper-0.3.9}/interlinked_mapper.egg-info/entry_points.txt +0 -0
  44. {interlinked_mapper-0.3.7 → interlinked_mapper-0.3.9}/interlinked_mapper.egg-info/requires.txt +0 -0
  45. {interlinked_mapper-0.3.7 → interlinked_mapper-0.3.9}/interlinked_mapper.egg-info/top_level.txt +0 -0
  46. {interlinked_mapper-0.3.7 → interlinked_mapper-0.3.9}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: interlinked-mapper
3
- Version: 0.3.7
3
+ Version: 0.3.9
4
4
  Summary: A Python program topology explorer — visualize the shape of your codebase
5
5
  License: MIT
6
6
  Project-URL: Homepage, https://github.com/austerecryptid/interlinked
@@ -23,7 +23,9 @@ Additionally detects:
23
23
 
24
24
  from __future__ import annotations
25
25
 
26
+ import ast
26
27
  from collections import deque
28
+ from pathlib import Path
27
29
 
28
30
  from interlinked.analyzer.graph import CodeGraph
29
31
  from interlinked.models import EdgeType, SymbolType
@@ -76,15 +78,27 @@ def detect_dead_code(graph: CodeGraph) -> list[str]:
76
78
  if base_short in _SERIALIZABLE_BASES:
77
79
  serializable_class_ids.add(cls_id)
78
80
 
81
+ # ── Parse __all__ from module source files ──────────────────────
82
+ # Symbols listed in __all__ are public API — always alive.
83
+ all_exports: set[str] = set()
84
+ for n in all_nodes:
85
+ if n.symbol_type == SymbolType.MODULE and n.file_path:
86
+ exported = _parse_dunder_all(n.file_path)
87
+ for name in exported:
88
+ all_exports.add(f"{n.id}.{name}")
89
+
79
90
  # ── Identify production entry points ──────────────────────────
80
91
  # Modules are roots — their scope-level code runs on import.
81
92
  # Dunder methods and framework hooks are implicitly invoked.
93
+ # Symbols in __all__ are public API exports.
82
94
  entry_points: set[str] = set()
83
95
  for n in all_nodes:
84
96
  if n.symbol_type == SymbolType.MODULE:
85
97
  entry_points.add(n.id)
86
98
  elif n.name in _EXEMPT_NAMES:
87
99
  entry_points.add(n.id)
100
+ elif n.id in all_exports:
101
+ entry_points.add(n.id)
88
102
 
89
103
  # ── Forward BFS from production entry points ──────────────────
90
104
  # When we reach a node, follow its calls/reads edges.
@@ -108,17 +122,20 @@ def detect_dead_code(graph: CodeGraph) -> list[str]:
108
122
  if child not in reachable:
109
123
  queue.append(child)
110
124
 
111
- # ── Mark unreachable functions/methods as dead ─────────────────
125
+ # ── Mark unreachable functions/methods/classes as dead ──────────
112
126
  dead: set[str] = set()
113
127
  for n in all_nodes:
114
- if n.symbol_type not in (SymbolType.FUNCTION, SymbolType.METHOD):
128
+ if n.symbol_type not in (SymbolType.FUNCTION, SymbolType.METHOD, SymbolType.CLASS):
115
129
  continue
116
130
  # Test functions are not dead — they're tests
117
- if n.name.startswith("test_"):
131
+ if n.name.startswith("test_") or n.name.startswith("Test"):
118
132
  continue
119
133
  # Exempt names are never dead
120
134
  if n.name in _EXEMPT_NAMES:
121
135
  continue
136
+ # __all__ exports are never dead
137
+ if n.id in all_exports:
138
+ continue
122
139
  # If not reachable from any production entry point → dead
123
140
  if n.id not in reachable:
124
141
  n.is_dead = True
@@ -200,3 +217,29 @@ def detect_dead_code(graph: CodeGraph) -> list[str]:
200
217
  e.is_dead = True
201
218
 
202
219
  return list(dead)
220
+
221
+
222
+ def _parse_dunder_all(file_path: str) -> list[str]:
223
+ """Extract names from a static ``__all__ = [...]`` assignment.
224
+
225
+ Only handles literal list/tuple assignments — dynamic __all__ (e.g.
226
+ comprehensions, += mutations) are not supported by design.
227
+ """
228
+ try:
229
+ source = Path(file_path).read_text(encoding="utf-8", errors="replace")
230
+ tree = ast.parse(source, filename=file_path)
231
+ except (SyntaxError, OSError):
232
+ return []
233
+
234
+ for node in ast.iter_child_nodes(tree):
235
+ if not isinstance(node, ast.Assign):
236
+ continue
237
+ for target in node.targets:
238
+ if isinstance(target, ast.Name) and target.id == "__all__":
239
+ if isinstance(node.value, (ast.List, ast.Tuple)):
240
+ return [
241
+ elt.value
242
+ for elt in node.value.elts
243
+ if isinstance(elt, ast.Constant) and isinstance(elt.value, str)
244
+ ]
245
+ return []
@@ -12,6 +12,27 @@ from interlinked.models import (
12
12
  )
13
13
 
14
14
 
15
+ # Method names so common on builtins (dict, list, set, str, etc.) that resolving
16
+ # them to project symbols by bare-name matching is almost always a false positive.
17
+ # e.g. `op_dict.items()` should NOT resolve to `ActionCost.items`.
18
+ _BUILTIN_METHOD_NAMES: frozenset[str] = frozenset({
19
+ # dict
20
+ "items", "keys", "values", "get", "pop", "update", "setdefault",
21
+ "clear", "copy",
22
+ # list / sequence
23
+ "append", "extend", "insert", "remove", "sort", "reverse",
24
+ "count", "index",
25
+ # set
26
+ "add", "discard", "union", "intersection", "difference",
27
+ "issubset", "issuperset",
28
+ # str
29
+ "strip", "split", "join", "replace", "startswith", "endswith",
30
+ "lower", "upper", "format", "encode", "decode",
31
+ # general
32
+ "close", "read", "write", "flush", "seek", "tell",
33
+ })
34
+
35
+
15
36
  class CodeGraph:
16
37
  """The core graph structure representing an entire Python project.
17
38
 
@@ -144,22 +165,23 @@ class CodeGraph:
144
165
 
145
166
  # Step 3: Build name index from ALL current nodes for edge resolution
146
167
  all_nodes = self.all_nodes(include_proposed=False)
147
- name_index: dict[str, list[str]] = {}
168
+ name_index: dict[str, set[str]] = {}
148
169
  for n in all_nodes:
149
- name_index.setdefault(n.name, []).append(n.id)
170
+ name_index.setdefault(n.name, set()).add(n.id)
150
171
  parts = n.qualified_name.split(".")
151
172
  for i in range(1, len(parts)):
152
173
  suffix = ".".join(parts[i:])
153
- name_index.setdefault(suffix, []).append(n.id)
174
+ name_index.setdefault(suffix, set()).add(n.id)
154
175
 
155
176
  node_ids = {n.id for n in all_nodes}
156
177
 
157
- # Step 4: Resolve and add new edges
178
+ # Step 4: Resolve and add new edges (skip external references)
158
179
  added_edges: list[EdgeData] = []
159
180
  for e in new_edges:
160
181
  resolved = self._resolve_edge(e, node_ids, name_index)
161
- self.add_edge(resolved)
162
- added_edges.append(resolved)
182
+ if resolved.source in node_ids and resolved.target in node_ids:
183
+ self.add_edge(resolved)
184
+ added_edges.append(resolved)
163
185
 
164
186
  return {
165
187
  "removed_nodes": removed["removed_nodes"],
@@ -174,51 +196,76 @@ class CodeGraph:
174
196
  for n in nodes:
175
197
  self.add_node(n)
176
198
 
177
- # Build a lookup: short name -> list of qualified IDs
178
- name_index: dict[str, list[str]] = {}
199
+ # Build a lookup: short name -> set of qualified IDs
200
+ # Sets prevent duplicates from suffix indexing (which caused
201
+ # _resolve_edge to see len>1 for single-node names and bail).
202
+ name_index: dict[str, set[str]] = {}
179
203
  for n in nodes:
180
- name_index.setdefault(n.name, []).append(n.id)
204
+ name_index.setdefault(n.name, set()).add(n.id)
181
205
  # Also index by qualified_name suffix fragments
182
206
  # e.g. "graph.CodeGraph" for "analyzer.graph.CodeGraph"
183
207
  parts = n.qualified_name.split(".")
184
208
  for i in range(1, len(parts)):
185
209
  suffix = ".".join(parts[i:])
186
- name_index.setdefault(suffix, []).append(n.id)
210
+ name_index.setdefault(suffix, set()).add(n.id)
187
211
 
188
212
  node_ids = {n.id for n in nodes}
189
213
 
190
214
  for e in edges:
191
215
  resolved = self._resolve_edge(e, node_ids, name_index)
216
+ # Source must be a known project node. Targets may be
217
+ # unresolved for CALLS/READS (inference gaps on untyped
218
+ # variables), but structural edges need both endpoints.
219
+ if resolved.source not in node_ids:
220
+ continue
192
221
  self.add_edge(resolved)
193
222
 
194
223
  @staticmethod
195
224
  def _resolve_edge(
196
225
  edge: EdgeData,
197
226
  node_ids: set[str],
198
- name_index: dict[str, list[str]],
227
+ name_index: dict[str, set[str]],
199
228
  ) -> EdgeData:
200
229
  """Try to resolve unqualified source/target names to known node IDs."""
201
230
  source = edge.source
202
231
  target = edge.target
203
232
 
204
233
  if source not in node_ids:
205
- candidates = name_index.get(source, [])
234
+ candidates = name_index.get(source, set())
206
235
  if len(candidates) == 1:
207
- source = candidates[0]
236
+ source = next(iter(candidates))
208
237
 
209
238
  if target not in node_ids:
210
- candidates = name_index.get(target, [])
239
+ # Never resolve bare builtin method names — they match too
240
+ # broadly (e.g. "items" matching ActionCost.items when the
241
+ # actual call is dict.items()).
242
+ if target in _BUILTIN_METHOD_NAMES:
243
+ return edge
244
+
245
+ candidates = name_index.get(target, set())
211
246
  if len(candidates) == 1:
212
- target = candidates[0]
247
+ target = next(iter(candidates))
213
248
  elif len(candidates) > 1:
214
- # Prefer a candidate in the same module as the source
215
- src_module = source.rsplit(".", 1)[0] if "." in source else source
216
- for c in candidates:
217
- if c.startswith(src_module):
218
- target = c
219
- break
220
- else:
221
- target = candidates[0]
249
+ # For CALLS edges, a bare name like `process()` in Python
250
+ # NEVER resolves to the same class — you need `self.process()`
251
+ # for that. Exclude the source itself to prevent self-call
252
+ # artifacts, and prefer module-level over class-level.
253
+ filtered = candidates - {source}
254
+ if not filtered:
255
+ filtered = candidates
256
+
257
+ # Extract the top-level module from the source
258
+ src_parts = source.split(".")
259
+ src_module = src_parts[0] if src_parts else source
260
+
261
+ # Score candidates: prefer same module, then shorter paths
262
+ # (module-level functions are shorter than class methods)
263
+ best = None
264
+ for c in filtered:
265
+ if c.startswith(src_module + "."):
266
+ if best is None or c.count(".") < best.count("."):
267
+ best = c
268
+ target = best or next(iter(filtered))
222
269
 
223
270
  if source == edge.source and target == edge.target:
224
271
  return edge
@@ -42,6 +42,22 @@ _BUILTINS: frozenset[str] = frozenset(dir(builtins)) | frozenset({
42
42
  "__all__", "__spec__", "__loader__", "__package__", "__builtins__",
43
43
  })
44
44
 
45
+ # Known higher-order call patterns where a positional argument is the
46
+ # callable being invoked. Maps (attr_suffix) -> positional index of
47
+ # the callable arg. Also supports keyword argument names via _CALLABLE_KWARGS.
48
+ _CALLABLE_ARG_INDEX: dict[str, int] = {
49
+ "run_in_executor": 1, # loop.run_in_executor(executor, fn, ...)
50
+ "submit": 1, # executor.submit(fn, ...)
51
+ "map": 0, # map(fn, iterable) / pool.map(fn, iterable)
52
+ "apply": 0, # pool.apply(fn, ...)
53
+ "apply_async": 0, # pool.apply_async(fn, ...)
54
+ "partial": 0, # functools.partial(fn, ...)
55
+ }
56
+ _CALLABLE_KWARGS: dict[str, str] = {
57
+ "Thread": "target", # threading.Thread(target=fn)
58
+ "Process": "target", # multiprocessing.Process(target=fn)
59
+ }
60
+
45
61
 
46
62
 
47
63
  def parse_file(
@@ -74,7 +90,7 @@ def parse_file(
74
90
  return [], []
75
91
 
76
92
  # Pass 1: extract symbols and raw edges
77
- nodes, edges = _extract_from_module(tree, source, module_qname, str(file_path))
93
+ nodes, edges, _aliases = _extract_from_module(tree, source, module_qname, str(file_path))
78
94
 
79
95
  # Build combined node ID set and type index
80
96
  node_ids = {n.id for n in nodes}
@@ -186,6 +202,7 @@ def parse_project(root: str | Path) -> tuple[list[NodeData], list[EdgeData]]:
186
202
  root = Path(root).resolve()
187
203
  nodes: list[NodeData] = []
188
204
  edges: list[EdgeData] = []
205
+ all_import_aliases: dict[str, str] = {} # local_name -> import target
189
206
 
190
207
  # Skip directories that contain third-party or non-project Python files.
191
208
  # External references are resolved via import/AST analysis, not by parsing venv.
@@ -218,11 +235,12 @@ def parse_project(root: str | Path) -> tuple[list[NodeData], list[EdgeData]]:
218
235
  module_qname = _path_to_module(rel_path)
219
236
  trees.append((tree, module_qname, str(py_file)))
220
237
 
221
- file_nodes, file_edges = _extract_from_module(
238
+ file_nodes, file_edges, file_aliases = _extract_from_module(
222
239
  tree, source, module_qname, str(py_file)
223
240
  )
224
241
  nodes.extend(file_nodes)
225
242
  edges.extend(file_edges)
243
+ all_import_aliases.update(file_aliases)
226
244
 
227
245
  # Pass 2: type inference from annotations
228
246
  node_ids = {n.id for n in nodes}
@@ -253,6 +271,22 @@ def parse_project(root: str | Path) -> tuple[list[NodeData], list[EdgeData]]:
253
271
  suffix = ".".join(parts[i:])
254
272
  name_index.setdefault(suffix, []).append(n.id)
255
273
 
274
+ # Inject import aliases into name_index so aliased names resolve
275
+ # through the same path as their real targets.
276
+ # e.g. alias "process_data" -> "tests.fixtures.shadowing.process"
277
+ # name_index already has "process" -> [shadowing.process, ...]
278
+ # We find the target's suffix in name_index and reuse its candidate list.
279
+ for alias_name, alias_target in all_import_aliases.items():
280
+ if alias_name in name_index:
281
+ continue # don't clobber real nodes
282
+ # Try the full target, then progressively shorter suffixes
283
+ target_parts = alias_target.split(".")
284
+ for i in range(len(target_parts)):
285
+ suffix = ".".join(target_parts[i:])
286
+ if suffix in name_index:
287
+ name_index[alias_name] = name_index[suffix]
288
+ break
289
+
256
290
  # Pass 4: resolve all data-flow edges, progressive truncation, drop external
257
291
  #
258
292
  # Edge type handling:
@@ -264,8 +298,12 @@ def parse_project(root: str | Path) -> tuple[list[NodeData], list[EdgeData]]:
264
298
  # CONTAINS / INHERITS — pass through unchanged.
265
299
  resolved_edges: list[EdgeData] = []
266
300
  for e in edges:
267
- # Structural edges — always keep
301
+ # Structural edges — always keep, except external IMPORTS
268
302
  if e.edge_type not in (EdgeType.READS, EdgeType.WRITES, EdgeType.CALLS, EdgeType.RETURNS):
303
+ if e.edge_type == EdgeType.IMPORTS and e.target not in node_ids:
304
+ # Drop imports to external modules (asyncio, typing, etc.)
305
+ if not any(nid.startswith(e.target + ".") or nid == e.target for nid in node_ids):
306
+ continue
269
307
  resolved_edges.append(e)
270
308
  continue
271
309
 
@@ -381,7 +419,7 @@ def _extract_from_module(
381
419
  # Field(...) in Pydantic models, etc.
382
420
  visitor._extract_scope_level_calls(tree, module_qname)
383
421
 
384
- return nodes, edges
422
+ return nodes, edges, visitor._import_aliases
385
423
 
386
424
 
387
425
  # ---------------------------------------------------------------------------
@@ -408,6 +446,7 @@ class _SymbolVisitor(ast.NodeVisitor):
408
446
  self._edges = edges
409
447
  self._scope_stack: list[str] = [module_qname]
410
448
  self._node_ids: set[str] = set()
449
+ self._import_aliases: dict[str, str] = {} # local_name -> qualified target
411
450
 
412
451
  @property
413
452
  def _current_scope(self) -> str:
@@ -529,6 +568,8 @@ class _SymbolVisitor(ast.NodeVisitor):
529
568
  source=self._module, target=alias.name,
530
569
  edge_type=EdgeType.IMPORTS, line=node.lineno,
531
570
  ))
571
+ local_name = alias.asname or alias.name
572
+ self._import_aliases[local_name] = alias.name
532
573
 
533
574
  def visit_ImportFrom(self, node: ast.ImportFrom) -> None:
534
575
  base = node.module or ""
@@ -538,6 +579,9 @@ class _SymbolVisitor(ast.NodeVisitor):
538
579
  source=self._module, target=target,
539
580
  edge_type=EdgeType.IMPORTS, line=node.lineno,
540
581
  ))
582
+ # Track aliases: 'from X import Y as Z' -> Z maps to X.Y
583
+ local_name = alias.asname or alias.name
584
+ self._import_aliases[local_name] = target
541
585
 
542
586
  # -- Assignments at module / class scope --------------------------------
543
587
 
@@ -654,10 +698,39 @@ class _SymbolVisitor(ast.NodeVisitor):
654
698
  ))
655
699
 
656
700
  def _extract_calls(self, func_node: ast.AST, caller_qname: str) -> None:
657
- """Emit raw CALLS edges. Targets are unresolved (e.g. 'self.add_node')."""
701
+ """Emit raw CALLS edges. Targets are unresolved (e.g. 'self.add_node').
702
+
703
+ Also detects known callable-passing patterns like
704
+ ``loop.run_in_executor(None, fn)`` and ``Thread(target=fn)``
705
+ and emits an additional CALLS edge to the callable argument.
706
+ """
658
707
  for node in ast.walk(func_node):
659
708
  if not isinstance(node, ast.Call):
660
709
  continue
710
+
711
+ # Detect super().method() — Call(func=Attr(value=Call(func=Name('super'))))
712
+ if (isinstance(node.func, ast.Attribute)
713
+ and isinstance(node.func.value, ast.Call)
714
+ and isinstance(node.func.value.func, ast.Name)
715
+ and node.func.value.func.id == "super"):
716
+ method_name = node.func.attr
717
+ # Find the enclosing class and its base classes from
718
+ # already-emitted inherits edges
719
+ class_scope = self._class_scope()
720
+ if class_scope:
721
+ for edge in self._edges:
722
+ if edge.source == class_scope and edge.edge_type == EdgeType.INHERITS:
723
+ # Emit call to parent.method — resolution will
724
+ # match it to the actual qualified name
725
+ super_target = f"{edge.target}.{method_name}"
726
+ self._edges.append(EdgeData(
727
+ source=caller_qname, target=super_target,
728
+ edge_type=EdgeType.CALLS,
729
+ line=getattr(node, "lineno", None),
730
+ ))
731
+ break # MRO: first base class
732
+ continue
733
+
661
734
  callee = _name_from_node(node.func)
662
735
  if not callee:
663
736
  continue
@@ -687,6 +760,34 @@ class _SymbolVisitor(ast.NodeVisitor):
687
760
  metadata=metadata,
688
761
  ))
689
762
 
763
+ # Detect callable-passing patterns and emit CALLS to the
764
+ # actual callable argument.
765
+ callee_tail = callee.rsplit(".", 1)[-1]
766
+
767
+ # Positional callable arg: e.g. run_in_executor(None, fn)
768
+ idx = _CALLABLE_ARG_INDEX.get(callee_tail)
769
+ if idx is not None and idx < len(node.args):
770
+ fn_name = _name_from_node(node.args[idx])
771
+ if fn_name and fn_name not in _BUILTINS:
772
+ self._edges.append(EdgeData(
773
+ source=caller_qname, target=fn_name,
774
+ edge_type=EdgeType.CALLS,
775
+ line=getattr(node, "lineno", None),
776
+ ))
777
+
778
+ # Keyword callable arg: e.g. Thread(target=fn)
779
+ kw_param = _CALLABLE_KWARGS.get(callee_tail)
780
+ if kw_param:
781
+ for kw in node.keywords:
782
+ if kw.arg == kw_param:
783
+ fn_name = _name_from_node(kw.value)
784
+ if fn_name and fn_name not in _BUILTINS:
785
+ self._edges.append(EdgeData(
786
+ source=caller_qname, target=fn_name,
787
+ edge_type=EdgeType.CALLS,
788
+ line=getattr(node, "lineno", None),
789
+ ))
790
+
690
791
  def _extract_variable_access(self, func_node: ast.AST, scope_qname: str) -> None:
691
792
  """Emit raw READS/WRITES edges. Targets are unresolved."""
692
793
  param_names: set[str] = set()
@@ -880,7 +981,12 @@ class _TypeInferencer:
880
981
  return None
881
982
 
882
983
  def _resolve_subscript_inner(self, ann: ast.AST) -> str | None:
883
- """For list[NodeData] or set[X], resolve the element type."""
984
+ """For list[X], set[X], Generator[Y,S,R], Iterator[X], resolve the element type.
985
+
986
+ For single-arg subscripts (list[X], Iterator[X]): returns X.
987
+ For multi-arg subscripts: tries first element (Generator[Yield,...]),
988
+ then last (dict[K, V]).
989
+ """
884
990
  if isinstance(ann, ast.Subscript):
885
991
  sl = ann.slice
886
992
  if isinstance(sl, ast.Name):
@@ -888,8 +994,12 @@ class _TypeInferencer:
888
994
  if isinstance(sl, ast.Attribute):
889
995
  dotted = _name_from_node(sl)
890
996
  return self._type_index.get(dotted) if dotted else None
891
- # dict[K, V] -- return V for .values() iteration
892
997
  if isinstance(sl, ast.Tuple) and len(sl.elts) >= 2:
998
+ # Try first element (e.g. the yield type in Generator[Yield, Send, Return])
999
+ first = self._resolve_annotation(sl.elts[0])
1000
+ if first:
1001
+ return first
1002
+ # Fall back to last element (dict[K, V])
893
1003
  return self._resolve_annotation(sl.elts[-1])
894
1004
  # Handle X | None wrapping
895
1005
  if isinstance(ann, ast.BinOp) and isinstance(ann.op, ast.BitOr):
@@ -968,17 +1078,14 @@ class _TypeInferencer:
968
1078
  self._var_types[(func_qname, target.attr)] = resolved
969
1079
 
970
1080
  # Case B: method call — x = obj.method()
1081
+ # Also handles Class.classmethod() and Class.staticmethod()
971
1082
  elif "." in callee and isinstance(target, ast.Name):
972
- obj_name, method = callee.rsplit(".", 1)
973
- cls = self._resolve_var_type(obj_name, func_qname)
974
- if cls:
975
- method_qname = f"{cls}.{method}"
976
- ret_ann = self._return_types.get(method_qname)
977
- if ret_ann:
978
- ret_type = self._resolve_annotation(ret_ann)
979
- if ret_type:
980
- self._var_types[(func_qname, target.id)] = ret_type
981
- local_annotations[target.id] = ret_ann
1083
+ ret_ann = self._lookup_return_type(callee, func_qname)
1084
+ if ret_ann:
1085
+ ret_type = self._resolve_annotation(ret_ann)
1086
+ if ret_type:
1087
+ self._var_types[(func_qname, target.id)] = ret_type
1088
+ local_annotations[target.id] = ret_ann
982
1089
 
983
1090
  # Case C: assignment type propagation
984
1091
  # self.x = param or x = other_typed_var
@@ -1018,6 +1125,31 @@ class _TypeInferencer:
1018
1125
  if elem_type:
1019
1126
  self._var_types[(func_qname, gen.target.id)] = elem_type
1020
1127
 
1128
+ # Except-as variable typing:
1129
+ # `except AppError as e` -> e is typed as AppError
1130
+ elif isinstance(child, ast.ExceptHandler):
1131
+ if child.name and child.type:
1132
+ exc_name = _name_from_node(child.type)
1133
+ if exc_name:
1134
+ exc_class = self._type_index.get(exc_name)
1135
+ if exc_class:
1136
+ self._var_types[(func_qname, child.name)] = exc_class
1137
+
1138
+ # With / async-with as-variable typing:
1139
+ # `with X() as var` -> var's type is X.__enter__ return annotation
1140
+ # `async with X() as var` -> X.__aenter__ return annotation
1141
+ elif isinstance(child, (ast.With, ast.AsyncWith)):
1142
+ is_async = isinstance(child, ast.AsyncWith)
1143
+ for item in child.items:
1144
+ if item.optional_vars and isinstance(item.optional_vars, ast.Name):
1145
+ var_name = item.optional_vars.id
1146
+ cm_type = self._infer_context_manager_type(
1147
+ item.context_expr, func_qname, is_async,
1148
+ local_annotations, param_annotations,
1149
+ )
1150
+ if cm_type:
1151
+ self._var_types[(func_qname, var_name)] = cm_type
1152
+
1021
1153
  def _infer_iter_element_type(
1022
1154
  self,
1023
1155
  it: ast.AST,
@@ -1040,7 +1172,7 @@ class _TypeInferencer:
1040
1172
  if inner:
1041
1173
  return inner
1042
1174
 
1043
- # Case 2: for x in obj.method() -- resolve obj type, look up method return
1175
+ # Case 2a: for x in obj.method() -- resolve obj type, look up method return
1044
1176
  if isinstance(it, ast.Call):
1045
1177
  callee = _name_from_node(it.func)
1046
1178
  if callee and "." in callee:
@@ -1054,6 +1186,14 @@ class _TypeInferencer:
1054
1186
  if inner:
1055
1187
  return inner
1056
1188
 
1189
+ # Case 2b: for x in fn() -- standalone function with Generator[X] return
1190
+ if callee:
1191
+ ret_ann = self._lookup_return_type(callee, func_qname)
1192
+ if ret_ann:
1193
+ inner = self._resolve_subscript_inner(ret_ann)
1194
+ if inner:
1195
+ return inner
1196
+
1057
1197
  # Case 3: for x in obj.values() on dict[K, V]
1058
1198
  if isinstance(it, ast.Call) and isinstance(it.func, ast.Attribute):
1059
1199
  if it.func.attr == "values":
@@ -1068,6 +1208,75 @@ class _TypeInferencer:
1068
1208
 
1069
1209
  return None
1070
1210
 
1211
+ def _lookup_return_type(self, callee: str, caller_qname: str) -> ast.AST | None:
1212
+ """Look up a function's return type annotation by name.
1213
+
1214
+ Handles bare names (suffix match), qualified names (direct), and
1215
+ Class.method patterns (via type resolution).
1216
+ """
1217
+ # Direct qualified name match
1218
+ ret = self._return_types.get(callee)
1219
+ if ret:
1220
+ return ret
1221
+
1222
+ # Dotted: Class.method or obj.method
1223
+ if "." in callee:
1224
+ obj_name, method = callee.rsplit(".", 1)
1225
+ cls = self._resolve_var_type(obj_name, caller_qname)
1226
+ if not cls:
1227
+ cls = self._type_index.get(obj_name)
1228
+ if cls:
1229
+ ret = self._return_types.get(f"{cls}.{method}")
1230
+ if ret:
1231
+ return ret
1232
+
1233
+ # Bare name: suffix match against _return_types keys
1234
+ suffix = "." + callee
1235
+ for qname, ann in self._return_types.items():
1236
+ if qname == callee or qname.endswith(suffix):
1237
+ return ann
1238
+
1239
+ return None
1240
+
1241
+ def _infer_context_manager_type(
1242
+ self,
1243
+ ctx_expr: ast.AST,
1244
+ func_qname: str,
1245
+ is_async: bool,
1246
+ local_annotations: dict[str, ast.AST],
1247
+ param_annotations: dict[str, ast.AST],
1248
+ ) -> str | None:
1249
+ """Infer the type of the `as` variable in a with/async-with statement.
1250
+
1251
+ `with X() as var` -> var's type is X.__enter__ return annotation.
1252
+ `async with X() as var` -> X.__aenter__ return annotation.
1253
+ `with expr as var` where expr is a typed local -> same logic.
1254
+ """
1255
+ enter_method = "__aenter__" if is_async else "__enter__"
1256
+
1257
+ # Determine the context manager's class
1258
+ cm_class: str | None = None
1259
+
1260
+ if isinstance(ctx_expr, ast.Call):
1261
+ # `with SyncPool() as conn` or `with SyncPool(...) as conn`
1262
+ callee = _name_from_node(ctx_expr.func)
1263
+ if callee:
1264
+ cm_class = self._type_index.get(callee)
1265
+ elif isinstance(ctx_expr, ast.Name):
1266
+ # `with pool as conn` where pool is a typed variable
1267
+ cm_class = self._resolve_var_type(ctx_expr.id, func_qname)
1268
+
1269
+ if not cm_class:
1270
+ return None
1271
+
1272
+ # Look up __enter__/__aenter__ return annotation on the CM class
1273
+ enter_qname = f"{cm_class}.{enter_method}"
1274
+ ret_ann = self._return_types.get(enter_qname)
1275
+ if ret_ann:
1276
+ return self._resolve_annotation(ret_ann)
1277
+
1278
+ return None
1279
+
1071
1280
  def _resolve_var_type(self, name: str, func_qname: str) -> str | None:
1072
1281
  """Look up a variable's type, handling 'self'/'cls', dotted chains, and scope walking.
1073
1282
 
@@ -257,8 +257,13 @@ def get_rich_context(graph: CodeGraph, node: NodeData) -> dict:
257
257
  context["callers"] = [{"id": n.id, "name": n.name} for n in callers[:20]]
258
258
  context["callees"] = [{"id": n.id, "name": n.name} for n in callees[:20]]
259
259
 
260
- # Fingerprint
261
- context["fingerprint"] = node.metadata.get("fingerprint")
260
+ # Fingerprint — slim version (drop ast_tree, minhash, ast_node_counts, source_snippet)
261
+ fp = node.metadata.get("fingerprint")
262
+ if fp and isinstance(fp, dict):
263
+ _heavy = {"ast_tree", "minhash", "ast_node_counts", "source_snippet"}
264
+ context["fingerprint"] = {k: v for k, v in fp.items() if k not in _heavy}
265
+ else:
266
+ context["fingerprint"] = fp
262
267
 
263
268
  return context
264
269