codemap-python 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. analysis/__init__.py +1 -0
  2. analysis/architecture/__init__.py +1 -0
  3. analysis/architecture/architecture_engine.py +155 -0
  4. analysis/architecture/dependency_cycles.py +103 -0
  5. analysis/architecture/risk_radar.py +220 -0
  6. analysis/call_graph/__init__.py +1 -0
  7. analysis/call_graph/call_extractor.py +91 -0
  8. analysis/call_graph/call_graph_builder.py +1 -0
  9. analysis/call_graph/call_resolver.py +56 -0
  10. analysis/call_graph/context_models.py +1 -0
  11. analysis/call_graph/cross_file_resolver.py +122 -0
  12. analysis/call_graph/execution_tracker.py +1 -0
  13. analysis/call_graph/flow_builder.py +1 -0
  14. analysis/call_graph/models.py +1 -0
  15. analysis/core/__init__.py +1 -0
  16. analysis/core/ast_context.py +1 -0
  17. analysis/core/ast_parser.py +8 -0
  18. analysis/core/class_extractor.py +35 -0
  19. analysis/core/function_extractor.py +16 -0
  20. analysis/core/import_extractor.py +43 -0
  21. analysis/explain/__init__.py +1 -0
  22. analysis/explain/docstring_extractor.py +45 -0
  23. analysis/explain/explain_runner.py +177 -0
  24. analysis/explain/repo_summary_generator.py +138 -0
  25. analysis/explain/return_analyzer.py +114 -0
  26. analysis/explain/risk_flags.py +1 -0
  27. analysis/explain/signature_extractor.py +104 -0
  28. analysis/explain/summary_generator.py +282 -0
  29. analysis/graph/__init__.py +1 -0
  30. analysis/graph/callgraph_index.py +117 -0
  31. analysis/graph/entrypoint_detector.py +1 -0
  32. analysis/graph/impact_analyzer.py +210 -0
  33. analysis/indexing/__init__.py +1 -0
  34. analysis/indexing/import_resolver.py +156 -0
  35. analysis/indexing/symbol_index.py +150 -0
  36. analysis/runners/__init__.py +1 -0
  37. analysis/runners/phase4_runner.py +137 -0
  38. analysis/utils/__init__.py +1 -0
  39. analysis/utils/ast_helpers.py +1 -0
  40. analysis/utils/cache_manager.py +659 -0
  41. analysis/utils/path_resolver.py +1 -0
  42. analysis/utils/repo_fetcher.py +469 -0
  43. cli.py +1728 -0
  44. codemap_cli.py +11 -0
  45. codemap_python-0.1.0.dist-info/METADATA +399 -0
  46. codemap_python-0.1.0.dist-info/RECORD +58 -0
  47. codemap_python-0.1.0.dist-info/WHEEL +5 -0
  48. codemap_python-0.1.0.dist-info/entry_points.txt +2 -0
  49. codemap_python-0.1.0.dist-info/top_level.txt +5 -0
  50. security_utils.py +51 -0
  51. ui/__init__.py +1 -0
  52. ui/app.py +2160 -0
  53. ui/device_id.py +27 -0
  54. ui/static/app.js +2703 -0
  55. ui/static/styles.css +1268 -0
  56. ui/templates/index.html +231 -0
  57. ui/utils/__init__.py +1 -0
  58. ui/utils/registry_manager.py +190 -0
@@ -0,0 +1,210 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import os
5
+ from collections import defaultdict, deque
6
+ from typing import Any, Dict, List, Optional, Set, Tuple
7
+
8
+
9
+ def _load_json(path: str, default: Any) -> Any:
10
+ if not os.path.exists(path):
11
+ return default
12
+ try:
13
+ with open(path, "r", encoding="utf-8") as f:
14
+ return json.load(f)
15
+ except Exception:
16
+ return default
17
+
18
+
19
+ def load_resolved_calls(cache_dir: str) -> List[dict]:
20
+ data = _load_json(os.path.join(cache_dir, "resolved_calls.json"), [])
21
+ return data if isinstance(data, list) else []
22
+
23
+
24
+ def build_adjacency(resolved_calls: List[dict]) -> Tuple[Dict[str, Set[str]], Dict[str, Set[str]]]:
25
+ forward: Dict[str, Set[str]] = defaultdict(set)
26
+ backward: Dict[str, Set[str]] = defaultdict(set)
27
+ for row in resolved_calls:
28
+ if not isinstance(row, dict):
29
+ continue
30
+ caller = str(row.get("caller_fqn", "") or "")
31
+ callee = str(row.get("callee_fqn", "") or "")
32
+ if not caller or not callee:
33
+ continue
34
+ forward[caller].add(callee)
35
+ backward[callee].add(caller)
36
+ forward.setdefault(callee, set())
37
+ backward.setdefault(caller, set())
38
+ return forward, backward
39
+
40
+
41
+ def infer_repo_prefix(cache_dir: str) -> str:
42
+ arch = _load_json(os.path.join(cache_dir, "architecture_metrics.json"), {})
43
+ if isinstance(arch, dict):
44
+ prefix = str(arch.get("repo_prefix", "") or "")
45
+ if prefix:
46
+ return prefix
47
+ return ""
48
+
49
+
50
+ def resolve_target(target: str, repo_prefix: str, resolved_calls: List[dict], arch_metrics: Dict[str, Any]) -> Dict[str, Any]:
51
+ raw = str(target or "").strip()
52
+ if not raw:
53
+ return {"type": "symbol", "value": "", "start_nodes": []}
54
+
55
+ if "." in raw:
56
+ return {"type": "symbol", "value": raw, "start_nodes": [raw]}
57
+
58
+ match_suffix = raw.replace("\\", "/")
59
+ start_nodes: Set[str] = set()
60
+ for row in resolved_calls:
61
+ if not isinstance(row, dict):
62
+ continue
63
+ file_path = str(row.get("file", "") or "").replace("\\", "/")
64
+ if file_path.endswith(match_suffix):
65
+ caller = str(row.get("caller_fqn", "") or "")
66
+ if caller:
67
+ start_nodes.add(caller)
68
+
69
+ if not start_nodes and repo_prefix:
70
+ symbols = arch_metrics.get("symbols", {}) if isinstance(arch_metrics, dict) else {}
71
+ for fqn, meta in (symbols.items() if isinstance(symbols, dict) else []):
72
+ if not isinstance(meta, dict):
73
+ continue
74
+ loc = meta.get("location", {}) if isinstance(meta.get("location"), dict) else {}
75
+ file_path = str(loc.get("file", "") or "").replace("\\", "/")
76
+ if file_path.endswith(match_suffix):
77
+ start_nodes.add(str(fqn))
78
+
79
+ return {"type": "file", "value": raw, "start_nodes": sorted(start_nodes)}
80
+
81
+
82
+ def _node_details(
83
+ fqn: str,
84
+ symbols: Dict[str, Any],
85
+ resolved_calls: List[dict],
86
+ ) -> Dict[str, Any]:
87
+ sym = symbols.get(fqn, {}) if isinstance(symbols, dict) and isinstance(symbols.get(fqn), dict) else {}
88
+ fan_in = int(sym.get("fan_in", 0) or 0)
89
+ fan_out = int(sym.get("fan_out", 0) or 0)
90
+ loc = sym.get("location", {}) if isinstance(sym.get("location"), dict) else {}
91
+ file_path = str(loc.get("file", "") or "")
92
+ line = int(loc.get("start_line", 1) or 1)
93
+
94
+ if not file_path:
95
+ for row in resolved_calls:
96
+ if not isinstance(row, dict):
97
+ continue
98
+ if str(row.get("caller_fqn", "") or "") == fqn or str(row.get("callee_fqn", "") or "") == fqn:
99
+ file_path = str(row.get("file", "") or "")
100
+ line = int(row.get("line", 1) or 1)
101
+ break
102
+
103
+ return {
104
+ "fqn": fqn,
105
+ "fan_in": fan_in,
106
+ "fan_out": fan_out,
107
+ "file": file_path,
108
+ "line": line,
109
+ }
110
+
111
+
112
+ def _bfs(
113
+ starts: List[str],
114
+ adjacency: Dict[str, Set[str]],
115
+ depth: int,
116
+ max_nodes: int,
117
+ ) -> Tuple[Dict[str, int], List[Dict[str, str]], bool]:
118
+ visited_dist: Dict[str, int] = {}
119
+ edges: List[Dict[str, str]] = []
120
+ truncated = False
121
+
122
+ q: deque[Tuple[str, int]] = deque()
123
+ for s in starts:
124
+ q.append((s, 0))
125
+
126
+ while q:
127
+ node, dist = q.popleft()
128
+ if dist >= depth:
129
+ continue
130
+ neighbors = sorted(adjacency.get(node, set()))
131
+ for nxt in neighbors:
132
+ edges.append({"from": node, "to": nxt})
133
+ next_dist = dist + 1
134
+ prev = visited_dist.get(nxt)
135
+ if prev is None or next_dist < prev:
136
+ if len(visited_dist) >= max_nodes:
137
+ truncated = True
138
+ continue
139
+ visited_dist[nxt] = next_dist
140
+ q.append((nxt, next_dist))
141
+
142
+ return visited_dist, edges, truncated
143
+
144
+
145
+ def summarize_impacted_files(nodes: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
146
+ counts: Dict[str, int] = defaultdict(int)
147
+ for n in nodes:
148
+ file_path = str(n.get("file", "") or "")
149
+ if file_path:
150
+ counts[file_path] += 1
151
+ rows = [{"file": f, "count": c} for f, c in counts.items()]
152
+ rows.sort(key=lambda r: (-int(r["count"]), r["file"]))
153
+ return rows
154
+
155
+
156
+ def compute_impact(cache_dir: str, target: str, depth: int = 2, max_nodes: int = 200) -> dict:
157
+ depth = max(1, int(depth))
158
+ max_nodes = max(10, int(max_nodes))
159
+
160
+ resolved_calls = load_resolved_calls(cache_dir)
161
+ arch = _load_json(os.path.join(cache_dir, "architecture_metrics.json"), {})
162
+ if not isinstance(arch, dict):
163
+ arch = {}
164
+
165
+ repo_prefix = infer_repo_prefix(cache_dir)
166
+ symbols = arch.get("symbols", {}) if isinstance(arch.get("symbols"), dict) else {}
167
+ forward, backward = build_adjacency(resolved_calls)
168
+
169
+ target_info = resolve_target(target=target, repo_prefix=repo_prefix, resolved_calls=resolved_calls, arch_metrics=arch)
170
+ starts = [str(s) for s in (target_info.get("start_nodes") or []) if str(s)]
171
+
172
+ up_map, up_edges_raw, up_trunc = _bfs(starts=starts, adjacency=backward, depth=depth, max_nodes=max_nodes)
173
+ down_map, down_edges_raw, down_trunc = _bfs(starts=starts, adjacency=forward, depth=depth, max_nodes=max_nodes)
174
+
175
+ upstream_nodes = []
176
+ for fqn, dist in sorted(up_map.items(), key=lambda kv: (kv[1], kv[0])):
177
+ row = _node_details(fqn, symbols, resolved_calls)
178
+ row["distance"] = int(dist)
179
+ upstream_nodes.append(row)
180
+
181
+ downstream_nodes = []
182
+ for fqn, dist in sorted(down_map.items(), key=lambda kv: (kv[1], kv[0])):
183
+ row = _node_details(fqn, symbols, resolved_calls)
184
+ row["distance"] = int(dist)
185
+ downstream_nodes.append(row)
186
+
187
+ upstream_edges = [{"from": e["to"], "to": e["from"]} for e in up_edges_raw]
188
+ downstream_edges = list(down_edges_raw)
189
+
190
+ return {
191
+ "ok": True,
192
+ "repo_prefix": repo_prefix,
193
+ "target": {"type": target_info.get("type", "symbol"), "value": str(target_info.get("value", "") or "")},
194
+ "depth": depth,
195
+ "max_nodes": max_nodes,
196
+ "upstream": {
197
+ "nodes": upstream_nodes,
198
+ "edges": upstream_edges,
199
+ "truncated": bool(up_trunc),
200
+ },
201
+ "downstream": {
202
+ "nodes": downstream_nodes,
203
+ "edges": downstream_edges,
204
+ "truncated": bool(down_trunc),
205
+ },
206
+ "impacted_files": {
207
+ "upstream": summarize_impacted_files(upstream_nodes),
208
+ "downstream": summarize_impacted_files(downstream_nodes),
209
+ },
210
+ }
@@ -0,0 +1 @@
1
+ # Indexing modules - Phase 4: Global knowledge layer
@@ -0,0 +1,156 @@
1
+ # Resolves imports → actual files
2
+ from dataclasses import dataclass
3
+ from typing import Dict, Optional, List
4
+
5
+ from analysis.indexing.symbol_index import SymbolIndex
6
+
7
+
8
+ # ----------------------------
9
+ # Data Model
10
+ # ----------------------------
11
+
12
+ @dataclass
13
+ class ResolvedImport:
14
+ """
15
+ Represents a resolved import in a single file.
16
+ """
17
+ alias: str # local name used in file
18
+ module: str # full module path
19
+ symbol: Optional[str] # imported symbol (None for plain import)
20
+
21
+
22
+ # ----------------------------
23
+ # Import Resolver
24
+ # ----------------------------
25
+
26
+ class ImportResolver:
27
+ """
28
+ Resolves raw import statements into fully-qualified references.
29
+ Also stores per-module resolved import maps for later lookup.
30
+ """
31
+
32
+ def __init__(self, symbol_index: SymbolIndex):
33
+ self.symbol_index = symbol_index
34
+ # ✅ NEW: cache { module_name -> { alias -> ResolvedImport } }
35
+ self._imports_by_module: Dict[str, Dict[str, ResolvedImport]] = {}
36
+
37
+ # ----------------------------
38
+ # Public API
39
+ # ----------------------------
40
+ def resolve_imports(
41
+ self,
42
+ imports: List[dict],
43
+ current_module: str
44
+ ) -> Dict[str, ResolvedImport]:
45
+ """
46
+ Resolves all imports for a single file.
47
+
48
+ Returns:
49
+ Dict[alias -> ResolvedImport]
50
+ """
51
+ resolved: Dict[str, ResolvedImport] = {}
52
+
53
+ for imp in imports:
54
+ if imp["type"] == "import":
55
+ self._handle_import(imp, resolved)
56
+
57
+ elif imp["type"] == "from_import":
58
+ self._handle_from_import(imp, current_module, resolved)
59
+
60
+ return resolved
61
+
62
+ def index_module_imports(self, module_name: str, imports: List[dict]) -> Dict[str, ResolvedImport]:
63
+ """
64
+ ✅ NEW: Resolve and store imports for a module.
65
+ This is what your runner should call once per file.
66
+ """
67
+ resolved = self.resolve_imports(imports, module_name)
68
+ self._imports_by_module[module_name] = resolved
69
+ return resolved
70
+
71
+ def get_imports(self, module_name: str) -> Dict[str, ResolvedImport]:
72
+ """
73
+ ✅ NEW: Fetch resolved import map for a module.
74
+ Returns empty dict if module was never indexed.
75
+ """
76
+ return self._imports_by_module.get(module_name, {})
77
+
78
+ def clear_module(self, module_name: str):
79
+ """
80
+ ✅ Optional helper: remove cached imports for one module.
81
+ Useful later for incremental indexing in VS Code.
82
+ """
83
+ if module_name in self._imports_by_module:
84
+ del self._imports_by_module[module_name]
85
+
86
+ def clear(self):
87
+ """
88
+ ✅ Optional helper: clears all cached imports.
89
+ """
90
+ self._imports_by_module.clear()
91
+
92
+ # ----------------------------
93
+ # Internal Helpers
94
+ # ----------------------------
95
+ def _handle_import(self, imp: dict, resolved: Dict[str, ResolvedImport]):
96
+ """
97
+ Handles: import a.b.c as x
98
+ """
99
+ module = imp["module"]
100
+ alias = imp["alias"] if imp.get("alias") else module.split(".")[-1]
101
+
102
+ resolved[alias] = ResolvedImport(
103
+ alias=alias,
104
+ module=module,
105
+ symbol=None
106
+ )
107
+
108
+ def _handle_from_import(
109
+ self,
110
+ imp: dict,
111
+ current_module: str,
112
+ resolved: Dict[str, ResolvedImport]
113
+ ):
114
+ """
115
+ Handles: from a.b import c as d
116
+ Also supports relative imports.
117
+ """
118
+ base_module = imp["module"]
119
+ name = imp["name"]
120
+ alias = imp["alias"] if imp.get("alias") else name
121
+ level = imp.get("level", 0)
122
+
123
+ # Resolve relative imports
124
+ if level > 0:
125
+ base_module = self._resolve_relative_module(
126
+ current_module=current_module,
127
+ level=level,
128
+ target_module=base_module
129
+ )
130
+
131
+ resolved[alias] = ResolvedImport(
132
+ alias=alias,
133
+ module=base_module,
134
+ symbol=name
135
+ )
136
+
137
+ def _resolve_relative_module(
138
+ self,
139
+ current_module: str,
140
+ level: int,
141
+ target_module: Optional[str]
142
+ ) -> str:
143
+ """
144
+ Resolves relative imports like:
145
+ from ..utils import helper
146
+ """
147
+ parts = current_module.split(".")
148
+
149
+ if level > len(parts):
150
+ return target_module or ""
151
+
152
+ base = parts[:-level]
153
+ if target_module:
154
+ return ".".join(base + [target_module])
155
+
156
+ return ".".join(base)
@@ -0,0 +1,150 @@
1
+ import ast
2
+ from dataclasses import dataclass, field
3
+ from enum import Enum
4
+ from typing import Dict, List, Optional, Tuple
5
+
6
+
7
+ class SymbolKind(Enum):
8
+ FUNCTION = "function"
9
+ CLASS = "class"
10
+ METHOD = "method"
11
+ MODULE = "module"
12
+ BUILTIN = "builtin"
13
+ EXTERNAL = "external"
14
+
15
+
16
+ @dataclass
17
+ class SymbolInfo:
18
+ name: str
19
+ qualified_name: str
20
+ kind: SymbolKind
21
+ module: str
22
+ file_path: str
23
+ start_line: int
24
+ end_line: int
25
+ class_name: Optional[str] = None
26
+ metadata: Dict = field(default_factory=dict)
27
+
28
+
29
+ class SymbolIndex:
30
+ """Global registry of all symbols across the codebase."""
31
+
32
+ def __init__(self):
33
+ self._symbols: List[SymbolInfo] = []
34
+ self._by_name: Dict[str, List[SymbolInfo]] = {}
35
+ self._by_fqn: Dict[Tuple[str, str], SymbolInfo] = {}
36
+
37
+ def add_symbol(self, symbol: SymbolInfo):
38
+ key = (symbol.module, symbol.qualified_name)
39
+ if key in self._by_fqn:
40
+ return
41
+ self._symbols.append(symbol)
42
+ self._by_fqn[key] = symbol
43
+ self._by_name.setdefault(symbol.name, []).append(symbol)
44
+
45
+ def index_file(self, ast_tree: ast.AST, module: str, file_path: str):
46
+ max_line = 1
47
+ for node in ast.walk(ast_tree):
48
+ ln = getattr(node, "lineno", None)
49
+ end_ln = getattr(node, "end_lineno", None)
50
+ if isinstance(ln, int):
51
+ max_line = max(max_line, ln)
52
+ if isinstance(end_ln, int):
53
+ max_line = max(max_line, end_ln)
54
+
55
+ self.add_symbol(
56
+ SymbolInfo(
57
+ name="<module>",
58
+ qualified_name="<module>",
59
+ kind=SymbolKind.MODULE,
60
+ module=module,
61
+ file_path=file_path,
62
+ start_line=1,
63
+ end_line=max_line,
64
+ )
65
+ )
66
+
67
+ for node in ast.walk(ast_tree):
68
+ if isinstance(node, ast.FunctionDef):
69
+ symbol = SymbolInfo(
70
+ name=node.name,
71
+ qualified_name=node.name,
72
+ kind=SymbolKind.FUNCTION,
73
+ module=module,
74
+ file_path=file_path,
75
+ start_line=int(getattr(node, "lineno", 1) or 1),
76
+ end_line=int(getattr(node, "end_lineno", getattr(node, "lineno", 1)) or getattr(node, "lineno", 1)),
77
+ )
78
+ self.add_symbol(symbol)
79
+
80
+ elif isinstance(node, ast.ClassDef):
81
+ class_symbol = SymbolInfo(
82
+ name=node.name,
83
+ qualified_name=node.name,
84
+ kind=SymbolKind.CLASS,
85
+ module=module,
86
+ file_path=file_path,
87
+ start_line=int(getattr(node, "lineno", 1) or 1),
88
+ end_line=int(getattr(node, "end_lineno", getattr(node, "lineno", 1)) or getattr(node, "lineno", 1)),
89
+ )
90
+ self.add_symbol(class_symbol)
91
+
92
+ for item in node.body:
93
+ if isinstance(item, ast.FunctionDef):
94
+ method_symbol = SymbolInfo(
95
+ name=item.name,
96
+ qualified_name=f"{node.name}.{item.name}",
97
+ kind=SymbolKind.METHOD,
98
+ module=module,
99
+ file_path=file_path,
100
+ start_line=int(getattr(item, "lineno", 1) or 1),
101
+ end_line=int(getattr(item, "end_lineno", getattr(item, "lineno", 1)) or getattr(item, "lineno", 1)),
102
+ class_name=node.name,
103
+ )
104
+ self.add_symbol(method_symbol)
105
+
106
+ def load_snapshot(self, snapshot: List[Dict]):
107
+ self.clear()
108
+ if not isinstance(snapshot, list):
109
+ return
110
+ for row in snapshot:
111
+ if not isinstance(row, dict):
112
+ continue
113
+ kind_raw = str(row.get("kind", "function") or "function").lower()
114
+ try:
115
+ kind = SymbolKind(kind_raw)
116
+ except Exception:
117
+ kind = SymbolKind.FUNCTION
118
+ sym = SymbolInfo(
119
+ name=str(row.get("name", "") or ""),
120
+ qualified_name=str(row.get("qualified_name", "") or ""),
121
+ kind=kind,
122
+ module=str(row.get("module", "") or ""),
123
+ file_path=str(row.get("file_path", "") or ""),
124
+ start_line=int(row.get("start_line", 1) or 1),
125
+ end_line=int(row.get("end_line", row.get("start_line", 1)) or row.get("start_line", 1)),
126
+ class_name=row.get("class_name"),
127
+ metadata=row.get("metadata", {}) if isinstance(row.get("metadata"), dict) else {},
128
+ )
129
+ if sym.module and sym.qualified_name:
130
+ self.add_symbol(sym)
131
+
132
+ def get_by_name(self, name: str) -> List[SymbolInfo]:
133
+ return self._by_name.get(name, [])
134
+
135
+ def get(self, module: str, qualified_name: str) -> Optional[SymbolInfo]:
136
+ return self._by_fqn.get((module, qualified_name))
137
+
138
+ def all_symbols(self) -> List[SymbolInfo]:
139
+ return list(self._symbols)
140
+
141
+ def remove_by_file(self, file_path: str):
142
+ keep = [s for s in self._symbols if s.file_path != file_path]
143
+ self.clear()
144
+ for s in keep:
145
+ self.add_symbol(s)
146
+
147
+ def clear(self):
148
+ self._symbols.clear()
149
+ self._by_name.clear()
150
+ self._by_fqn.clear()
@@ -0,0 +1 @@
1
+ # Phase controllers
@@ -0,0 +1,137 @@
1
+ # Orchestrates Phase-4 pipeline
2
+ from __future__ import annotations
3
+
4
+ from typing import Optional, Dict, Any, List
5
+
6
+ import os
7
+ import ast
8
+ import json
9
+ from analysis.indexing.symbol_index import SymbolIndex
10
+ from analysis.indexing.import_resolver import ImportResolver
11
+ from analysis.call_graph.cross_file_resolver import CrossFileResolver
12
+ from analysis.call_graph.call_extractor import extract_function_calls
13
+ from analysis.core.import_extractor import extract_imports
14
+ from analysis.graph.callgraph_index import build_caller_fqn
15
+
16
+
17
+ PROJECT_ROOT = os.path.dirname(os.path.dirname(__file__))
18
+
19
+
20
+ def collect_python_files(root_dir: str) -> List[str]:
21
+ ignore_dirs = {".git", "__pycache__", ".codemap_cache", "node_modules", ".venv", "venv"}
22
+ py_files: List[str] = []
23
+ for root, dirs, files in os.walk(root_dir):
24
+ dirs[:] = [d for d in dirs if d not in ignore_dirs]
25
+ for file in files:
26
+ if file.endswith(".py") and not file.startswith("__"):
27
+ py_files.append(os.path.join(root, file))
28
+ return py_files
29
+
30
+
31
+ def parse_ast(file_path: str):
32
+ with open(file_path, "r", encoding="utf-8") as f:
33
+ return ast.parse(f.read())
34
+
35
+
36
+ def file_to_module(file_path: str, repo_root: str) -> str:
37
+ repo_root = os.path.abspath(repo_root)
38
+ file_path = os.path.abspath(file_path)
39
+
40
+ rel = os.path.relpath(file_path, repo_root).replace(os.sep, ".")
41
+ if rel.endswith(".py"):
42
+ rel = rel[:-3]
43
+
44
+ repo_name = os.path.basename(repo_root.rstrip("\\/"))
45
+ return f"{repo_name}.{rel}"
46
+
47
+
48
+ def _symbol_snapshot(symbol_index: SymbolIndex) -> List[Dict[str, Any]]:
49
+ rows: List[Dict[str, Any]] = []
50
+ for sym in symbol_index.all_symbols():
51
+ rows.append(
52
+ {
53
+ "name": str(sym.name),
54
+ "qualified_name": str(sym.qualified_name),
55
+ "kind": str(getattr(sym.kind, "value", str(sym.kind))),
56
+ "module": str(sym.module),
57
+ "file_path": str(sym.file_path),
58
+ "start_line": int(sym.start_line),
59
+ "end_line": int(sym.end_line),
60
+ "class_name": sym.class_name,
61
+ "metadata": dict(sym.metadata or {}),
62
+ }
63
+ )
64
+ return rows
65
+
66
+
67
+ def run(repo_dir: Optional[str] = None, output_dir: Optional[str] = None, force_rebuild: bool = False) -> Dict[str, Any]:
68
+ analysis_root = os.path.dirname(os.path.dirname(__file__))
69
+
70
+ if repo_dir is None:
71
+ repo_dir = os.path.join(analysis_root, "testing_repo")
72
+
73
+ if output_dir is None:
74
+ output_dir = os.path.join(analysis_root, "output")
75
+
76
+ os.makedirs(output_dir, exist_ok=True)
77
+
78
+ python_files = collect_python_files(repo_dir)
79
+ symbol_index = SymbolIndex()
80
+ file_module_map: Dict[str, str] = {}
81
+
82
+ for file_path in python_files:
83
+ module_path = file_to_module(file_path, repo_dir)
84
+ file_module_map[file_path] = module_path
85
+ tree = parse_ast(file_path)
86
+ symbol_index.index_file(tree, module_path, file_path)
87
+
88
+ import_resolver = ImportResolver(symbol_index)
89
+ for file_path in python_files:
90
+ module_path = file_module_map[file_path]
91
+ imports = extract_imports(file_path)
92
+ import_resolver.index_module_imports(module_path, imports)
93
+
94
+ all_calls = []
95
+ for file_path in python_files:
96
+ all_calls.extend(extract_function_calls(file_path))
97
+
98
+ cross_resolver = CrossFileResolver(symbol_index, import_resolver)
99
+ resolved_calls = []
100
+ for call in all_calls:
101
+ call_file = call.get("file")
102
+ current_module = file_module_map.get(call_file)
103
+ symbol = cross_resolver.resolve_call(call, current_module)
104
+ caller_fqn = build_caller_fqn(call, current_module)
105
+ callee_fqn = f"{symbol.module}.{symbol.qualified_name}" if symbol else None
106
+ resolved_calls.append({
107
+ **call,
108
+ "caller_fqn": caller_fqn,
109
+ "callee_fqn": callee_fqn,
110
+ "resolved_target": callee_fqn,
111
+ })
112
+
113
+ resolved_calls_path = os.path.join(output_dir, "resolved_calls.json")
114
+ with open(resolved_calls_path, "w", encoding="utf-8") as f:
115
+ json.dump(resolved_calls, f, indent=2)
116
+
117
+ return {
118
+ "resolved_calls_path": resolved_calls_path,
119
+ "total_calls": len(resolved_calls),
120
+ "incremental": False,
121
+ "reindexed_files": len(python_files),
122
+ "impacted_files": len(python_files),
123
+ "symbol_snapshot": _symbol_snapshot(symbol_index),
124
+ "imports_snapshot": {},
125
+ "file_module_map": file_module_map,
126
+ "force_rebuild": bool(force_rebuild),
127
+ }
128
+
129
+
130
+ def main():
131
+ result = run()
132
+ print(f"Saved: {result['resolved_calls_path']}")
133
+ print(f"Total calls: {result['total_calls']}")
134
+
135
+
136
+ if __name__ == "__main__":
137
+ main()
@@ -0,0 +1 @@
1
+ # Utils package
@@ -0,0 +1 @@
1
+ # AST name helpers