codemap-python 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- analysis/__init__.py +1 -0
- analysis/architecture/__init__.py +1 -0
- analysis/architecture/architecture_engine.py +155 -0
- analysis/architecture/dependency_cycles.py +103 -0
- analysis/architecture/risk_radar.py +220 -0
- analysis/call_graph/__init__.py +1 -0
- analysis/call_graph/call_extractor.py +91 -0
- analysis/call_graph/call_graph_builder.py +1 -0
- analysis/call_graph/call_resolver.py +56 -0
- analysis/call_graph/context_models.py +1 -0
- analysis/call_graph/cross_file_resolver.py +122 -0
- analysis/call_graph/execution_tracker.py +1 -0
- analysis/call_graph/flow_builder.py +1 -0
- analysis/call_graph/models.py +1 -0
- analysis/core/__init__.py +1 -0
- analysis/core/ast_context.py +1 -0
- analysis/core/ast_parser.py +8 -0
- analysis/core/class_extractor.py +35 -0
- analysis/core/function_extractor.py +16 -0
- analysis/core/import_extractor.py +43 -0
- analysis/explain/__init__.py +1 -0
- analysis/explain/docstring_extractor.py +45 -0
- analysis/explain/explain_runner.py +177 -0
- analysis/explain/repo_summary_generator.py +138 -0
- analysis/explain/return_analyzer.py +114 -0
- analysis/explain/risk_flags.py +1 -0
- analysis/explain/signature_extractor.py +104 -0
- analysis/explain/summary_generator.py +282 -0
- analysis/graph/__init__.py +1 -0
- analysis/graph/callgraph_index.py +117 -0
- analysis/graph/entrypoint_detector.py +1 -0
- analysis/graph/impact_analyzer.py +210 -0
- analysis/indexing/__init__.py +1 -0
- analysis/indexing/import_resolver.py +156 -0
- analysis/indexing/symbol_index.py +150 -0
- analysis/runners/__init__.py +1 -0
- analysis/runners/phase4_runner.py +137 -0
- analysis/utils/__init__.py +1 -0
- analysis/utils/ast_helpers.py +1 -0
- analysis/utils/cache_manager.py +659 -0
- analysis/utils/path_resolver.py +1 -0
- analysis/utils/repo_fetcher.py +469 -0
- cli.py +1728 -0
- codemap_cli.py +11 -0
- codemap_python-0.1.0.dist-info/METADATA +399 -0
- codemap_python-0.1.0.dist-info/RECORD +58 -0
- codemap_python-0.1.0.dist-info/WHEEL +5 -0
- codemap_python-0.1.0.dist-info/entry_points.txt +2 -0
- codemap_python-0.1.0.dist-info/top_level.txt +5 -0
- security_utils.py +51 -0
- ui/__init__.py +1 -0
- ui/app.py +2160 -0
- ui/device_id.py +27 -0
- ui/static/app.js +2703 -0
- ui/static/styles.css +1268 -0
- ui/templates/index.html +231 -0
- ui/utils/__init__.py +1 -0
- ui/utils/registry_manager.py +190 -0
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import os
|
|
5
|
+
from collections import defaultdict, deque
|
|
6
|
+
from typing import Any, Dict, List, Optional, Set, Tuple
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def _load_json(path: str, default: Any) -> Any:
|
|
10
|
+
if not os.path.exists(path):
|
|
11
|
+
return default
|
|
12
|
+
try:
|
|
13
|
+
with open(path, "r", encoding="utf-8") as f:
|
|
14
|
+
return json.load(f)
|
|
15
|
+
except Exception:
|
|
16
|
+
return default
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def load_resolved_calls(cache_dir: str) -> List[dict]:
    """Load the resolved-call rows cached under *cache_dir*.

    Returns [] when the cache file is missing, unreadable, or does not
    contain a JSON list.
    """
    path = os.path.join(cache_dir, "resolved_calls.json")
    if not os.path.exists(path):
        return []
    try:
        with open(path, "r", encoding="utf-8") as handle:
            data = json.load(handle)
    except Exception:  # best-effort cache read
        return []
    return data if isinstance(data, list) else []
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def build_adjacency(resolved_calls: List[dict]) -> Tuple[Dict[str, Set[str]], Dict[str, Set[str]]]:
    """Build caller->callees and callee->callers maps from resolved call rows.

    Rows that are not dicts, or that lack either FQN, are ignored.  Every
    node appearing on one side of an edge is also registered (with an empty
    set) on the other map, so both maps share the same node universe.
    """
    callers_to_callees: Dict[str, Set[str]] = defaultdict(set)
    callees_to_callers: Dict[str, Set[str]] = defaultdict(set)
    for entry in resolved_calls:
        if not isinstance(entry, dict):
            continue
        src = str(entry.get("caller_fqn", "") or "")
        dst = str(entry.get("callee_fqn", "") or "")
        if src and dst:
            callers_to_callees[src].add(dst)
            callees_to_callers[dst].add(src)
            callers_to_callees.setdefault(dst, set())
            callees_to_callers.setdefault(src, set())
    return callers_to_callees, callees_to_callers
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def infer_repo_prefix(cache_dir: str) -> str:
    """Return the repo prefix recorded in architecture_metrics.json, or ""."""
    path = os.path.join(cache_dir, "architecture_metrics.json")
    try:
        with open(path, "r", encoding="utf-8") as handle:
            arch = json.load(handle)
    except Exception:  # missing or corrupt metrics file -> no prefix
        return ""
    if not isinstance(arch, dict):
        return ""
    return str(arch.get("repo_prefix", "") or "")
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def resolve_target(target: str, repo_prefix: str, resolved_calls: List[dict], arch_metrics: Dict[str, Any]) -> Dict[str, Any]:
    """Classify *target* as a symbol FQN or a file suffix and find start nodes.

    A dotted name such as ``pkg.mod.func`` is taken verbatim as a symbol.
    Anything that looks like a file path (ends in ``.py`` or contains a path
    separator) is matched as a suffix against the ``file`` field of the
    resolved calls, falling back to symbol locations in *arch_metrics*.

    Returns ``{"type": "symbol"|"file", "value": raw target,
    "start_nodes": sorted FQNs}``.
    """
    raw = str(target or "").strip()
    if not raw:
        return {"type": "symbol", "value": "", "start_nodes": []}

    # BUG FIX: a file target such as "ui/app.py" always contains "." and was
    # previously misclassified as a symbol, so the file-matching branch below
    # was unreachable for real paths.  Detect path-like targets first.
    looks_like_path = raw.endswith(".py") or "/" in raw or "\\" in raw
    if "." in raw and not looks_like_path:
        return {"type": "symbol", "value": raw, "start_nodes": [raw]}

    match_suffix = raw.replace("\\", "/")
    start_nodes: Set[str] = set()
    for row in resolved_calls:
        if not isinstance(row, dict):
            continue
        file_path = str(row.get("file", "") or "").replace("\\", "/")
        if file_path.endswith(match_suffix):
            caller = str(row.get("caller_fqn", "") or "")
            if caller:
                start_nodes.add(caller)

    # Fall back to symbol locations recorded in the architecture metrics
    # when no call row mentioned the file.
    if not start_nodes and repo_prefix:
        symbols = arch_metrics.get("symbols", {}) if isinstance(arch_metrics, dict) else {}
        for fqn, meta in (symbols.items() if isinstance(symbols, dict) else []):
            if not isinstance(meta, dict):
                continue
            loc = meta.get("location", {}) if isinstance(meta.get("location"), dict) else {}
            file_path = str(loc.get("file", "") or "").replace("\\", "/")
            if file_path.endswith(match_suffix):
                start_nodes.add(str(fqn))

    return {"type": "file", "value": raw, "start_nodes": sorted(start_nodes)}
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def _node_details(
|
|
83
|
+
fqn: str,
|
|
84
|
+
symbols: Dict[str, Any],
|
|
85
|
+
resolved_calls: List[dict],
|
|
86
|
+
) -> Dict[str, Any]:
|
|
87
|
+
sym = symbols.get(fqn, {}) if isinstance(symbols, dict) and isinstance(symbols.get(fqn), dict) else {}
|
|
88
|
+
fan_in = int(sym.get("fan_in", 0) or 0)
|
|
89
|
+
fan_out = int(sym.get("fan_out", 0) or 0)
|
|
90
|
+
loc = sym.get("location", {}) if isinstance(sym.get("location"), dict) else {}
|
|
91
|
+
file_path = str(loc.get("file", "") or "")
|
|
92
|
+
line = int(loc.get("start_line", 1) or 1)
|
|
93
|
+
|
|
94
|
+
if not file_path:
|
|
95
|
+
for row in resolved_calls:
|
|
96
|
+
if not isinstance(row, dict):
|
|
97
|
+
continue
|
|
98
|
+
if str(row.get("caller_fqn", "") or "") == fqn or str(row.get("callee_fqn", "") or "") == fqn:
|
|
99
|
+
file_path = str(row.get("file", "") or "")
|
|
100
|
+
line = int(row.get("line", 1) or 1)
|
|
101
|
+
break
|
|
102
|
+
|
|
103
|
+
return {
|
|
104
|
+
"fqn": fqn,
|
|
105
|
+
"fan_in": fan_in,
|
|
106
|
+
"fan_out": fan_out,
|
|
107
|
+
"file": file_path,
|
|
108
|
+
"line": line,
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def _bfs(
|
|
113
|
+
starts: List[str],
|
|
114
|
+
adjacency: Dict[str, Set[str]],
|
|
115
|
+
depth: int,
|
|
116
|
+
max_nodes: int,
|
|
117
|
+
) -> Tuple[Dict[str, int], List[Dict[str, str]], bool]:
|
|
118
|
+
visited_dist: Dict[str, int] = {}
|
|
119
|
+
edges: List[Dict[str, str]] = []
|
|
120
|
+
truncated = False
|
|
121
|
+
|
|
122
|
+
q: deque[Tuple[str, int]] = deque()
|
|
123
|
+
for s in starts:
|
|
124
|
+
q.append((s, 0))
|
|
125
|
+
|
|
126
|
+
while q:
|
|
127
|
+
node, dist = q.popleft()
|
|
128
|
+
if dist >= depth:
|
|
129
|
+
continue
|
|
130
|
+
neighbors = sorted(adjacency.get(node, set()))
|
|
131
|
+
for nxt in neighbors:
|
|
132
|
+
edges.append({"from": node, "to": nxt})
|
|
133
|
+
next_dist = dist + 1
|
|
134
|
+
prev = visited_dist.get(nxt)
|
|
135
|
+
if prev is None or next_dist < prev:
|
|
136
|
+
if len(visited_dist) >= max_nodes:
|
|
137
|
+
truncated = True
|
|
138
|
+
continue
|
|
139
|
+
visited_dist[nxt] = next_dist
|
|
140
|
+
q.append((nxt, next_dist))
|
|
141
|
+
|
|
142
|
+
return visited_dist, edges, truncated
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def summarize_impacted_files(nodes: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Aggregate node rows into per-file counts, busiest files first.

    Nodes without a file are skipped.  Ties are broken alphabetically.
    """
    per_file: Dict[str, int] = defaultdict(int)
    for node in nodes:
        path = str(node.get("file", "") or "")
        if path:
            per_file[path] += 1
    summary = [{"file": path, "count": total} for path, total in per_file.items()]
    summary.sort(key=lambda row: (-int(row["count"]), row["file"]))
    return summary
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def compute_impact(cache_dir: str, target: str, depth: int = 2, max_nodes: int = 200) -> dict:
    """Compute upstream (callers) and downstream (callees) impact of *target*.

    Reads the cached call graph under *cache_dir*, resolves *target* to a
    set of start nodes, then walks the graph both ways up to *depth* hops,
    capping each direction at *max_nodes* discovered nodes.

    Returns a JSON-serializable report with per-direction node/edge lists,
    truncation flags, and per-file impact counts.
    """
    depth = max(1, int(depth))
    max_nodes = max(10, int(max_nodes))

    resolved_calls = load_resolved_calls(cache_dir)
    arch = _load_json(os.path.join(cache_dir, "architecture_metrics.json"), {})
    if not isinstance(arch, dict):
        arch = {}

    # Read the prefix from the metrics already in memory instead of calling
    # infer_repo_prefix(), which would re-read the same JSON file from disk.
    repo_prefix = str(arch.get("repo_prefix", "") or "")
    symbols = arch.get("symbols", {}) if isinstance(arch.get("symbols"), dict) else {}
    forward, backward = build_adjacency(resolved_calls)

    target_info = resolve_target(target=target, repo_prefix=repo_prefix, resolved_calls=resolved_calls, arch_metrics=arch)
    starts = [str(s) for s in (target_info.get("start_nodes") or []) if str(s)]

    up_map, up_edges_raw, up_trunc = _bfs(starts=starts, adjacency=backward, depth=depth, max_nodes=max_nodes)
    down_map, down_edges_raw, down_trunc = _bfs(starts=starts, adjacency=forward, depth=depth, max_nodes=max_nodes)

    def _rows(dist_map: Dict[str, int]) -> List[Dict[str, Any]]:
        # Shared shaping for both directions: detail each node, ordered by
        # (distance, fqn) so the report is deterministic.
        rows: List[Dict[str, Any]] = []
        for fqn, dist in sorted(dist_map.items(), key=lambda kv: (kv[1], kv[0])):
            row = _node_details(fqn, symbols, resolved_calls)
            row["distance"] = int(dist)
            rows.append(row)
        return rows

    upstream_nodes = _rows(up_map)
    downstream_nodes = _rows(down_map)

    # The upstream walk runs over the reversed graph; flip its edges back so
    # every edge in the report reads caller -> callee.
    upstream_edges = [{"from": e["to"], "to": e["from"]} for e in up_edges_raw]
    downstream_edges = list(down_edges_raw)

    return {
        "ok": True,
        "repo_prefix": repo_prefix,
        "target": {"type": target_info.get("type", "symbol"), "value": str(target_info.get("value", "") or "")},
        "depth": depth,
        "max_nodes": max_nodes,
        "upstream": {
            "nodes": upstream_nodes,
            "edges": upstream_edges,
            "truncated": bool(up_trunc),
        },
        "downstream": {
            "nodes": downstream_nodes,
            "edges": downstream_edges,
            "truncated": bool(down_trunc),
        },
        "impacted_files": {
            "upstream": summarize_impacted_files(upstream_nodes),
            "downstream": summarize_impacted_files(downstream_nodes),
        },
    }
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Indexing modules - Phase 4: Global knowledge layer
|
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
# Resolves imports → actual files
|
|
2
|
+
from dataclasses import dataclass
|
|
3
|
+
from typing import Dict, Optional, List
|
|
4
|
+
|
|
5
|
+
from analysis.indexing.symbol_index import SymbolIndex
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
# ----------------------------
|
|
9
|
+
# Data Model
|
|
10
|
+
# ----------------------------
|
|
11
|
+
|
|
12
|
+
@dataclass
class ResolvedImport:
    """A single resolved import binding within one file."""
    alias: str               # local name the file uses
    module: str              # fully-qualified module path
    symbol: Optional[str]    # imported symbol, or None for a plain "import x"


# ----------------------------
# Import Resolver
# ----------------------------

class ImportResolver:
    """Resolves raw import statements into fully-qualified references.

    Also caches per-module alias maps so later passes can look up what a
    local name means inside any indexed module.
    """

    def __init__(self, symbol_index: SymbolIndex):
        self.symbol_index = symbol_index
        # Cache: module name -> {alias -> ResolvedImport}
        self._imports_by_module: Dict[str, Dict[str, ResolvedImport]] = {}

    # ----------------------------
    # Public API
    # ----------------------------
    def resolve_imports(
        self,
        imports: List[dict],
        current_module: str
    ) -> Dict[str, ResolvedImport]:
        """Resolve every import record of one file.

        Returns:
            Dict[alias -> ResolvedImport]
        """
        alias_map: Dict[str, ResolvedImport] = {}
        for record in imports:
            kind = record["type"]
            if kind == "import":
                self._handle_import(record, alias_map)
            elif kind == "from_import":
                self._handle_from_import(record, current_module, alias_map)
        return alias_map

    def index_module_imports(self, module_name: str, imports: List[dict]) -> Dict[str, ResolvedImport]:
        """Resolve *imports* for *module_name* and cache the result.

        Runners call this once per file.
        """
        alias_map = self.resolve_imports(imports, module_name)
        self._imports_by_module[module_name] = alias_map
        return alias_map

    def get_imports(self, module_name: str) -> Dict[str, ResolvedImport]:
        """Return the cached alias map for *module_name* ({} if never indexed)."""
        return self._imports_by_module.get(module_name, {})

    def clear_module(self, module_name: str):
        """Drop the cached imports of one module (for incremental indexing)."""
        if module_name in self._imports_by_module:
            del self._imports_by_module[module_name]

    def clear(self):
        """Drop every cached alias map."""
        self._imports_by_module.clear()

    # ----------------------------
    # Internal Helpers
    # ----------------------------
    def _handle_import(self, record: dict, resolved: Dict[str, ResolvedImport]):
        """Handle ``import a.b.c [as x]``.

        NOTE(review): for a plain ``import a.b.c`` Python binds the name
        ``a``, not ``c``; this uses the final segment as the alias — confirm
        that matches the call extractor's naming convention.
        """
        module = record["module"]
        alias = record.get("alias") or module.split(".")[-1]
        resolved[alias] = ResolvedImport(
            alias=alias,
            module=module,
            symbol=None
        )

    def _handle_from_import(
        self,
        record: dict,
        current_module: str,
        resolved: Dict[str, ResolvedImport]
    ):
        """Handle ``from a.b import c [as d]``, including relative forms."""
        base_module = record["module"]
        name = record["name"]
        alias = record.get("alias") or name
        level = record.get("level", 0)

        if level > 0:
            # "from ..x import y" — rebase the module against the importer.
            base_module = self._resolve_relative_module(
                current_module=current_module,
                level=level,
                target_module=base_module
            )

        resolved[alias] = ResolvedImport(
            alias=alias,
            module=base_module,
            symbol=name
        )

    def _resolve_relative_module(
        self,
        current_module: str,
        level: int,
        target_module: Optional[str]
    ) -> str:
        """Turn a relative import (``from ..utils import x``) into an absolute path."""
        segments = current_module.split(".")
        if level > len(segments):
            # More dots than package depth; fall back to the raw target.
            return target_module or ""
        anchor = segments[:-level]
        if target_module:
            return ".".join(anchor + [target_module])
        return ".".join(anchor)
|
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
import ast
|
|
2
|
+
from dataclasses import dataclass, field
|
|
3
|
+
from enum import Enum
|
|
4
|
+
from typing import Dict, List, Optional, Tuple
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class SymbolKind(Enum):
    """Category of an indexed symbol."""
    FUNCTION = "function"
    CLASS = "class"
    METHOD = "method"
    MODULE = "module"
    BUILTIN = "builtin"
    EXTERNAL = "external"


@dataclass
class SymbolInfo:
    """One indexed symbol and where it lives."""
    name: str                          # bare name (method name without class)
    qualified_name: str                # module-local qualified name, e.g. "Cls.m"
    kind: SymbolKind
    module: str                        # dotted module path
    file_path: str
    start_line: int
    end_line: int
    class_name: Optional[str] = None   # owning class, set for methods only
    metadata: Dict = field(default_factory=dict)


class SymbolIndex:
    """Global registry of all symbols across the codebase."""

    def __init__(self):
        self._symbols: List[SymbolInfo] = []
        self._by_name: Dict[str, List[SymbolInfo]] = {}
        self._by_fqn: Dict[Tuple[str, str], SymbolInfo] = {}

    def add_symbol(self, symbol: SymbolInfo):
        """Register *symbol*; duplicates (same module + qualified name) are ignored."""
        key = (symbol.module, symbol.qualified_name)
        if key in self._by_fqn:
            return
        self._symbols.append(symbol)
        self._by_fqn[key] = symbol
        self._by_name.setdefault(symbol.name, []).append(symbol)

    @staticmethod
    def _span(node: ast.AST) -> Tuple[int, int]:
        """Best-effort (start_line, end_line) for an AST node."""
        start = int(getattr(node, "lineno", 1) or 1)
        end = int(getattr(node, "end_lineno", start) or start)
        return start, end

    def index_file(self, ast_tree: ast.AST, module: str, file_path: str):
        """Index the module itself plus every function, class, and method.

        Fixes over the previous version:
        - ``async def`` functions and methods are indexed too.
        - methods are no longer ALSO registered as module-level functions
          (the generic walk used to revisit class bodies and add each method
          a second time under its bare, unqualified name).
        """
        func_types = (ast.FunctionDef, ast.AsyncFunctionDef)

        # The synthetic module symbol spans line 1 through the last line
        # carried by any node in the tree.
        max_line = 1
        for node in ast.walk(ast_tree):
            for attr in ("lineno", "end_lineno"):
                value = getattr(node, attr, None)
                if isinstance(value, int):
                    max_line = max(max_line, value)

        self.add_symbol(
            SymbolInfo(
                name="<module>",
                qualified_name="<module>",
                kind=SymbolKind.MODULE,
                module=module,
                file_path=file_path,
                start_line=1,
                end_line=max_line,
            )
        )

        # Function nodes that are direct class members; the walk below must
        # skip these so they are only indexed once, as methods.
        method_nodes = set()
        for node in ast.walk(ast_tree):
            if isinstance(node, ast.ClassDef):
                for item in node.body:
                    if isinstance(item, func_types):
                        method_nodes.add(id(item))

        for node in ast.walk(ast_tree):
            if isinstance(node, func_types) and id(node) not in method_nodes:
                start, end = self._span(node)
                self.add_symbol(
                    SymbolInfo(
                        name=node.name,
                        qualified_name=node.name,
                        kind=SymbolKind.FUNCTION,
                        module=module,
                        file_path=file_path,
                        start_line=start,
                        end_line=end,
                    )
                )
            elif isinstance(node, ast.ClassDef):
                start, end = self._span(node)
                self.add_symbol(
                    SymbolInfo(
                        name=node.name,
                        qualified_name=node.name,
                        kind=SymbolKind.CLASS,
                        module=module,
                        file_path=file_path,
                        start_line=start,
                        end_line=end,
                    )
                )
                for item in node.body:
                    if isinstance(item, func_types):
                        m_start, m_end = self._span(item)
                        self.add_symbol(
                            SymbolInfo(
                                name=item.name,
                                qualified_name=f"{node.name}.{item.name}",
                                kind=SymbolKind.METHOD,
                                module=module,
                                file_path=file_path,
                                start_line=m_start,
                                end_line=m_end,
                                class_name=node.name,
                            )
                        )

    def load_snapshot(self, snapshot: List[Dict]):
        """Rebuild the index from serialized rows (see phase runner snapshot).

        Rows that are not dicts, or that lack a module/qualified name, are
        skipped; unknown kinds fall back to FUNCTION.
        """
        self.clear()
        if not isinstance(snapshot, list):
            return
        for row in snapshot:
            if not isinstance(row, dict):
                continue
            kind_raw = str(row.get("kind", "function") or "function").lower()
            try:
                kind = SymbolKind(kind_raw)
            except Exception:
                kind = SymbolKind.FUNCTION
            sym = SymbolInfo(
                name=str(row.get("name", "") or ""),
                qualified_name=str(row.get("qualified_name", "") or ""),
                kind=kind,
                module=str(row.get("module", "") or ""),
                file_path=str(row.get("file_path", "") or ""),
                start_line=int(row.get("start_line", 1) or 1),
                end_line=int(row.get("end_line", row.get("start_line", 1)) or row.get("start_line", 1)),
                class_name=row.get("class_name"),
                metadata=row.get("metadata", {}) if isinstance(row.get("metadata"), dict) else {},
            )
            if sym.module and sym.qualified_name:
                self.add_symbol(sym)

    def get_by_name(self, name: str) -> List[SymbolInfo]:
        """Return every symbol registered under bare *name* ([] if none)."""
        return self._by_name.get(name, [])

    def get(self, module: str, qualified_name: str) -> Optional[SymbolInfo]:
        """Look up one symbol by (module, qualified name)."""
        return self._by_fqn.get((module, qualified_name))

    def all_symbols(self) -> List[SymbolInfo]:
        """Return a copy of every registered symbol, in insertion order."""
        return list(self._symbols)

    def remove_by_file(self, file_path: str):
        """Drop every symbol from *file_path* (rebuilds the internal maps)."""
        keep = [s for s in self._symbols if s.file_path != file_path]
        self.clear()
        for s in keep:
            self.add_symbol(s)

    def clear(self):
        """Empty the index completely."""
        self._symbols.clear()
        self._by_name.clear()
        self._by_fqn.clear()
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Phase controllers
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
# Orchestrates Phase-4 pipeline
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from typing import Optional, Dict, Any, List
|
|
5
|
+
|
|
6
|
+
import os
|
|
7
|
+
import ast
|
|
8
|
+
import json
|
|
9
|
+
from analysis.indexing.symbol_index import SymbolIndex
|
|
10
|
+
from analysis.indexing.import_resolver import ImportResolver
|
|
11
|
+
from analysis.call_graph.cross_file_resolver import CrossFileResolver
|
|
12
|
+
from analysis.call_graph.call_extractor import extract_function_calls
|
|
13
|
+
from analysis.core.import_extractor import extract_imports
|
|
14
|
+
from analysis.graph.callgraph_index import build_caller_fqn
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
PROJECT_ROOT = os.path.dirname(os.path.dirname(__file__))
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def collect_python_files(root_dir: str) -> List[str]:
    """Recursively gather .py files under *root_dir*.

    Skips VCS/cache/virtualenv directories, and any file whose name starts
    with "__" (e.g. __init__.py, __main__.py).
    """
    skip_dirs = {".git", "__pycache__", ".codemap_cache", "node_modules", ".venv", "venv"}
    found: List[str] = []
    for current, subdirs, filenames in os.walk(root_dir):
        # Prune in place so os.walk never descends into skipped directories.
        subdirs[:] = [name for name in subdirs if name not in skip_dirs]
        found.extend(
            os.path.join(current, name)
            for name in filenames
            if name.endswith(".py") and not name.startswith("__")
        )
    return found
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def parse_ast(file_path: str):
    """Parse *file_path* as UTF-8 Python source; raises SyntaxError on bad code."""
    with open(file_path, "r", encoding="utf-8") as handle:
        source = handle.read()
    return ast.parse(source)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def file_to_module(file_path: str, repo_root: str) -> str:
    """Map a file path to a dotted module path prefixed with the repo name.

    e.g. <root>/pkg/mod.py -> "<root-name>.pkg.mod"
    """
    root = os.path.abspath(repo_root)
    path = os.path.abspath(file_path)

    dotted = os.path.relpath(path, root).replace(os.sep, ".")
    if dotted.endswith(".py"):
        dotted = dotted[:-3]

    prefix = os.path.basename(root.rstrip("\\/"))
    return f"{prefix}.{dotted}"
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def _symbol_snapshot(symbol_index: SymbolIndex) -> List[Dict[str, Any]]:
|
|
49
|
+
rows: List[Dict[str, Any]] = []
|
|
50
|
+
for sym in symbol_index.all_symbols():
|
|
51
|
+
rows.append(
|
|
52
|
+
{
|
|
53
|
+
"name": str(sym.name),
|
|
54
|
+
"qualified_name": str(sym.qualified_name),
|
|
55
|
+
"kind": str(getattr(sym.kind, "value", str(sym.kind))),
|
|
56
|
+
"module": str(sym.module),
|
|
57
|
+
"file_path": str(sym.file_path),
|
|
58
|
+
"start_line": int(sym.start_line),
|
|
59
|
+
"end_line": int(sym.end_line),
|
|
60
|
+
"class_name": sym.class_name,
|
|
61
|
+
"metadata": dict(sym.metadata or {}),
|
|
62
|
+
}
|
|
63
|
+
)
|
|
64
|
+
return rows
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def run(repo_dir: Optional[str] = None, output_dir: Optional[str] = None, force_rebuild: bool = False) -> Dict[str, Any]:
    """Run the Phase-4 pipeline: index symbols, resolve imports, resolve calls.

    Writes resolved_calls.json into *output_dir* and returns a summary dict
    with the output path, call counts, and snapshots used by callers.

    Robustness fix: a repo file that fails to parse (syntax error, bad
    encoding, unreadable) is now skipped instead of aborting the whole
    pipeline.
    """
    analysis_root = os.path.dirname(os.path.dirname(__file__))

    if repo_dir is None:
        repo_dir = os.path.join(analysis_root, "testing_repo")

    if output_dir is None:
        output_dir = os.path.join(analysis_root, "output")

    os.makedirs(output_dir, exist_ok=True)

    python_files = collect_python_files(repo_dir)
    symbol_index = SymbolIndex()
    file_module_map: Dict[str, str] = {}

    # Pass 1: parse + symbol-index every file we can read.
    parsed_files: List[str] = []
    for file_path in python_files:
        try:
            tree = parse_ast(file_path)
        except (SyntaxError, UnicodeDecodeError, OSError):
            # One broken file must not kill indexing for the whole repo.
            continue
        module_path = file_to_module(file_path, repo_dir)
        file_module_map[file_path] = module_path
        symbol_index.index_file(tree, module_path, file_path)
        parsed_files.append(file_path)

    # Pass 2: resolve and cache each module's imports.
    import_resolver = ImportResolver(symbol_index)
    for file_path in parsed_files:
        module_path = file_module_map[file_path]
        imports = extract_imports(file_path)
        import_resolver.index_module_imports(module_path, imports)

    # Pass 3: extract raw calls, then resolve them across files.
    all_calls = []
    for file_path in parsed_files:
        all_calls.extend(extract_function_calls(file_path))

    cross_resolver = CrossFileResolver(symbol_index, import_resolver)
    resolved_calls = []
    for call in all_calls:
        call_file = call.get("file")
        current_module = file_module_map.get(call_file)
        symbol = cross_resolver.resolve_call(call, current_module)
        caller_fqn = build_caller_fqn(call, current_module)
        callee_fqn = f"{symbol.module}.{symbol.qualified_name}" if symbol else None
        resolved_calls.append({
            **call,
            "caller_fqn": caller_fqn,
            "callee_fqn": callee_fqn,
            "resolved_target": callee_fqn,  # legacy alias of callee_fqn
        })

    resolved_calls_path = os.path.join(output_dir, "resolved_calls.json")
    with open(resolved_calls_path, "w", encoding="utf-8") as f:
        json.dump(resolved_calls, f, indent=2)

    return {
        "resolved_calls_path": resolved_calls_path,
        "total_calls": len(resolved_calls),
        "incremental": False,
        "reindexed_files": len(parsed_files),
        "impacted_files": len(parsed_files),
        "symbol_snapshot": _symbol_snapshot(symbol_index),
        "imports_snapshot": {},
        "file_module_map": file_module_map,
        "force_rebuild": bool(force_rebuild),
    }
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def main():
    """CLI entry point: run the pipeline and report where results landed."""
    summary = run()
    print(f"Saved: {summary['resolved_calls_path']}")
    print(f"Total calls: {summary['total_calls']}")


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Utils package
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# AST name helpers
|