codemap-python 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- analysis/__init__.py +1 -0
- analysis/architecture/__init__.py +1 -0
- analysis/architecture/architecture_engine.py +155 -0
- analysis/architecture/dependency_cycles.py +103 -0
- analysis/architecture/risk_radar.py +220 -0
- analysis/call_graph/__init__.py +1 -0
- analysis/call_graph/call_extractor.py +91 -0
- analysis/call_graph/call_graph_builder.py +1 -0
- analysis/call_graph/call_resolver.py +56 -0
- analysis/call_graph/context_models.py +1 -0
- analysis/call_graph/cross_file_resolver.py +122 -0
- analysis/call_graph/execution_tracker.py +1 -0
- analysis/call_graph/flow_builder.py +1 -0
- analysis/call_graph/models.py +1 -0
- analysis/core/__init__.py +1 -0
- analysis/core/ast_context.py +1 -0
- analysis/core/ast_parser.py +8 -0
- analysis/core/class_extractor.py +35 -0
- analysis/core/function_extractor.py +16 -0
- analysis/core/import_extractor.py +43 -0
- analysis/explain/__init__.py +1 -0
- analysis/explain/docstring_extractor.py +45 -0
- analysis/explain/explain_runner.py +177 -0
- analysis/explain/repo_summary_generator.py +138 -0
- analysis/explain/return_analyzer.py +114 -0
- analysis/explain/risk_flags.py +1 -0
- analysis/explain/signature_extractor.py +104 -0
- analysis/explain/summary_generator.py +282 -0
- analysis/graph/__init__.py +1 -0
- analysis/graph/callgraph_index.py +117 -0
- analysis/graph/entrypoint_detector.py +1 -0
- analysis/graph/impact_analyzer.py +210 -0
- analysis/indexing/__init__.py +1 -0
- analysis/indexing/import_resolver.py +156 -0
- analysis/indexing/symbol_index.py +150 -0
- analysis/runners/__init__.py +1 -0
- analysis/runners/phase4_runner.py +137 -0
- analysis/utils/__init__.py +1 -0
- analysis/utils/ast_helpers.py +1 -0
- analysis/utils/cache_manager.py +659 -0
- analysis/utils/path_resolver.py +1 -0
- analysis/utils/repo_fetcher.py +469 -0
- cli.py +1728 -0
- codemap_cli.py +11 -0
- codemap_python-0.1.0.dist-info/METADATA +399 -0
- codemap_python-0.1.0.dist-info/RECORD +58 -0
- codemap_python-0.1.0.dist-info/WHEEL +5 -0
- codemap_python-0.1.0.dist-info/entry_points.txt +2 -0
- codemap_python-0.1.0.dist-info/top_level.txt +5 -0
- security_utils.py +51 -0
- ui/__init__.py +1 -0
- ui/app.py +2160 -0
- ui/device_id.py +27 -0
- ui/static/app.js +2703 -0
- ui/static/styles.css +1268 -0
- ui/templates/index.html +231 -0
- ui/utils/__init__.py +1 -0
- ui/utils/registry_manager.py +190 -0
analysis/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Analysis package
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
ο»Ώ
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from collections import defaultdict
|
|
4
|
+
from typing import Dict, Any, Optional, Set
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def _kind_for_fqn(fqn: str, repo_prefix: str) -> str:
|
|
8
|
+
value = str(fqn or "")
|
|
9
|
+
if value.startswith("builtins."):
|
|
10
|
+
return "builtin"
|
|
11
|
+
if repo_prefix and (value == repo_prefix or value.startswith(repo_prefix + ".")):
|
|
12
|
+
return "local"
|
|
13
|
+
return "external"
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def _symbol_locations(symbol_index) -> Dict[str, Dict[str, Any]]:
|
|
17
|
+
out: Dict[str, Dict[str, Any]] = {}
|
|
18
|
+
if symbol_index is None:
|
|
19
|
+
return out
|
|
20
|
+
try:
|
|
21
|
+
symbols = symbol_index.all_symbols()
|
|
22
|
+
except Exception:
|
|
23
|
+
return out
|
|
24
|
+
|
|
25
|
+
for sym in symbols:
|
|
26
|
+
try:
|
|
27
|
+
fqn = f"{sym.module}.{sym.qualified_name}"
|
|
28
|
+
out[fqn] = {
|
|
29
|
+
"file": str(getattr(sym, "file_path", "") or ""),
|
|
30
|
+
"start_line": int(getattr(sym, "start_line", 1) or 1),
|
|
31
|
+
"end_line": int(getattr(sym, "end_line", getattr(sym, "start_line", 1)) or 1),
|
|
32
|
+
}
|
|
33
|
+
except Exception:
|
|
34
|
+
continue
|
|
35
|
+
return out
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def _infer_repo_prefix(nodes: Set[str]) -> str:
|
|
39
|
+
counts: Dict[str, int] = defaultdict(int)
|
|
40
|
+
for fqn in nodes:
|
|
41
|
+
s = str(fqn or "")
|
|
42
|
+
if not s or s.startswith("builtins."):
|
|
43
|
+
continue
|
|
44
|
+
first = s.split(".", 1)[0]
|
|
45
|
+
if first:
|
|
46
|
+
counts[first] += 1
|
|
47
|
+
if not counts:
|
|
48
|
+
return ""
|
|
49
|
+
return sorted(counts.items(), key=lambda kv: (-kv[1], kv[0]))[0][0]
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def compute_architecture_metrics(
    callgraph,
    symbol_index,
    repo_prefix: Optional[str] = None,
    top_k: int = 25,
    fanout_threshold: int = 10,
    fanin_threshold: int = 10,
) -> dict:
    """Aggregate per-symbol and per-file architecture metrics from a call graph.

    Builds fan-in/fan-out counts for every node seen in the call graph or the
    symbol index, classifies each as builtin/local/external, and derives
    repo-level lists (dead symbols, orchestrators, critical symbols, top-N
    rankings) plus per-file coupling figures.

    Args:
        callgraph: object exposing ``all_callers()`` and ``callees_of(fqn)``;
            callee records are expected to carry a ``callee_fqn`` attribute.
        symbol_index: object consumed by ``_symbol_locations`` (may be None).
        repo_prefix: top-level package name; inferred from the node set when
            not given.
        top_k: size of the top fan-in / fan-out rankings.
        fanout_threshold: minimum fan-out for the "orchestrators" list.
        fanin_threshold: minimum fan-in for the "critical_symbols" list.

    Returns:
        dict with keys ``ok``, ``repo_prefix``, ``repo``, ``symbols``,
        ``files`` and ``thresholds``.
    """
    # forward: caller -> set of callees; reverse: callee -> set of callers.
    forward: Dict[str, Set[str]] = defaultdict(set)
    reverse: Dict[str, Set[str]] = defaultdict(set)
    all_nodes: Set[str] = set()

    # Best-effort: a broken/missing call graph yields empty metrics
    # rather than an exception.
    try:
        callers = callgraph.all_callers()
    except Exception:
        callers = []

    for caller in callers:
        s = str(caller or "")
        all_nodes.add(s)
        for cs in callgraph.callees_of(s):
            callee = str(getattr(cs, "callee_fqn", "") or "")
            if not callee:
                continue
            forward[s].add(callee)
            reverse[callee].add(s)
            all_nodes.add(callee)

    # Symbols known only to the index (never called / never calling)
    # still become nodes so dead-symbol detection can see them.
    locations = _symbol_locations(symbol_index)
    all_nodes.update(locations.keys())

    # Explicit prefix wins; otherwise infer from the node population.
    prefix = str(repo_prefix or "").strip() or _infer_repo_prefix(all_nodes)

    symbols_payload: Dict[str, Dict[str, Any]] = {}
    for fqn in sorted(all_nodes):
        loc = locations.get(fqn, {"file": "", "start_line": 1, "end_line": 1})
        symbols_payload[fqn] = {
            "fan_in": len(reverse.get(fqn, set())),
            "fan_out": len(forward.get(fqn, set())),
            "kind": _kind_for_fqn(fqn, prefix),
            "location": {
                "file": str(loc.get("file", "") or ""),
                "start_line": int(loc.get("start_line", 1) or 1),
                "end_line": int(loc.get("end_line", loc.get("start_line", 1)) or 1),
            },
        }

    # Repo-level derived lists are computed over local symbols only;
    # module bodies ("<module>") are excluded from dead-symbol detection
    # since nothing ever "calls" a module body.
    locals_only = [f for f, s in symbols_payload.items() if s.get("kind") == "local"]
    dead_symbols = [f for f in locals_only if symbols_payload[f]["fan_in"] == 0 and not str(f).endswith(".<module>")]
    orchestrators = [f for f in locals_only if symbols_payload[f]["fan_out"] >= int(fanout_threshold)]
    critical = [f for f in locals_only if symbols_payload[f]["fan_in"] >= int(fanin_threshold)]

    # Rankings cover ALL nodes (including builtin/external), highest first,
    # name as tie-breaker for deterministic output.
    top_fan_in = sorted(
        [{"fqn": f, "fan_in": symbols_payload[f]["fan_in"]} for f in symbols_payload],
        key=lambda r: (-int(r["fan_in"]), r["fqn"]),
    )[: int(top_k)]
    top_fan_out = sorted(
        [{"fqn": f, "fan_out": symbols_payload[f]["fan_out"]} for f in symbols_payload],
        key=lambda r: (-int(r["fan_out"]), r["fqn"]),
    )[: int(top_k)]

    # Per-file coupling: which files' symbols call out / get called,
    # and how many caller->callee edges originate in each file.
    files_payload: Dict[str, Dict[str, int]] = {}
    incoming_per_file: Dict[str, Set[str]] = defaultdict(set)
    outgoing_per_file: Dict[str, Set[str]] = defaultdict(set)
    edges_per_file: Dict[str, int] = defaultdict(int)

    for caller, callees in forward.items():
        caller_file = str(symbols_payload.get(caller, {}).get("location", {}).get("file", "") or "")
        if not caller_file:
            # Callers without a known file (e.g. unresolved) are skipped.
            continue
        for callee in sorted(callees):
            callee_file = str(symbols_payload.get(callee, {}).get("location", {}).get("file", "") or "")
            # Set-add is idempotent, so re-adding the caller per callee is safe.
            outgoing_per_file[caller_file].add(caller)
            edges_per_file[caller_file] += 1
            if callee_file:
                incoming_per_file[callee_file].add(callee)

    all_files = set(incoming_per_file.keys()) | set(outgoing_per_file.keys()) | set(edges_per_file.keys())
    for fp in sorted(all_files):
        files_payload[fp] = {
            "incoming_symbols": len(incoming_per_file.get(fp, set())),
            "outgoing_symbols": len(outgoing_per_file.get(fp, set())),
            "edges": int(edges_per_file.get(fp, 0)),
        }

    return {
        "ok": True,
        "repo_prefix": prefix,
        "repo": {
            "total_nodes": len(symbols_payload),
            "dead_symbols": sorted(dead_symbols),
            "orchestrators": sorted(orchestrators),
            "critical_symbols": sorted(critical),
            "top_fan_in": top_fan_in,
            "top_fan_out": top_fan_out,
        },
        "symbols": symbols_payload,
        "files": files_payload,
        # Echo the effective knobs so downstream consumers (risk radar)
        # can report how the lists were derived.
        "thresholds": {
            "fanout_threshold": int(fanout_threshold),
            "fanin_threshold": int(fanin_threshold),
            "top_k": int(top_k),
        },
    }
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from collections import defaultdict
|
|
4
|
+
from typing import Dict, List, Set, Tuple
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def _module_from_fqn(fqn: str) -> str:
|
|
8
|
+
value = str(fqn or "").strip()
|
|
9
|
+
if "." not in value:
|
|
10
|
+
return value
|
|
11
|
+
return value.rsplit(".", 1)[0]
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def build_module_dependency_graph(resolved_calls: list, repo_prefix: str) -> dict:
    """Build a module-level dependency graph from resolved call records.

    Args:
        resolved_calls: dicts carrying at least ``caller_fqn`` and
            ``callee_fqn``; non-dict rows and rows missing either FQN
            are skipped.
        repo_prefix: top-level package of the repository. Only edges whose
            endpoints are both inside the repo are kept.

    Returns:
        Mapping of caller module -> set of callee modules (callee-only
        modules appear with an empty set). Self-edges (intra-module calls)
        are dropped. Empty dict when no prefix is given.
    """
    graph: Dict[str, Set[str]] = defaultdict(set)
    prefix = str(repo_prefix or "").strip()
    if not prefix:
        return {}

    def module_of(fqn: str) -> str:
        # "pkg.mod.func" -> "pkg.mod"; a bare name is its own module.
        value = str(fqn or "").strip()
        return value if "." not in value else value.rsplit(".", 1)[0]

    def is_local(module: str) -> bool:
        # FIX: also accept the bare top-level module (module == prefix),
        # matching how the architecture engine classifies "local" symbols;
        # previously a call like "repo.main -> repo.util.f" was silently
        # dropped because module "repo" failed the startswith test.
        return module == prefix or module.startswith(prefix + ".")

    for row in resolved_calls or []:
        if not isinstance(row, dict):
            continue
        caller_fqn = str(row.get("caller_fqn", "") or "").strip()
        callee_fqn = str(row.get("callee_fqn", "") or "").strip()
        if not caller_fqn or not callee_fqn:
            continue

        caller_module = module_of(caller_fqn)
        callee_module = module_of(callee_fqn)

        if not is_local(caller_module) or not is_local(callee_module):
            continue
        if caller_module == callee_module:
            # Intra-module calls carry no cross-module coupling information.
            continue
        graph[caller_module].add(callee_module)
        # Ensure pure callees exist as nodes so cycle detection sees them.
        graph.setdefault(callee_module, set())

    # Plain dict with copied sets so callers can mutate the result safely.
    return {k: set(v) for k, v in graph.items()}
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def _normalize_cycle(cycle: List[str]) -> Tuple[str, ...]:
|
|
44
|
+
# cycle is expected without duplicated closing node.
|
|
45
|
+
if not cycle:
|
|
46
|
+
return tuple()
|
|
47
|
+
nodes = list(cycle)
|
|
48
|
+
min_idx = min(range(len(nodes)), key=lambda i: nodes[i])
|
|
49
|
+
rotated = nodes[min_idx:] + nodes[:min_idx]
|
|
50
|
+
return tuple(rotated + [rotated[0]])
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def find_dependency_cycles(graph: dict, max_cycles: int = 50, max_depth: int = 20) -> list:
    """Enumerate simple dependency cycles (length >= 2) in a module graph.

    Cycles are deduplicated across rotations via canonical form, capped at
    ``max_cycles`` results and ``max_depth`` path length. Each returned
    cycle is a node list with the first node repeated at the end.
    Deterministic: starts and successors are explored in sorted order.
    """
    limit = int(max_cycles)
    depth_cap = int(max_depth)

    adjacency: Dict[str, List[str]] = {}
    for node, succs in (graph or {}).items():
        adjacency[str(node)] = sorted(str(s) for s in (succs or []))

    cycles: List[List[str]] = []
    recorded: Set[Tuple[str, ...]] = set()

    def canonical(path_nodes: List[str]) -> Tuple[str, ...]:
        # Rotate so the smallest node leads, then close the loop.
        if not path_nodes:
            return tuple()
        pivot = min(range(len(path_nodes)), key=lambda i: path_nodes[i])
        ring = path_nodes[pivot:] + path_nodes[:pivot]
        return tuple(ring + [ring[0]])

    def walk(origin: str, current: str, trail: List[str], on_path: Set[str]) -> None:
        if len(cycles) >= limit or len(trail) >= depth_cap:
            return
        for succ in adjacency.get(current, []):
            if succ == origin and len(trail) >= 2:
                key = canonical(trail)
                if key and key not in recorded:
                    recorded.add(key)
                    cycles.append(list(key))
                    if len(cycles) >= limit:
                        return
                continue
            if succ in on_path:
                # Only simple cycles: never revisit a node on the current path.
                continue
            on_path.add(succ)
            trail.append(succ)
            walk(origin, succ, trail, on_path)
            trail.pop()
            on_path.remove(succ)

    for origin in sorted(adjacency):
        walk(origin, origin, [origin], {origin})
        if len(cycles) >= limit:
            break

    return cycles[:limit]
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def compute_dependency_cycle_metrics(resolved_calls: list, repo_prefix: str) -> dict:
    """Summarize module-level dependency cycles for a repository.

    Builds the module graph from resolved calls, detects cycles, and
    returns a payload with module/edge counts and the cycle list.
    """
    graph = build_module_dependency_graph(resolved_calls=resolved_calls, repo_prefix=repo_prefix)
    cycles = find_dependency_cycles(graph)
    edge_total = sum(len(targets) for targets in graph.values())
    return {
        "ok": True,
        "repo_prefix": str(repo_prefix or ""),
        "modules": len(graph),
        "edges": int(edge_total),
        "cycle_count": len(cycles),
        "cycles": cycles,
    }
|
|
@@ -0,0 +1,220 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import os
|
|
5
|
+
from datetime import datetime, timezone
|
|
6
|
+
from typing import Any, Dict, List
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def _load_json(path: str, default: Any) -> Any:
|
|
10
|
+
if not os.path.exists(path):
|
|
11
|
+
return default
|
|
12
|
+
try:
|
|
13
|
+
with open(path, "r", encoding="utf-8") as f:
|
|
14
|
+
return json.load(f)
|
|
15
|
+
except Exception:
|
|
16
|
+
return default
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _percentile_90(values: List[int], floor: int) -> int:
|
|
20
|
+
vals = sorted(int(v) for v in values if int(v) >= 0)
|
|
21
|
+
if not vals:
|
|
22
|
+
return int(floor)
|
|
23
|
+
idx = int(0.9 * (len(vals) - 1))
|
|
24
|
+
return max(int(floor), int(vals[idx]))
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _risk_label(score: int) -> str:
|
|
28
|
+
if score >= 70:
|
|
29
|
+
return "high"
|
|
30
|
+
if score >= 40:
|
|
31
|
+
return "medium"
|
|
32
|
+
return "low"
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _clamp(value: int, lo: int, hi: int) -> int:
|
|
36
|
+
return max(lo, min(hi, int(value)))
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def compute_risk_radar(cache_dir: str, top_k: int = 25) -> dict:
    """Score refactoring risk from cached analysis artifacts.

    Reads ``architecture_metrics.json`` (required), ``dependency_cycles.json``
    and ``analysis_metrics.json`` (both optional) from *cache_dir*, then
    ranks local symbols and files by an additive 0-100 risk score and
    derives a short list of suggested refactor targets.

    Args:
        cache_dir: directory holding the cached JSON artifacts.
        top_k: cap on the hotspot and risky-file lists.

    Returns:
        dict with ``thresholds``, ``repo_health``, ``hotspots``,
        ``risky_files`` and ``refactor_targets``.

    Raises:
        RuntimeError: when architecture metrics are missing or empty.
    """
    arch = _load_json(os.path.join(cache_dir, "architecture_metrics.json"), {})
    dep = _load_json(os.path.join(cache_dir, "dependency_cycles.json"), {})
    analysis = _load_json(os.path.join(cache_dir, "analysis_metrics.json"), {})

    # Architecture metrics are the backbone; without them there is nothing
    # to score. The other two artifacts degrade gracefully to defaults.
    if not isinstance(arch, dict) or not arch:
        raise RuntimeError("Missing architecture_metrics.json")

    repo_prefix = str(arch.get("repo_prefix", "") or "")
    repo = arch.get("repo", {}) if isinstance(arch.get("repo"), dict) else {}
    symbols = arch.get("symbols", {}) if isinstance(arch.get("symbols"), dict) else {}
    files = arch.get("files", {}) if isinstance(arch.get("files"), dict) else {}

    # Only repo-local symbols are scored; builtins/externals are noise here.
    local_symbols = {fqn: s for fqn, s in symbols.items() if isinstance(s, dict) and str(s.get("kind", "")) == "local"}
    fan_in_vals = [int(s.get("fan_in", 0) or 0) for s in local_symbols.values()]
    fan_out_vals = [int(s.get("fan_out", 0) or 0) for s in local_symbols.values()]
    file_edges_vals = [int((v or {}).get("edges", 0) or 0) for v in files.values() if isinstance(v, dict)]

    # "Hot" thresholds are the 90th percentile of each distribution,
    # never below a fixed floor (10 / 10 / 20) so tiny repos don't flag
    # everything.
    fan_in_hot = _percentile_90(fan_in_vals, 10)
    fan_out_hot = _percentile_90(fan_out_vals, 10)
    file_edges_hot = _percentile_90(file_edges_vals, 20)

    total_calls = int(analysis.get("total_calls", 0) or 0)
    unresolved_calls = int(analysis.get("unresolved_calls", 0) or 0)
    unresolved_ratio = float(unresolved_calls / total_calls) if total_calls > 0 else 0.0
    cycle_count = int(dep.get("cycle_count", 0) or 0)

    orchestrators = set(str(x) for x in (repo.get("orchestrators") or []))
    critical = set(str(x) for x in (repo.get("critical_symbols") or []))

    # --- Per-symbol scoring: additive weights, clamped to 0-100. ---
    # Fan-in/out percentile hits dominate (40 each); flags add 10-15.
    # NOTE(review): the cycle and unresolved-ratio bonuses are repo-wide
    # and therefore raise every symbol's score uniformly.
    hotspots: List[Dict[str, Any]] = []
    for fqn, sym in local_symbols.items():
        fan_in = int(sym.get("fan_in", 0) or 0)
        fan_out = int(sym.get("fan_out", 0) or 0)
        score = 0
        reasons: List[str] = []
        flags: List[str] = []

        if fan_in >= fan_in_hot:
            score += 40
            reasons.append("High fan-in: many callers depend on it")
        if fan_out >= fan_out_hot:
            score += 40
            reasons.append("High fan-out: orchestrates many calls")
        if fqn in orchestrators:
            score += 15
            flags.append("orchestrator")
        if fqn in critical:
            score += 15
            flags.append("critical")
        if str(fqn).endswith(".<module>"):
            score += 10
            reasons.append("Module-level script orchestration")
            flags.append("module_level")
        if cycle_count > 0:
            score += 10
            reasons.append("Repo has dependency cycles")
            flags.append("cycle_related")
        if unresolved_ratio > 0.2:
            score += 10
            reasons.append("High unresolved call ratio")

        score = _clamp(score, 0, 100)
        loc = sym.get("location", {}) if isinstance(sym.get("location"), dict) else {}
        hotspots.append(
            {
                "fqn": fqn,
                "risk": _risk_label(score),
                "score": score,
                "reasons": reasons,
                "fan_in": fan_in,
                "fan_out": fan_out,
                "location": {
                    "file": str(loc.get("file", "") or ""),
                    "start_line": int(loc.get("start_line", 1) or 1),
                    "end_line": int(loc.get("end_line", loc.get("start_line", 1)) or 1),
                },
                "flags": sorted(set(flags)),
            }
        )

    # Highest score first, name as tie-breaker, truncated to top_k.
    hotspots = sorted(hotspots, key=lambda h: (-int(h["score"]), h["fqn"]))[: int(top_k)]

    # --- Per-file scoring: coupling percentiles over the file table. ---
    file_out_vals = sorted([int((v or {}).get("outgoing_symbols", 0) or 0) for v in files.values() if isinstance(v, dict)])
    file_in_vals = sorted([int((v or {}).get("incoming_symbols", 0) or 0) for v in files.values() if isinstance(v, dict)])
    out_top = file_out_vals[int(0.9 * (len(file_out_vals) - 1))] if file_out_vals else 0
    in_top = file_in_vals[int(0.9 * (len(file_in_vals) - 1))] if file_in_vals else 0

    risky_files: List[Dict[str, Any]] = []
    for file_path, fv in files.items():
        if not isinstance(fv, dict):
            continue
        edges = int(fv.get("edges", 0) or 0)
        incoming = int(fv.get("incoming_symbols", 0) or 0)
        outgoing = int(fv.get("outgoing_symbols", 0) or 0)

        score = 0
        reasons: List[str] = []
        if edges >= file_edges_hot:
            score += 60
            reasons.append("High edge density")
        if outgoing >= out_top and out_top > 0:
            score += 20
            reasons.append("Top outgoing coupling")
        if incoming >= in_top and in_top > 0:
            score += 20
            reasons.append("Top incoming coupling")

        score = _clamp(score, 0, 100)
        risky_files.append(
            {
                "file": str(file_path),
                "risk": _risk_label(score),
                "score": score,
                "edges": edges,
                "incoming_symbols": incoming,
                "outgoing_symbols": outgoing,
                "reasons": reasons,
            }
        )

    risky_files = sorted(risky_files, key=lambda r: (-int(r["score"]), r["file"]))[: int(top_k)]

    # --- Refactor suggestions, derived from the truncated hotspot list. ---
    by_fan_out = sorted(hotspots, key=lambda h: (-int(h.get("fan_out", 0)), h["fqn"]))
    by_fan_in = sorted(hotspots, key=lambda h: (-int(h.get("fan_in", 0)), h["fqn"]))
    module_level = [h for h in hotspots if "module_level" in (h.get("flags") or [])]

    refactor_targets: List[Dict[str, Any]] = []
    if by_fan_out:
        refactor_targets.append({
            "title": "Break down top orchestrator",
            "why": "High fan-out symbols coordinate too many responsibilities.",
            "targets": [h["fqn"] for h in by_fan_out[:3]],
        })
    if by_fan_in:
        refactor_targets.append({
            "title": "Stabilize critical API",
            "why": "High fan-in symbols affect many callers and should remain stable.",
            "targets": [h["fqn"] for h in by_fan_in[:3]],
        })
    if module_level:
        refactor_targets.append({
            "title": "Reduce script-level work",
            "why": "Module-level orchestration is harder to test and reuse.",
            "targets": [h["fqn"] for h in module_level[:3]],
        })
    if unresolved_ratio > 0.2:
        refactor_targets.append({
            "title": "Investigate unresolved calls",
            "why": "Unresolved calls hide dependencies and increase uncertainty.",
            "targets": [f"unresolved_ratio={unresolved_ratio:.2f}"],
        })
    if cycle_count > 0:
        cycle_preview = dep.get("cycles", []) if isinstance(dep.get("cycles"), list) else []
        pretty = [" -> ".join(str(x) for x in c) for c in cycle_preview[:3] if isinstance(c, list)]
        refactor_targets.append({
            "title": "Address dependency cycles",
            "why": "Cycles increase coupling and make changes risky.",
            "targets": pretty,
        })

    return {
        "ok": True,
        "repo_prefix": repo_prefix,
        "generated_at": datetime.now(timezone.utc).isoformat(),
        "thresholds": {
            "fan_in_hot": int(fan_in_hot),
            "fan_out_hot": int(fan_out_hot),
            "file_edges_hot": int(file_edges_hot),
            "top_k": int(top_k),
        },
        "repo_health": {
            "hotspot_symbols": len(hotspots),
            "risky_files": len(risky_files),
            "dead_symbols": len(repo.get("dead_symbols", []) if isinstance(repo.get("dead_symbols"), list) else []),
            "dependency_cycles": cycle_count,
            "unresolved_ratio": round(unresolved_ratio, 6),
        },
        "hotspots": hotspots,
        "risky_files": risky_files,
        # At most six suggestions to keep the report actionable.
        "refactor_targets": refactor_targets[:6],
    }
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Call graph package
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
# AST Call detection
|
|
2
|
+
|
|
3
|
+
import ast
import builtins
|
|
4
|
+
|
|
5
|
+
class FunctionCallVisitor(ast.NodeVisitor):
    """Collect call sites made inside function bodies of one module's AST.

    Each recorded call is a dict with the enclosing function/class, the
    callee's simple name, the source line/file, a coarse ``call_type``
    (local / method / attribute / builtin) and a best-effort ``target``
    name. Module-level calls are not recorded.
    """

    def __init__(self, file_path):
        self.file_path = file_path
        self.current_function = None  # name of the enclosing function, if any
        self.current_class = None     # name of the enclosing class, if any
        self.calls = []               # accumulated call records
        # FIX: the old code tested `func_name in dir(__builtins__)`.
        # In any module other than __main__, __builtins__ is a dict, so
        # dir() returned dict method names and builtins were misclassified.
        # Snapshot the real builtins module once instead.
        self._builtin_names = frozenset(dir(builtins))

    def visit_ClassDef(self, node):
        # Track class context (saved/restored to support nesting) so
        # self.method() calls can be qualified with the class name.
        previous_class = self.current_class
        self.current_class = node.name

        self.generic_visit(node)

        self.current_class = previous_class

    def visit_FunctionDef(self, node):
        previous_function = self.current_function
        self.current_function = node.name

        self.generic_visit(node)

        self.current_function = previous_function

    # FIX: async functions previously got no context, so calls inside them
    # were dropped or attributed to an enclosing sync function.
    visit_AsyncFunctionDef = visit_FunctionDef

    def visit_Call(self, node):
        if self.current_function is None:
            # Module-level calls are not recorded, but still descend so
            # nested nodes inside the call's arguments are visited.
            self.generic_visit(node)
            return

        func_name = self._get_call_name(node.func)

        obj_name = None
        call_class = self.current_class
        call_type = "local"
        target = func_name

        # obj.foo()
        if isinstance(node.func, ast.Attribute) and isinstance(node.func.value, ast.Name):
            obj_name = node.func.value.id

            # self.method() resolution: qualify with the enclosing class.
            if obj_name == "self" and self.current_class:
                call_type = "method"
                target = f"{self.current_class}.{func_name}"
            else:
                call_type = "attribute"
                target = f"{obj_name}.{func_name}"

        # bare name: builtin function?
        elif isinstance(node.func, ast.Name):
            if func_name in self._builtin_names:
                call_type = "builtin"
                target = f"builtins.{func_name}"

        if func_name:
            self.calls.append({
                "caller": self.current_function,
                "class": call_class,
                "object": obj_name,
                "callee": func_name,
                "line": node.lineno,
                "file": self.file_path,
                "call_type": call_type,
                "target": target,
            })

        # Descend into arguments: nested calls like f(g(x)) are recorded too.
        self.generic_visit(node)

    def _get_call_name(self, node):
        """Return the simple name being called, or None for dynamic targets."""
        # foo()
        if isinstance(node, ast.Name):
            return node.id

        # obj.foo()
        elif isinstance(node, ast.Attribute):
            return node.attr

        return None
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def extract_function_calls(file_path):
    """Parse the Python source at *file_path* and return its call records.

    Thin wrapper around FunctionCallVisitor; raises on unreadable files
    or syntax errors, mirroring open()/ast.parse().
    """
    with open(file_path, "r", encoding="utf-8") as handle:
        source = handle.read()
    tree = ast.parse(source)

    collector = FunctionCallVisitor(file_path)
    collector.visit(tree)
    return collector.calls
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Build project-level graph
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
# Resolve what is called
|
|
2
|
+
# analysis/call_graph/call_resolver.py
|
|
3
|
+
|
|
4
|
+
import builtins
|
|
5
|
+
|
|
6
|
+
def resolve_calls(calls, local_functions, imports, class_methods):
    """Classify raw call records against local defs, imports and builtins.

    Args:
        calls: dicts with ``caller``, ``callee`` and optional ``class`` /
            ``object`` keys (as produced by the call extractor).
        local_functions: names of functions defined in the same module.
        imports: import records with ``type`` ("import"/"from_import") and
            ``module`` / ``name`` fields.
        class_methods: mapping of class name -> set of its method names.

    Returns:
        New list of records, each with ``call_type`` (and ``target`` for
        non-method calls); self.method() calls on known methods get
        class-qualified caller/callee names.
    """
    import builtins
    builtin_funcs = set(dir(builtins))

    # Names brought into scope by imports: the top package for plain
    # imports, the bound name for from-imports.
    imported_names = set()
    for imp in imports:
        kind = imp["type"]
        if kind == "import":
            imported_names.add(imp["module"].split(".")[0])
        elif kind == "from_import":
            imported_names.add(imp["name"])

    def classify(name):
        # Resolution priority: local definitions shadow imports,
        # which shadow builtins; anything else is unknown.
        if name in local_functions:
            return "local", name
        if name in imported_names:
            return "imported", name
        if name in builtin_funcs:
            return "builtin", f"builtins.{name}"
        return "unknown", name

    resolved = []

    for call in calls:
        callee = call["callee"]
        caller = call["caller"]
        class_name = call.get("class")
        obj = call.get("object")

        # self.method() on a known method of the enclosing class:
        # qualify both endpoints with the class name.
        if obj == "self" and class_name and callee in class_methods.get(class_name, set()):
            resolved.append({
                **call,
                "caller": f"{class_name}.{caller}",
                "callee": f"{class_name}.{callee}",
                "call_type": "method",
            })
            continue

        call_type, target = classify(callee)
        resolved.append({
            **call,
            "call_type": call_type,
            "target": target,
        })
    return resolved
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Execution metadata models
|