codemap-python 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- analysis/__init__.py +1 -0
- analysis/architecture/__init__.py +1 -0
- analysis/architecture/architecture_engine.py +155 -0
- analysis/architecture/dependency_cycles.py +103 -0
- analysis/architecture/risk_radar.py +220 -0
- analysis/call_graph/__init__.py +1 -0
- analysis/call_graph/call_extractor.py +91 -0
- analysis/call_graph/call_graph_builder.py +1 -0
- analysis/call_graph/call_resolver.py +56 -0
- analysis/call_graph/context_models.py +1 -0
- analysis/call_graph/cross_file_resolver.py +122 -0
- analysis/call_graph/execution_tracker.py +1 -0
- analysis/call_graph/flow_builder.py +1 -0
- analysis/call_graph/models.py +1 -0
- analysis/core/__init__.py +1 -0
- analysis/core/ast_context.py +1 -0
- analysis/core/ast_parser.py +8 -0
- analysis/core/class_extractor.py +35 -0
- analysis/core/function_extractor.py +16 -0
- analysis/core/import_extractor.py +43 -0
- analysis/explain/__init__.py +1 -0
- analysis/explain/docstring_extractor.py +45 -0
- analysis/explain/explain_runner.py +177 -0
- analysis/explain/repo_summary_generator.py +138 -0
- analysis/explain/return_analyzer.py +114 -0
- analysis/explain/risk_flags.py +1 -0
- analysis/explain/signature_extractor.py +104 -0
- analysis/explain/summary_generator.py +282 -0
- analysis/graph/__init__.py +1 -0
- analysis/graph/callgraph_index.py +117 -0
- analysis/graph/entrypoint_detector.py +1 -0
- analysis/graph/impact_analyzer.py +210 -0
- analysis/indexing/__init__.py +1 -0
- analysis/indexing/import_resolver.py +156 -0
- analysis/indexing/symbol_index.py +150 -0
- analysis/runners/__init__.py +1 -0
- analysis/runners/phase4_runner.py +137 -0
- analysis/utils/__init__.py +1 -0
- analysis/utils/ast_helpers.py +1 -0
- analysis/utils/cache_manager.py +659 -0
- analysis/utils/path_resolver.py +1 -0
- analysis/utils/repo_fetcher.py +469 -0
- cli.py +1728 -0
- codemap_cli.py +11 -0
- codemap_python-0.1.0.dist-info/METADATA +399 -0
- codemap_python-0.1.0.dist-info/RECORD +58 -0
- codemap_python-0.1.0.dist-info/WHEEL +5 -0
- codemap_python-0.1.0.dist-info/entry_points.txt +2 -0
- codemap_python-0.1.0.dist-info/top_level.txt +5 -0
- security_utils.py +51 -0
- ui/__init__.py +1 -0
- ui/app.py +2160 -0
- ui/device_id.py +27 -0
- ui/static/app.js +2703 -0
- ui/static/styles.css +1268 -0
- ui/templates/index.html +231 -0
- ui/utils/__init__.py +1 -0
- ui/utils/registry_manager.py +190 -0
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
# Cross-file call resolution
|
|
2
|
+
from typing import Optional
|
|
3
|
+
|
|
4
|
+
from analysis.indexing.symbol_index import SymbolIndex, SymbolInfo, SymbolKind
|
|
5
|
+
from analysis.indexing.import_resolver import ImportResolver, ResolvedImport
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class CrossFileResolver:
    """Resolve a function/method call site to the exact ``SymbolInfo``
    that defines it.

    Combines the repo-wide :class:`SymbolIndex` with the per-module
    :class:`ImportResolver` so calls can be traced across file boundaries.
    """

    def __init__(
        self,
        symbol_index: SymbolIndex,
        import_resolver: ImportResolver
    ):
        self.symbol_index = symbol_index
        self.import_resolver = import_resolver

    # -------------------------------------------------
    # Public API
    # -------------------------------------------------
    def resolve_call(
        self,
        call: dict,
        current_module: str
    ) -> Optional[SymbolInfo]:
        """Try each resolution strategy in priority order.

        Returns ``None`` when every strategy fails.
        """
        callee = call["callee"]
        receiver = call.get("object")
        owner_class = call.get("class")

        # Imports visible from the calling module.
        visible_imports = self.import_resolver.get_imports(current_module)

        # Strategy 1: method invoked on ``self`` inside a known class.
        if receiver == "self" and owner_class:
            found = self._lookup(current_module, f"{owner_class}.{callee}")
            if found:
                return found

        # Strategy 2: plain function defined in the same module.
        found = self._lookup(current_module, callee)
        if found:
            return found

        # Strategy 3: symbol imported directly (``from mod import name``).
        if callee in visible_imports:
            found = self._from_import(visible_imports[callee])
            if found:
                return found

        # Strategy 4: attribute access on an imported module (``mod.func()``).
        if receiver and receiver in visible_imports:
            found = self._lookup(visible_imports[receiver].module, callee)
            if found:
                return found

        # Strategy 5: Python builtin (``len``, ``print``, ...).
        import builtins
        if hasattr(builtins, callee):
            return SymbolInfo(
                name=callee,
                qualified_name=callee,
                kind=SymbolKind.BUILTIN,
                module="builtins",
                file_path="",
                start_line=-1,
                end_line=-1
            )

        return None

    # -------------------------------------------------
    # Helpers
    # -------------------------------------------------

    def _lookup(self, module: str, qualified: str) -> Optional[SymbolInfo]:
        # Thin wrapper so every strategy goes through a single index call.
        return self.symbol_index.get(module, qualified)

    def _from_import(self, resolved: ResolvedImport) -> Optional[SymbolInfo]:
        # Only ``from mod import name`` records carry a symbol to look up.
        if resolved.symbol:
            return self.symbol_index.get(resolved.module, resolved.symbol)
        return None
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Call stack + depth tracking
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Execution-ordered flow
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# CallNode / CallEdge
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Core analysis modules - Phase 1 & 2
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Shared AST symbol context
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# Class Extractor Module
import ast


def extract_classes(ast_tree, file_path):
    """Collect every class defined anywhere in *ast_tree*.

    Args:
        ast_tree: a parsed tree (output of ``ast.parse``).
        file_path: path recorded verbatim in each result entry.

    Returns:
        list[dict]: one dict per class with keys ``name``, ``file``,
        ``start_line``, ``end_line``, ``base_classes`` and ``methods``.
    """
    classes = []

    for node in ast.walk(ast_tree):
        if isinstance(node, ast.ClassDef):
            # Base classes: keep simple names; for dotted bases keep the
            # final attribute (e.g. ``abc.ABC`` -> ``ABC``).
            base_classes = []
            for base in node.bases:
                if isinstance(base, ast.Name):
                    base_classes.append(base.id)
                elif isinstance(base, ast.Attribute):
                    base_classes.append(base.attr)

            # Method names are the immediate function defs in the class body.
            methods = [
                item.name
                for item in node.body
                if isinstance(item, ast.FunctionDef)
            ]

            classes.append({
                "name": node.name,
                "file": file_path,
                "start_line": node.lineno,
                "end_line": node.end_lineno,
                "base_classes": base_classes,
                "methods": methods
            })

    return classes
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# Function Extractor Module
import ast

def extract_functions(ast_tree, file_path):
    """Return a record for every ``def`` found anywhere in *ast_tree*.

    Note: ``ast.walk`` visits nested scopes too, so methods and inner
    functions are included alongside top-level functions.
    """
    return [
        {
            "fun_name": node.name,
            "file_name": file_path,
            "start_line": node.lineno,
            "end_line": node.end_lineno
        }
        for node in ast.walk(ast_tree)
        if isinstance(node, ast.FunctionDef)
    ]
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
# Import Extractor Module
# analysis/import_extractor.py

import ast

def extract_imports(file_path):
    """Parse *file_path* and list every import statement it contains.

    Each record carries the module, the imported name (``None`` for plain
    ``import``), any alias, the line number and the originating file.
    ``from`` imports additionally record ``level`` (0 = absolute,
    >0 = relative).
    """
    with open(file_path, "r", encoding="utf-8") as handle:
        tree = ast.parse(handle.read())

    found = []
    for node in ast.walk(tree):

        # import module [as alias]
        if isinstance(node, ast.Import):
            found.extend(
                {
                    "type": "import",
                    "module": alias.name,
                    "name": None,
                    "alias": alias.asname,
                    "line": node.lineno,
                    "file": file_path
                }
                for alias in node.names
            )

        # from module import name [as alias]
        elif isinstance(node, ast.ImportFrom):
            found.extend(
                {
                    "type": "from_import",
                    "module": node.module,
                    "name": alias.name,
                    "alias": alias.asname,
                    "level": node.level,
                    "line": node.lineno,
                    "file": file_path
                }
                for alias in node.names
            )

    return found
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Explain package - Symbol documentation and analysis
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
# Get docstrings from AST
# analysis/explain/docstring_extractor.py
# Phase-5 Step-2.1: Extract module/class/function/method docstrings from AST

import ast
from typing import Dict, Optional


def extract_docstrings(ast_tree: ast.AST) -> Dict[str, object]:
    """
    Extract docstrings from a parsed AST tree.

    Returns:
        {
            "module": Optional[str],
            "classes": Dict[str, Optional[str]],
            "functions": Dict[str, Optional[str]],
            "methods": Dict[str, Optional[str]]  # key = "ClassName.method"
        }
    """
    classes: Dict[str, Optional[str]] = {}
    functions: Dict[str, Optional[str]] = {}
    methods: Dict[str, Optional[str]] = {}

    # Deliberately scan only direct children: nested defs are out of
    # scope for Phase-5, so ast.walk would be too broad here.
    for child in ast_tree.body:
        if isinstance(child, ast.FunctionDef):
            functions[child.name] = ast.get_docstring(child)
        elif isinstance(child, ast.ClassDef):
            classes[child.name] = ast.get_docstring(child)
            for member in child.body:
                if isinstance(member, ast.FunctionDef):
                    methods[f"{child.name}.{member.name}"] = ast.get_docstring(member)

    return {
        "module": ast.get_docstring(ast_tree),
        "classes": classes,
        "functions": functions,
        "methods": methods,
    }
|
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
# Orchestrates per-symbol explain
|
|
2
|
+
# analysis/explain/explain_runner.py
|
|
3
|
+
|
|
4
|
+
from __future__ import annotations
|
|
5
|
+
|
|
6
|
+
from typing import Optional, Dict, Any
|
|
7
|
+
|
|
8
|
+
import ast
|
|
9
|
+
import json
|
|
10
|
+
import os
|
|
11
|
+
|
|
12
|
+
from analysis.indexing.symbol_index import SymbolIndex, SymbolInfo
|
|
13
|
+
from analysis.graph.callgraph_index import CallGraphIndex, CallSite
|
|
14
|
+
from analysis.explain.docstring_extractor import extract_docstrings
|
|
15
|
+
from analysis.explain.signature_extractor import extract_signatures
|
|
16
|
+
from analysis.explain.return_analyzer import analyze_returns
|
|
17
|
+
from analysis.explain.summary_generator import generate_symbol_summary
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def collect_python_files(root_dir: str):
    """Recursively gather ``.py`` files under *root_dir*.

    Dunder files such as ``__init__.py`` are deliberately skipped.
    """
    matches = []
    for dirpath, _, filenames in os.walk(root_dir):
        matches.extend(
            os.path.join(dirpath, name)
            for name in filenames
            if name.endswith(".py") and not name.startswith("__")
        )
    return matches
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def parse_ast(file_path: str) -> ast.AST:
    """Read *file_path* as UTF-8 and return its parsed AST."""
    with open(file_path, "r", encoding="utf-8") as handle:
        source = handle.read()
    return ast.parse(source)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def file_to_module(file_path: str, repo_root: str) -> str:
    """Map a file path to a dotted module name, prefixed with the repo name.

    The repo-name prefix keeps symbols from different analyzed repos from
    colliding in a shared index.
    """
    root = os.path.abspath(repo_root)
    target = os.path.abspath(file_path)

    dotted = os.path.relpath(target, root).replace(os.sep, ".")
    if dotted.endswith(".py"):
        dotted = dotted[: -len(".py")]

    prefix = os.path.basename(root.rstrip("\\/"))
    return f"{prefix}.{dotted}"
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def build_callgraph_from_resolved_calls_json(path: str) -> CallGraphIndex:
    """Load Phase-4's ``resolved_calls.json`` and rebuild a CallGraphIndex."""
    with open(path, "r", encoding="utf-8") as handle:
        records = json.load(handle)

    graph = CallGraphIndex()
    for record in records:
        # caller_fqn is mandatory; everything else degrades to a default.
        site = CallSite(
            caller_fqn=record["caller_fqn"],
            callee_fqn=record.get("callee_fqn"),
            callee_name=record.get("callee", "<unknown>"),
            file=record.get("file", ""),
            line=int(record.get("line", -1)),
        )
        graph.add_call(site)
    return graph
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def symbol_fqn(sym: SymbolInfo) -> str:
    """Fully-qualified name: ``<module>.<qualified_name>``."""
    return ".".join((sym.module, sym.qualified_name))
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def merge_maps(dst: dict, src: dict):
    """
    Merge extractor outputs across files into single dicts.

    ``classes``/``functions``/``methods`` entries from *src* overwrite
    duplicates already in *dst*. The module docstring keeps the first
    non-empty value seen across files.
    """
    # Bug fix: the original assigned dst["module"] back to itself, so the
    # merged result never carried any module docstring from src.
    dst["module"] = dst.get("module") or src.get("module")
    for k in ("classes", "functions", "methods"):
        dst.setdefault(k, {})
        dst[k].update(src.get(k, {}))
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def run(repo_dir: Optional[str] = None, output_dir: Optional[str] = None) -> Dict[str, Any]:
    """
    Callable explain pipeline (Phase-5/6), suitable for CLI/VS Code.

    Args:
        repo_dir: directory to analyze (default: analysis/testing_repo)
        output_dir: directory to write outputs (default: analysis/output)

    Returns:
        {
            "explain_path": ".../explain.json",
            "symbols": <int>
        }

    Raises:
        FileNotFoundError: if Phase-4's resolved_calls.json is missing
            from output_dir.
    """
    analysis_root = os.path.dirname(os.path.dirname(__file__))  # /analysis

    if repo_dir is None:
        repo_dir = os.path.join(analysis_root, "testing_repo")

    if output_dir is None:
        output_dir = os.path.join(analysis_root, "output")

    os.makedirs(output_dir, exist_ok=True)

    # Phase-4 must have produced resolved_calls.json before explain can run.
    resolved_calls_json = os.path.join(output_dir, "resolved_calls.json")
    if not os.path.exists(resolved_calls_json):
        raise FileNotFoundError(
            f"Missing: {resolved_calls_json}\nRun Phase-4 first."
        )

    # 1) Load callgraph
    callgraph = build_callgraph_from_resolved_calls_json(resolved_calls_json)

    # 2) Collect repo python files
    python_files = collect_python_files(repo_dir)

    # 3) Build symbol index + extractors across repo
    symbol_index = SymbolIndex()

    # Repo-wide accumulators, merged across every file below.
    repo_docstrings = {"module": None, "classes": {}, "functions": {}, "methods": {}}
    repo_signatures = {"functions": {}, "methods": {}}
    repo_returns = {"functions": {}, "methods": {}}

    for file_path in python_files:
        tree = parse_ast(file_path)
        module_path = file_to_module(file_path, repo_dir)

        # index symbols
        symbol_index.index_file(tree, module_path, file_path)

        # extract per-file and merge
        merge_maps(repo_docstrings, extract_docstrings(tree))

        sigs = extract_signatures(tree)
        repo_signatures["functions"].update(sigs.get("functions", {}))
        repo_signatures["methods"].update(sigs.get("methods", {}))

        rets = analyze_returns(tree)
        repo_returns["functions"].update(rets.get("functions", {}))
        repo_returns["methods"].update(rets.get("methods", {}))

    # 4) Generate summaries for all symbols
    explain: Dict[str, dict] = {}

    for sym in symbol_index.all_symbols():
        fqn = symbol_fqn(sym)
        explain[fqn] = generate_symbol_summary(
            symbol_fqn=fqn,
            symbol_info=sym,
            docstrings=repo_docstrings,
            signatures=repo_signatures,
            returns=repo_returns,
            callgraph=callgraph,
        )

    # 5) Save explain.json
    explain_path = os.path.join(output_dir, "explain.json")
    with open(explain_path, "w", encoding="utf-8") as f:
        json.dump(explain, f, indent=2)

    return {
        "explain_path": explain_path,
        "symbols": len(explain),
    }
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def main():
    """CLI entry point: run the explain pipeline and report where it wrote."""
    print("\n=== Phase-5 Explain Runner ===\n")
    outcome = run()
    print(f"Saved: {outcome['explain_path']}")
    print(f"Symbols explained: {outcome['symbols']}")
    print("\n=== Phase-5 Step-6 Complete ===\n")


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import os
|
|
5
|
+
from typing import Any, Dict, List, Tuple
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def _load_json(path: str, default: Any) -> Any:
|
|
9
|
+
if not os.path.exists(path):
|
|
10
|
+
return default
|
|
11
|
+
try:
|
|
12
|
+
with open(path, "r", encoding="utf-8") as f:
|
|
13
|
+
return json.load(f)
|
|
14
|
+
except Exception:
|
|
15
|
+
return default
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _save_json(path: str, payload: Dict[str, Any]) -> None:
|
|
19
|
+
os.makedirs(os.path.dirname(path), exist_ok=True)
|
|
20
|
+
with open(path, "w", encoding="utf-8") as f:
|
|
21
|
+
json.dump(payload, f, indent=2)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _short_loc(symbol_meta: Dict[str, Any]) -> Tuple[str, int]:
|
|
25
|
+
loc = symbol_meta.get("location", {}) if isinstance(symbol_meta.get("location"), dict) else {}
|
|
26
|
+
return str(loc.get("file", "") or ""), int(loc.get("start_line", 1) or 1)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def build_repo_summary_context(repo_cache_dir: str) -> dict:
    """Collect cached architecture artifacts into one summary-context dict.

    Every read is defensive: missing or malformed cache files degrade to
    empty values instead of raising.
    """
    def _cached(name: str):
        return _load_json(os.path.join(repo_cache_dir, name), {})

    arch = _cached("architecture_metrics.json")
    dep = _cached("dependency_cycles.json")
    analysis = _cached("analysis_metrics.json")
    tree = _cached("project_tree.json")
    risk = _cached("risk_radar.json")

    repo = arch.get("repo") if isinstance(arch.get("repo"), dict) else {}
    symbols = arch.get("symbols") if isinstance(arch.get("symbols"), dict) else {}

    orchestrators = [str(x) for x in (repo.get("orchestrators") or [])][:5]
    critical = [str(x) for x in (repo.get("critical_symbols") or [])][:5]

    def _shape(fqns: List[str]) -> List[Dict[str, Any]]:
        shaped: List[Dict[str, Any]] = []
        for fqn in fqns:
            meta = symbols.get(fqn) if isinstance(symbols.get(fqn), dict) else {}
            file_path, line = _short_loc(meta)
            shaped.append({
                "fqn": fqn,
                "in": int(meta.get("fan_in", 0) or 0),
                "out": int(meta.get("fan_out", 0) or 0),
                "file": file_path,
                "line": line,
            })
        return shaped

    hotspots = risk.get("top_hotspots") if isinstance(risk.get("top_hotspots"), list) else []
    cycles = dep.get("cycles", [])[:5] if isinstance(dep.get("cycles"), list) else []
    tree_entries = tree.get("children", [])[:10] if isinstance(tree, dict) else []

    return {
        "repo_prefix": str(arch.get("repo_prefix", "") or ""),
        "counts": {
            "symbols": int(len(symbols)),
            "calls": int(analysis.get("total_calls", 0) or 0),
            "files": int(analysis.get("total_files", 0) or 0),
            "unresolved_calls": int(analysis.get("unresolved_calls", 0) or 0),
            "cycles_count": int(dep.get("cycle_count", 0) or 0),
        },
        "orchestrators": _shape(orchestrators),
        "critical_apis": _shape(critical),
        "dead_symbols": [str(x) for x in (repo.get("dead_symbols") or [])][:10],
        "cycles": cycles,
        "top_tree_entries": tree_entries,
        "top_hotspots": hotspots[:5],
    }
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def _normalize_summary(summary: Dict[str, Any]) -> Dict[str, Any]:
|
|
78
|
+
bullets = [str(x).strip() for x in (summary.get("bullets") or []) if str(x).strip()][:7]
|
|
79
|
+
one = str(summary.get("one_liner", "") or "").strip() or (bullets[0] if bullets else "Repository summary unavailable.")
|
|
80
|
+
notes = [str(x).strip() for x in (summary.get("notes") or []) if str(x).strip()][:5]
|
|
81
|
+
return {"one_liner": one[:180], "bullets": bullets, "notes": notes}
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def _deterministic_summary(context: Dict[str, Any]) -> Dict[str, Any]:
    """Build a rule-based repo summary from the context (no LLM involved)."""
    def _as_dict(value):
        return value if isinstance(value, dict) else {}

    def _as_list(value):
        return value if isinstance(value, list) else []

    counts = _as_dict(context.get("counts"))
    files = int(counts.get("files", 0) or 0)
    symbols = int(counts.get("symbols", 0) or 0)
    calls = int(counts.get("calls", 0) or 0)
    cycles_count = int(counts.get("cycles_count", 0) or 0)
    unresolved = int(counts.get("unresolved_calls", 0) or 0)

    orchestrators = _as_list(context.get("orchestrators"))
    critical = _as_list(context.get("critical_apis"))
    dead = _as_list(context.get("dead_symbols"))
    hotspots = _as_list(context.get("top_hotspots"))

    def _top_fqns(entries):
        # Only dict entries carry an "fqn"; anything else is skipped.
        return [str(e.get("fqn", "") or "") for e in entries[:3] if isinstance(e, dict)]

    top_orchestrators = _top_fqns(orchestrators)
    top_critical = _top_fqns(critical)

    hotspot_labels = []
    for entry in hotspots[:3]:
        if isinstance(entry, dict):
            hotspot_labels.append(str(entry.get("fqn") or entry.get("file") or "").strip())
        elif isinstance(entry, str):
            hotspot_labels.append(entry)

    one_liner = f"Scanned {files} files, indexed {symbols} symbols, and resolved {calls} calls."

    bullets: List[str] = []
    if top_orchestrators:
        bullets.append("Top orchestrators: " + ", ".join(top_orchestrators))
    if top_critical:
        bullets.append("Critical APIs: " + ", ".join(top_critical))
    bullets.append(f"Dependency cycles: {cycles_count}")
    if hotspot_labels:
        bullets.append("Hotspots: " + ", ".join(hotspot_labels))
    if unresolved:
        bullets.append(f"Unresolved calls: {unresolved}")
    if dead:
        bullets.append("Dead symbols: " + ", ".join(str(x) for x in dead[:3]))
    if not bullets:
        # Kept for parity with the original; unreachable in practice since
        # the cycles bullet is always appended above.
        bullets.append("No major hotspots detected from cached architecture artifacts.")

    notes = []
    if cycles_count:
        notes.append("Break dependency cycles first to reduce architecture friction.")
    if hotspots:
        notes.append("Review top hotspots before making broad refactors.")

    return _normalize_summary({"one_liner": one_liner, "bullets": bullets, "notes": notes})
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def generate_repo_summary(repo_cache_dir: str, llm_client=None) -> dict:
    """Produce and cache a deterministic repo summary under *repo_cache_dir*.

    ``llm_client`` is accepted for interface compatibility but unused here:
    this path always uses the deterministic generator.
    """
    context = build_repo_summary_context(repo_cache_dir)
    payload = {
        "ok": True,
        "cached": False,
        "provider": "deterministic",
        "summary": _deterministic_summary(context),
        "error": None,
    }
    _save_json(os.path.join(repo_cache_dir, "repo_summary.json"), payload)
    return payload
|