codemap-python 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- analysis/__init__.py +1 -0
- analysis/architecture/__init__.py +1 -0
- analysis/architecture/architecture_engine.py +155 -0
- analysis/architecture/dependency_cycles.py +103 -0
- analysis/architecture/risk_radar.py +220 -0
- analysis/call_graph/__init__.py +1 -0
- analysis/call_graph/call_extractor.py +91 -0
- analysis/call_graph/call_graph_builder.py +1 -0
- analysis/call_graph/call_resolver.py +56 -0
- analysis/call_graph/context_models.py +1 -0
- analysis/call_graph/cross_file_resolver.py +122 -0
- analysis/call_graph/execution_tracker.py +1 -0
- analysis/call_graph/flow_builder.py +1 -0
- analysis/call_graph/models.py +1 -0
- analysis/core/__init__.py +1 -0
- analysis/core/ast_context.py +1 -0
- analysis/core/ast_parser.py +8 -0
- analysis/core/class_extractor.py +35 -0
- analysis/core/function_extractor.py +16 -0
- analysis/core/import_extractor.py +43 -0
- analysis/explain/__init__.py +1 -0
- analysis/explain/docstring_extractor.py +45 -0
- analysis/explain/explain_runner.py +177 -0
- analysis/explain/repo_summary_generator.py +138 -0
- analysis/explain/return_analyzer.py +114 -0
- analysis/explain/risk_flags.py +1 -0
- analysis/explain/signature_extractor.py +104 -0
- analysis/explain/summary_generator.py +282 -0
- analysis/graph/__init__.py +1 -0
- analysis/graph/callgraph_index.py +117 -0
- analysis/graph/entrypoint_detector.py +1 -0
- analysis/graph/impact_analyzer.py +210 -0
- analysis/indexing/__init__.py +1 -0
- analysis/indexing/import_resolver.py +156 -0
- analysis/indexing/symbol_index.py +150 -0
- analysis/runners/__init__.py +1 -0
- analysis/runners/phase4_runner.py +137 -0
- analysis/utils/__init__.py +1 -0
- analysis/utils/ast_helpers.py +1 -0
- analysis/utils/cache_manager.py +659 -0
- analysis/utils/path_resolver.py +1 -0
- analysis/utils/repo_fetcher.py +469 -0
- cli.py +1728 -0
- codemap_cli.py +11 -0
- codemap_python-0.1.0.dist-info/METADATA +399 -0
- codemap_python-0.1.0.dist-info/RECORD +58 -0
- codemap_python-0.1.0.dist-info/WHEEL +5 -0
- codemap_python-0.1.0.dist-info/entry_points.txt +2 -0
- codemap_python-0.1.0.dist-info/top_level.txt +5 -0
- security_utils.py +51 -0
- ui/__init__.py +1 -0
- ui/app.py +2160 -0
- ui/device_id.py +27 -0
- ui/static/app.js +2703 -0
- ui/static/styles.css +1268 -0
- ui/templates/index.html +231 -0
- ui/utils/__init__.py +1 -0
- ui/utils/registry_manager.py +190 -0
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
# Detect return patterns
|
|
2
|
+
# analysis/explain/return_analyzer.py
|
|
3
|
+
# Phase-5 Step-4.1: Analyze return statements from AST (static)
|
|
4
|
+
|
|
5
|
+
import ast
|
|
6
|
+
from typing import Any, Dict, Optional, Set, List
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def _safe_unparse(node: Optional[ast.AST]) -> Optional[str]:
|
|
10
|
+
if node is None:
|
|
11
|
+
return None
|
|
12
|
+
try:
|
|
13
|
+
return ast.unparse(node)
|
|
14
|
+
except Exception:
|
|
15
|
+
return None
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _classify_return_value(value: Optional[ast.AST]) -> str:
|
|
19
|
+
"""
|
|
20
|
+
Classify a return expression node into a simple category.
|
|
21
|
+
"""
|
|
22
|
+
if value is None:
|
|
23
|
+
return "none"
|
|
24
|
+
|
|
25
|
+
# return None
|
|
26
|
+
if isinstance(value, ast.Constant) and value.value is None:
|
|
27
|
+
return "none"
|
|
28
|
+
|
|
29
|
+
# return 1 / "x" / True
|
|
30
|
+
if isinstance(value, ast.Constant):
|
|
31
|
+
return "constant"
|
|
32
|
+
|
|
33
|
+
# return x
|
|
34
|
+
if isinstance(value, ast.Name):
|
|
35
|
+
return "name"
|
|
36
|
+
|
|
37
|
+
# return obj.attr
|
|
38
|
+
if isinstance(value, ast.Attribute):
|
|
39
|
+
return "attribute"
|
|
40
|
+
|
|
41
|
+
# return foo(...) or obj.foo(...)
|
|
42
|
+
if isinstance(value, ast.Call):
|
|
43
|
+
return "call"
|
|
44
|
+
|
|
45
|
+
# Anything else: a+b, f-strings, comprehensions, etc.
|
|
46
|
+
return "expression"
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def _analyze_function_returns(fn: ast.FunctionDef) -> Dict[str, Any]:
|
|
50
|
+
return_nodes: List[ast.Return] = [
|
|
51
|
+
n for n in ast.walk(fn) if isinstance(n, ast.Return)
|
|
52
|
+
]
|
|
53
|
+
|
|
54
|
+
kinds: Set[str] = set()
|
|
55
|
+
examples: List[str] = []
|
|
56
|
+
|
|
57
|
+
for r in return_nodes:
|
|
58
|
+
kind = _classify_return_value(r.value)
|
|
59
|
+
kinds.add(kind)
|
|
60
|
+
|
|
61
|
+
if len(examples) < 3:
|
|
62
|
+
if r.value is None:
|
|
63
|
+
examples.append("None")
|
|
64
|
+
else:
|
|
65
|
+
ex = _safe_unparse(r.value)
|
|
66
|
+
examples.append(ex if ex is not None else "<unparse_failed>")
|
|
67
|
+
|
|
68
|
+
# If there are no return statements, Python returns None implicitly
|
|
69
|
+
if not return_nodes:
|
|
70
|
+
return {
|
|
71
|
+
"has_return": False,
|
|
72
|
+
"returns_count": 0,
|
|
73
|
+
"return_kinds": ["none"],
|
|
74
|
+
"examples": [],
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
return {
|
|
78
|
+
"has_return": True,
|
|
79
|
+
"returns_count": len(return_nodes),
|
|
80
|
+
"return_kinds": sorted(kinds),
|
|
81
|
+
"examples": examples,
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def analyze_returns(ast_tree: ast.AST) -> Dict[str, Dict[str, Any]]:
    """
    Analyze returns for:
    - top-level functions (``def`` and ``async def``)
    - class methods (``def`` and ``async def``) of top-level classes

    Nested functions and nested classes are not listed.

    Returns:
    {
        "functions": { "func": return_info },
        "methods": { "Class.method": return_info }
    }
    """
    # ``async def`` is a distinct node type and must be matched explicitly,
    # otherwise coroutines are silently left out of the result.
    function_types = (ast.FunctionDef, ast.AsyncFunctionDef)

    result: Dict[str, Dict[str, Any]] = {
        "functions": {},
        "methods": {},
    }

    for node in ast_tree.body:
        # Top-level functions
        if isinstance(node, function_types):
            result["functions"][node.name] = _analyze_function_returns(node)

        # Classes and methods
        elif isinstance(node, ast.ClassDef):
            for item in node.body:
                if isinstance(item, function_types):
                    key = f"{node.name}.{item.name}"
                    result["methods"][key] = _analyze_function_returns(item)

    return result
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# IO/network/db/recursion/large loops
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
# Args, defaults, *args/**kwargs
|
|
2
|
+
|
|
3
|
+
# analysis/explain/signature_extractor.py
|
|
4
|
+
# Phase-5 Step-3.1: Extract function/method signatures from AST
|
|
5
|
+
|
|
6
|
+
import ast
|
|
7
|
+
from typing import Any, Dict, Optional
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def _safe_unparse(node: Optional[ast.AST]) -> Optional[str]:
|
|
11
|
+
if node is None:
|
|
12
|
+
return None
|
|
13
|
+
try:
|
|
14
|
+
return ast.unparse(node)
|
|
15
|
+
except Exception:
|
|
16
|
+
return None
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _extract_signature_dict(fn: ast.FunctionDef) -> Dict[str, Any]:
|
|
20
|
+
"""
|
|
21
|
+
Extract a normalized signature dict from a FunctionDef node.
|
|
22
|
+
"""
|
|
23
|
+
args = fn.args
|
|
24
|
+
|
|
25
|
+
# Positional-or-keyword parameters
|
|
26
|
+
pos_args = [a.arg for a in args.args]
|
|
27
|
+
|
|
28
|
+
# Keyword-only parameters
|
|
29
|
+
kwonly_args = [a.arg for a in args.kwonlyargs]
|
|
30
|
+
|
|
31
|
+
# *args / **kwargs
|
|
32
|
+
vararg = args.vararg.arg if args.vararg else None
|
|
33
|
+
kwarg = args.kwarg.arg if args.kwarg else None
|
|
34
|
+
|
|
35
|
+
# Defaults apply to the LAST N positional-or-keyword params
|
|
36
|
+
defaults_map: Dict[str, Optional[str]] = {}
|
|
37
|
+
if args.defaults:
|
|
38
|
+
default_values = [_safe_unparse(d) for d in args.defaults]
|
|
39
|
+
default_param_names = pos_args[-len(default_values):]
|
|
40
|
+
defaults_map = dict(zip(default_param_names, default_values))
|
|
41
|
+
|
|
42
|
+
# Keyword-only defaults map (kw_defaults aligns with kwonlyargs)
|
|
43
|
+
kwonly_defaults_map: Dict[str, Optional[str]] = {}
|
|
44
|
+
if args.kwonlyargs:
|
|
45
|
+
for name_node, def_node in zip(args.kwonlyargs, args.kw_defaults):
|
|
46
|
+
kwonly_defaults_map[name_node.arg] = _safe_unparse(def_node)
|
|
47
|
+
|
|
48
|
+
# Type annotations for params + return
|
|
49
|
+
annotations: Dict[str, Optional[str]] = {}
|
|
50
|
+
for a in args.args:
|
|
51
|
+
if a.annotation is not None:
|
|
52
|
+
annotations[a.arg] = _safe_unparse(a.annotation)
|
|
53
|
+
for a in args.kwonlyargs:
|
|
54
|
+
if a.annotation is not None:
|
|
55
|
+
annotations[a.arg] = _safe_unparse(a.annotation)
|
|
56
|
+
if args.vararg and args.vararg.annotation is not None:
|
|
57
|
+
annotations[f"*{args.vararg.arg}"] = _safe_unparse(args.vararg.annotation)
|
|
58
|
+
if args.kwarg and args.kwarg.annotation is not None:
|
|
59
|
+
annotations[f"**{args.kwarg.arg}"] = _safe_unparse(args.kwarg.annotation)
|
|
60
|
+
|
|
61
|
+
if fn.returns is not None:
|
|
62
|
+
annotations["return"] = _safe_unparse(fn.returns)
|
|
63
|
+
|
|
64
|
+
return {
|
|
65
|
+
"args": pos_args,
|
|
66
|
+
"defaults": defaults_map,
|
|
67
|
+
"kwonlyargs": kwonly_args,
|
|
68
|
+
"kwonly_defaults": kwonly_defaults_map,
|
|
69
|
+
"vararg": vararg,
|
|
70
|
+
"kwarg": kwarg,
|
|
71
|
+
"annotations": annotations,
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def extract_signatures(ast_tree: ast.AST) -> Dict[str, Dict[str, Any]]:
    """
    Extract signatures for:
    - top-level functions (``def`` and ``async def``)
    - class methods (``def`` and ``async def``) of top-level classes

    Nested functions and nested classes are not listed.

    Returns:
    {
        "functions": { "func": signature_dict },
        "methods": { "Class.method": signature_dict }
    }
    """
    # ``async def`` is a distinct node type and must be matched explicitly,
    # otherwise coroutines are silently left out of the result.
    function_types = (ast.FunctionDef, ast.AsyncFunctionDef)

    result: Dict[str, Dict[str, Any]] = {
        "functions": {},
        "methods": {},
    }

    for node in ast_tree.body:
        # Top-level functions
        if isinstance(node, function_types):
            result["functions"][node.name] = _extract_signature_dict(node)

        # Classes and methods
        elif isinstance(node, ast.ClassDef):
            for item in node.body:
                if isinstance(item, function_types):
                    key = f"{node.name}.{item.name}"
                    result["methods"][key] = _extract_signature_dict(item)

    return result
|
|
@@ -0,0 +1,282 @@
|
|
|
1
|
+
# Heuristic summary (no LLM yet)
|
|
2
|
+
# analysis/explain/summary_generator.py
|
|
3
|
+
# Phase-5 Step-5.1: Heuristic summary generator (no LLM)
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from collections import Counter
|
|
8
|
+
from typing import Any, Dict, Optional, List
|
|
9
|
+
|
|
10
|
+
from analysis.indexing.symbol_index import SymbolInfo
|
|
11
|
+
from analysis.graph.callgraph_index import CallGraphIndex
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def _first_line(text: Optional[str]) -> Optional[str]:
|
|
15
|
+
if not text:
|
|
16
|
+
return None
|
|
17
|
+
line = text.strip().splitlines()[0].strip()
|
|
18
|
+
return line or None
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _humanize_name(name: str) -> str:
|
|
22
|
+
# snake_case -> words
|
|
23
|
+
return name.replace("_", " ").strip()
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def _format_args(sig: Optional[dict]) -> str:
|
|
27
|
+
if not sig:
|
|
28
|
+
return "()"
|
|
29
|
+
args = sig.get("args", [])
|
|
30
|
+
vararg = sig.get("vararg")
|
|
31
|
+
kwonly = sig.get("kwonlyargs", [])
|
|
32
|
+
kwarg = sig.get("kwarg")
|
|
33
|
+
|
|
34
|
+
parts: List[str] = []
|
|
35
|
+
parts.extend(args)
|
|
36
|
+
|
|
37
|
+
if vararg:
|
|
38
|
+
parts.append(f"*{vararg}")
|
|
39
|
+
|
|
40
|
+
if kwonly:
|
|
41
|
+
# show marker for kw-only if not already implied by *args
|
|
42
|
+
if not vararg:
|
|
43
|
+
parts.append("*")
|
|
44
|
+
parts.extend(kwonly)
|
|
45
|
+
|
|
46
|
+
if kwarg:
|
|
47
|
+
parts.append(f"**{kwarg}")
|
|
48
|
+
|
|
49
|
+
return "(" + ", ".join(parts) + ")"
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _return_phrase(ret_info: Optional[dict]) -> str:
|
|
53
|
+
if not ret_info:
|
|
54
|
+
return "Returns: unknown."
|
|
55
|
+
|
|
56
|
+
kinds = ret_info.get("return_kinds", [])
|
|
57
|
+
if kinds == ["none"]:
|
|
58
|
+
return "Returns: None."
|
|
59
|
+
if "call" in kinds:
|
|
60
|
+
return "Returns: result of a call."
|
|
61
|
+
if "expression" in kinds:
|
|
62
|
+
return "Returns: computed value."
|
|
63
|
+
if "name" in kinds:
|
|
64
|
+
return "Returns: a variable value."
|
|
65
|
+
if "constant" in kinds:
|
|
66
|
+
return "Returns: a constant."
|
|
67
|
+
if "attribute" in kinds:
|
|
68
|
+
return "Returns: an attribute value."
|
|
69
|
+
return "Returns: mixed/complex."
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def _tags_from_callees(callee_fqns: List[str]) -> List[str]:
|
|
73
|
+
tags: List[str] = []
|
|
74
|
+
if any(c == "builtins.print" for c in callee_fqns):
|
|
75
|
+
tags.append("io:print")
|
|
76
|
+
if any(c == "builtins.open" for c in callee_fqns):
|
|
77
|
+
tags.append("io:file")
|
|
78
|
+
if any("read" in c.lower() or "load" in c.lower() for c in callee_fqns):
|
|
79
|
+
tags.append("io:read")
|
|
80
|
+
if any("write" in c.lower() or "save" in c.lower() for c in callee_fqns):
|
|
81
|
+
tags.append("io:write")
|
|
82
|
+
return tags
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def _analyze_method_behavior(symbol_info: SymbolInfo, callee_fqns: List[str]) -> str:
|
|
86
|
+
"""Analyze what a method does based on its callees and name patterns."""
|
|
87
|
+
name = symbol_info.name.lower()
|
|
88
|
+
|
|
89
|
+
# Check for initialization patterns
|
|
90
|
+
if name in ("__init__", "init", "initialize", "setup"):
|
|
91
|
+
return "initializes the object"
|
|
92
|
+
|
|
93
|
+
# Check for display/output patterns
|
|
94
|
+
if any(word in name for word in ["display", "show", "print", "render"]):
|
|
95
|
+
if any("print" in c for c in callee_fqns):
|
|
96
|
+
return "displays information to console"
|
|
97
|
+
return "displays information"
|
|
98
|
+
|
|
99
|
+
# Check for data setting patterns
|
|
100
|
+
if any(word in name for word in ["set", "update", "assign", "configure"]):
|
|
101
|
+
return "sets or updates internal state"
|
|
102
|
+
|
|
103
|
+
# Check for data retrieval patterns
|
|
104
|
+
if any(word in name for word in ["get", "fetch", "retrieve", "load"]):
|
|
105
|
+
return "retrieves or loads data"
|
|
106
|
+
|
|
107
|
+
# Check for validation patterns
|
|
108
|
+
if any(word in name for word in ["validate", "check", "verify", "test"]):
|
|
109
|
+
return "validates or checks conditions"
|
|
110
|
+
|
|
111
|
+
# Check for computation patterns
|
|
112
|
+
if any(word in name for word in ["calculate", "compute", "process", "transform"]):
|
|
113
|
+
return "performs calculations or transformations"
|
|
114
|
+
|
|
115
|
+
# Check based on callees
|
|
116
|
+
if callee_fqns:
|
|
117
|
+
if any("display" in c.lower() or "show" in c.lower() for c in callee_fqns):
|
|
118
|
+
return "orchestrates display operations"
|
|
119
|
+
if any("save" in c.lower() or "write" in c.lower() for c in callee_fqns):
|
|
120
|
+
return "saves or persists data"
|
|
121
|
+
|
|
122
|
+
return None
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def generate_symbol_summary(
    symbol_fqn: str,
    symbol_info: SymbolInfo,
    docstrings: dict,
    signatures: dict,
    returns: dict,
    callgraph: CallGraphIndex
) -> Dict[str, Any]:
    """
    Generate a heuristic summary for one symbol.

    Combines the per-file ``docstrings`` / ``signatures`` / ``returns``
    lookups (each keyed by "functions" / "methods", docstrings also by
    "classes") with the call graph.

    Returns a dict with keys ``fqn``, ``one_liner``, ``details`` (list of
    human-readable lines), ``tags`` and ``location``.
    """
    kind = symbol_info.kind.value
    is_method = kind == "method"
    is_function = kind == "function"

    # -------- Docstring lookup --------
    # methods dict uses "Class.method"; functions and classes use the bare name
    doc: Optional[str] = None
    if is_method:
        doc = docstrings.get("methods", {}).get(symbol_info.qualified_name)
    elif is_function:
        doc = docstrings.get("functions", {}).get(symbol_info.name)
    elif kind == "class":
        doc = docstrings.get("classes", {}).get(symbol_info.name)

    doc_first = _first_line(doc)

    # -------- Signature lookup --------
    sig: Optional[dict] = None
    if is_method:
        sig = signatures.get("methods", {}).get(symbol_info.qualified_name)
    elif is_function:
        sig = signatures.get("functions", {}).get(symbol_info.name)

    args_str = _format_args(sig)

    # -------- Returns lookup --------
    ret_info: Optional[dict] = None
    if is_method:
        ret_info = returns.get("methods", {}).get(symbol_info.qualified_name)
    elif is_function:
        ret_info = returns.get("functions", {}).get(symbol_info.name)

    # -------- Callgraph lookup --------
    outgoing_sites = callgraph.callees_of(symbol_fqn)
    incoming_sites = callgraph.callers_of(symbol_fqn)

    # Unresolved call sites (no callee FQN) are left out of the callee list.
    callee_fqns = [site.callee_fqn for site in outgoing_sites if site.callee_fqn]
    caller_fqns = [site.caller_fqn for site in incoming_sites]

    callee_counts = Counter(callee_fqns)

    # -------- One-liner --------
    # Preference order: docstring first line, inferred behavior, name prefix.
    if doc_first:
        one_liner = doc_first
    else:
        symbol_name = symbol_info.name
        behavior = _analyze_method_behavior(symbol_info, callee_fqns)

        if behavior:
            one_liner = f"{symbol_name}{args_str} {behavior}."
        else:
            # Heuristic verb phrase from a well-known name prefix
            prefix_verbs = (
                ("get_", "gets"),
                ("set_", "sets"),
                ("load_", "loads"),
                ("save_", "saves"),
                ("build_", "builds"),
            )
            for prefix, verb in prefix_verbs:
                if symbol_name.startswith(prefix):
                    rest = _humanize_name(symbol_name[len(prefix):])
                    one_liner = f"{symbol_name}{args_str} {verb} {rest}."
                    break
            else:
                name_phrase = _humanize_name(symbol_name)
                one_liner = f"{symbol_name}{args_str} does work related to '{name_phrase}'."

    # -------- Details --------
    details: List[str] = []

    # Location info
    if symbol_info.file_path and symbol_info.start_line > 0:
        details.append(
            f"Defined in {symbol_info.file_path}:{symbol_info.start_line}-{symbol_info.end_line}"
        )

    # Signature details
    if sig:
        sig_parts = []
        positional = sig.get("args")
        if positional:
            sig_parts.append(f"Parameters: {', '.join(positional)}")
        star_args = sig.get("vararg")
        if star_args:
            sig_parts.append(f"*args: {star_args}")
        keyword_only = sig.get("kwonlyargs")
        if keyword_only:
            sig_parts.append(f"Keyword-only: {', '.join(keyword_only)}")
        star_kwargs = sig.get("kwarg")
        if star_kwargs:
            sig_parts.append(f"**kwargs: {star_kwargs}")
        if sig_parts:
            details.append("Signature: " + "; ".join(sig_parts))

    # Caller info: list at most three unique callers, then a count of the rest
    if not caller_fqns:
        details.append("Called by: (no callers found)")
    else:
        unique_callers = sorted(set(caller_fqns))
        shown = ", ".join(unique_callers[:3])
        hidden = len(unique_callers) - 3
        if hidden > 0:
            details.append(f"Called by: {shown} and {hidden} more")
        else:
            details.append("Called by: " + shown)

    # Callee info: up to eight most frequent callees, short names only
    if not callee_counts:
        details.append("Calls: (no callees found)")
    else:
        calls_list = []
        for fqn, cnt in callee_counts.most_common(8):
            short_name = fqn.rsplit(".", 1)[-1]
            suffix = f" x{cnt}" if cnt > 1 else ""
            calls_list.append(f"{short_name}(){suffix}")
        details.append("Calls: " + ", ".join(calls_list))

    # Return info
    ret_phrase = _return_phrase(ret_info)
    if ret_info and ret_info.get("examples"):
        examples = ret_info["examples"][:2]
        ret_phrase += f" Examples: {', '.join(examples)}"
    details.append(ret_phrase)

    return {
        "fqn": symbol_fqn,
        "one_liner": one_liner,
        "details": details,
        "tags": _tags_from_callees(callee_fqns),
        "location": {
            "file": symbol_info.file_path,
            "start_line": symbol_info.start_line,
            "end_line": symbol_info.end_line,
        },
    }
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Graph package - Call graph indexing and analysis
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
# Build caller->callees and reverse index
|
|
2
|
+
# analysis/graph/callgraph_index.py
|
|
3
|
+
|
|
4
|
+
from __future__ import annotations
|
|
5
|
+
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
import json
|
|
8
|
+
import os
|
|
9
|
+
from collections import Counter
|
|
10
|
+
from typing import Dict, List, Optional, Set, Any
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@dataclass(frozen=True)
class CallSite:
    """Immutable record of a single call found in the analysed code."""

    # Fully-qualified name of the calling symbol; CallGraphIndex.add_call
    # uses it as the key of the forward (caller -> callees) index.
    caller_fqn: str
    # Fully-qualified name of the call target. A falsy value (None or "")
    # makes CallGraphIndex.add_call file the site under unresolved calls.
    callee_fqn: Optional[str]
    # Presumably the callee name as written at the call site - TODO confirm
    # against the code that builds CallSite objects.
    callee_name: str
    # Presumably the path of the file containing the call - TODO confirm.
    file: str
    # Presumably the line number of the call within that file - TODO confirm.
    line: int
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class CallGraphIndex:
    """Stores forward/reverse call indexes and unresolved call list.

    - forward index: caller FQN -> every call site made by that caller
      (resolved or not)
    - reverse index: callee FQN -> every resolved call site targeting it
    - unresolved list: call sites whose ``callee_fqn`` is falsy

    Query methods return fresh lists, so callers may freely mutate the
    results without corrupting the index.
    """

    def __init__(self) -> None:
        self._forward: Dict[str, List[CallSite]] = {}
        self._reverse: Dict[str, List[CallSite]] = {}
        self._unresolved: List[CallSite] = []

    def add_call(self, callsite: CallSite) -> None:
        """Register one call site in the forward index and, depending on
        whether its callee was resolved, in the reverse index or the
        unresolved list."""
        self._forward.setdefault(callsite.caller_fqn, []).append(callsite)
        if callsite.callee_fqn:
            self._reverse.setdefault(callsite.callee_fqn, []).append(callsite)
        else:
            self._unresolved.append(callsite)

    def callees_of(self, caller_fqn: str) -> List[CallSite]:
        """Return a copy of the call sites made by *caller_fqn* (empty if unknown)."""
        # Copy so that mutating the result cannot alter the index.
        return list(self._forward.get(caller_fqn, ()))

    def callers_of(self, callee_fqn: str) -> List[CallSite]:
        """Return a copy of the resolved call sites targeting *callee_fqn* (empty if unknown)."""
        # Copy so that mutating the result cannot alter the index.
        return list(self._reverse.get(callee_fqn, ()))

    def unresolved_calls(self) -> List[CallSite]:
        """Return a copy of the call sites whose callee could not be resolved."""
        return list(self._unresolved)

    def all_callers(self) -> List[str]:
        """Return the sorted FQNs of every symbol that makes at least one call."""
        return sorted(self._forward)

    def all_callees(self) -> List[str]:
        """Return the sorted FQNs of every symbol that is the resolved target of a call."""
        return sorted(self._reverse)

    def stats(self) -> dict:
        """Return summary counts: unique callers, unique callees, unresolved
        call sites and total call sites (resolved + unresolved)."""
        return {
            "unique_callers": len(self._forward),
            "unique_callees": len(self._reverse),
            "unresolved_calls": len(self._unresolved),
            "total_calls": sum(len(v) for v in self._forward.values()),
        }
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def build_caller_fqn(call: dict, current_module: str) -> str:
    """Build the fully-qualified name of the caller described by *call*.

    Reads ``call["caller"]`` (defaulting to ``"<unknown>"``) and
    ``call["class"]``. The result is ``module.Class.caller`` when a class is
    given and ``module.caller`` otherwise.
    """
    owner_class = call.get("class")
    function_name = call.get("caller", "<unknown>")
    # Methods are qualified by their class; plain functions by the module only.
    scope = f"{current_module}.{owner_class}" if owner_class else current_module
    return f"{scope}.{function_name}"
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def write_hub_metrics_from_resolved_calls(resolved_calls_path: str, output_path: Optional[str] = None) -> Dict[str, Any]:
    """Compute simple repository call metrics from resolved_calls.json and optionally write to file.

    Args:
        resolved_calls_path: Path to a JSON file holding a list of call rows;
            each row is a dict read via its ``caller_fqn``, ``callee_fqn``
            and ``file`` keys. A non-list document is treated as empty and
            non-dict rows are skipped.
        output_path: When given, the payload is also written there as
            indented JSON; missing parent directories are created.

    Returns:
        A dict with ``ok``, ``total_calls`` (number of rows in the input
        list), ``unresolved_calls`` (rows without a callee), ``total_files``,
        ``critical_apis`` (top 25 callees by fan-in) and ``orchestrators``
        (top 25 callers by fan-out).

    Raises:
        FileNotFoundError: If *resolved_calls_path* does not exist.
    """
    if not os.path.exists(resolved_calls_path):
        raise FileNotFoundError(resolved_calls_path)

    with open(resolved_calls_path, "r", encoding="utf-8") as f:
        rows = json.load(f)
    if not isinstance(rows, list):
        rows = []

    fan_in: Counter[str] = Counter()
    fan_out: Counter[str] = Counter()
    files: Set[str] = set()
    unresolved = 0

    for row in rows:
        if not isinstance(row, dict):
            continue
        # ``or ""`` folds None / missing values into the empty string.
        caller = str(row.get("caller_fqn", "") or "")
        callee = str(row.get("callee_fqn", "") or "")
        file_path = str(row.get("file", "") or "")
        if file_path:
            files.add(file_path)

        if caller:
            fan_out[caller] += 1
        if callee:
            fan_in[callee] += 1
        else:
            unresolved += 1

    critical_apis = [{"fqn": fqn, "fan_in": cnt} for fqn, cnt in fan_in.most_common(25)]
    orchestrators = [{"fqn": fqn, "fan_out": cnt} for fqn, cnt in fan_out.most_common(25)]

    payload: Dict[str, Any] = {
        "ok": True,
        "total_calls": len(rows),
        "unresolved_calls": unresolved,
        "total_files": len(files),
        "critical_apis": critical_apis,
        "orchestrators": orchestrators,
    }

    if output_path:
        # A bare filename has an empty dirname, and os.makedirs("") raises,
        # so only create the parent directory when there is one.
        output_dir = os.path.dirname(output_path)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)
        with open(output_path, "w", encoding="utf-8") as f:
            json.dump(payload, f, indent=2)

    return payload
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# __main__, click/typer/argparse, etc.
|