codemap-python 0.1.4__tar.gz → 0.1.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {codemap_python-0.1.4 → codemap_python-0.1.6}/PKG-INFO +1 -1
- {codemap_python-0.1.4 → codemap_python-0.1.6}/analysis/architecture/architecture_engine.py +5 -0
- {codemap_python-0.1.4 → codemap_python-0.1.6}/analysis/call_graph/call_extractor.py +8 -5
- {codemap_python-0.1.4 → codemap_python-0.1.6}/analysis/core/import_extractor.py +40 -36
- {codemap_python-0.1.4 → codemap_python-0.1.6}/analysis/explain/explain_runner.py +42 -34
- {codemap_python-0.1.4 → codemap_python-0.1.6}/analysis/explain/summary_generator.py +8 -5
- codemap_python-0.1.6/analysis/graph/entrypoint_detector.py +217 -0
- {codemap_python-0.1.4 → codemap_python-0.1.6}/analysis/runners/phase4_runner.py +35 -33
- {codemap_python-0.1.4 → codemap_python-0.1.6}/analysis/utils/cache_manager.py +26 -21
- codemap_python-0.1.6/analysis/utils/repo_walk.py +27 -0
- codemap_python-0.1.4/codemap_cli.py → codemap_python-0.1.6/cli.py +11 -11
- codemap_python-0.1.4/cli.py → codemap_python-0.1.6/codemap_app.py +209 -144
- codemap_python-0.1.6/codemap_cli.py +11 -0
- {codemap_python-0.1.4 → codemap_python-0.1.6}/codemap_python.egg-info/PKG-INFO +1 -1
- {codemap_python-0.1.4 → codemap_python-0.1.6}/codemap_python.egg-info/SOURCES.txt +7 -0
- {codemap_python-0.1.4 → codemap_python-0.1.6}/codemap_python.egg-info/top_level.txt +1 -1
- {codemap_python-0.1.4 → codemap_python-0.1.6}/pyproject.toml +2 -2
- {codemap_python-0.1.4 → codemap_python-0.1.6}/tests/test_cache_cli_commands.py +35 -30
- {codemap_python-0.1.4 → codemap_python-0.1.6}/tests/test_cli_invalid_escape_warnings.py +1 -1
- codemap_python-0.1.6/tests/test_codemap_cli_entrypoint.py +12 -0
- codemap_python-0.1.6/tests/test_entrypoint_detector.py +60 -0
- codemap_python-0.1.6/tests/test_explain_runner_collection.py +37 -0
- codemap_python-0.1.6/tests/test_repo_walk_filters.py +57 -0
- {codemap_python-0.1.4 → codemap_python-0.1.6}/tests/test_security_cli_integration.py +6 -6
- codemap_python-0.1.6/tests/test_summary_generator.py +12 -0
- {codemap_python-0.1.4 → codemap_python-0.1.6}/ui/app.py +87 -81
- {codemap_python-0.1.4 → codemap_python-0.1.6}/ui/static/app.js +909 -393
- {codemap_python-0.1.4 → codemap_python-0.1.6}/ui/static/styles.css +613 -103
- {codemap_python-0.1.4 → codemap_python-0.1.6}/ui/templates/index.html +73 -57
- codemap_python-0.1.4/analysis/graph/entrypoint_detector.py +0 -1
- {codemap_python-0.1.4 → codemap_python-0.1.6}/README.md +0 -0
- {codemap_python-0.1.4 → codemap_python-0.1.6}/analysis/__init__.py +0 -0
- {codemap_python-0.1.4 → codemap_python-0.1.6}/analysis/architecture/__init__.py +0 -0
- {codemap_python-0.1.4 → codemap_python-0.1.6}/analysis/architecture/dependency_cycles.py +0 -0
- {codemap_python-0.1.4 → codemap_python-0.1.6}/analysis/architecture/risk_radar.py +0 -0
- {codemap_python-0.1.4 → codemap_python-0.1.6}/analysis/call_graph/__init__.py +0 -0
- {codemap_python-0.1.4 → codemap_python-0.1.6}/analysis/call_graph/call_graph_builder.py +0 -0
- {codemap_python-0.1.4 → codemap_python-0.1.6}/analysis/call_graph/call_resolver.py +0 -0
- {codemap_python-0.1.4 → codemap_python-0.1.6}/analysis/call_graph/context_models.py +0 -0
- {codemap_python-0.1.4 → codemap_python-0.1.6}/analysis/call_graph/cross_file_resolver.py +0 -0
- {codemap_python-0.1.4 → codemap_python-0.1.6}/analysis/call_graph/execution_tracker.py +0 -0
- {codemap_python-0.1.4 → codemap_python-0.1.6}/analysis/call_graph/flow_builder.py +0 -0
- {codemap_python-0.1.4 → codemap_python-0.1.6}/analysis/call_graph/models.py +0 -0
- {codemap_python-0.1.4 → codemap_python-0.1.6}/analysis/core/__init__.py +0 -0
- {codemap_python-0.1.4 → codemap_python-0.1.6}/analysis/core/ast_context.py +0 -0
- {codemap_python-0.1.4 → codemap_python-0.1.6}/analysis/core/ast_parser.py +0 -0
- {codemap_python-0.1.4 → codemap_python-0.1.6}/analysis/core/class_extractor.py +0 -0
- {codemap_python-0.1.4 → codemap_python-0.1.6}/analysis/core/function_extractor.py +0 -0
- {codemap_python-0.1.4 → codemap_python-0.1.6}/analysis/explain/__init__.py +0 -0
- {codemap_python-0.1.4 → codemap_python-0.1.6}/analysis/explain/docstring_extractor.py +0 -0
- {codemap_python-0.1.4 → codemap_python-0.1.6}/analysis/explain/repo_summary_generator.py +0 -0
- {codemap_python-0.1.4 → codemap_python-0.1.6}/analysis/explain/return_analyzer.py +0 -0
- {codemap_python-0.1.4 → codemap_python-0.1.6}/analysis/explain/risk_flags.py +0 -0
- {codemap_python-0.1.4 → codemap_python-0.1.6}/analysis/explain/signature_extractor.py +0 -0
- {codemap_python-0.1.4 → codemap_python-0.1.6}/analysis/graph/__init__.py +0 -0
- {codemap_python-0.1.4 → codemap_python-0.1.6}/analysis/graph/callgraph_index.py +0 -0
- {codemap_python-0.1.4 → codemap_python-0.1.6}/analysis/graph/impact_analyzer.py +0 -0
- {codemap_python-0.1.4 → codemap_python-0.1.6}/analysis/indexing/__init__.py +0 -0
- {codemap_python-0.1.4 → codemap_python-0.1.6}/analysis/indexing/import_resolver.py +0 -0
- {codemap_python-0.1.4 → codemap_python-0.1.6}/analysis/indexing/symbol_index.py +0 -0
- {codemap_python-0.1.4 → codemap_python-0.1.6}/analysis/runners/__init__.py +0 -0
- {codemap_python-0.1.4 → codemap_python-0.1.6}/analysis/utils/__init__.py +0 -0
- {codemap_python-0.1.4 → codemap_python-0.1.6}/analysis/utils/ast_helpers.py +0 -0
- {codemap_python-0.1.4 → codemap_python-0.1.6}/analysis/utils/bom_handler.py +0 -0
- {codemap_python-0.1.4 → codemap_python-0.1.6}/analysis/utils/path_resolver.py +0 -0
- {codemap_python-0.1.4 → codemap_python-0.1.6}/analysis/utils/progress_spinner.py +0 -0
- {codemap_python-0.1.4 → codemap_python-0.1.6}/analysis/utils/repo_fetcher.py +0 -0
- {codemap_python-0.1.4 → codemap_python-0.1.6}/codemap_python.egg-info/dependency_links.txt +0 -0
- {codemap_python-0.1.4 → codemap_python-0.1.6}/codemap_python.egg-info/entry_points.txt +0 -0
- {codemap_python-0.1.4 → codemap_python-0.1.6}/codemap_python.egg-info/requires.txt +0 -0
- {codemap_python-0.1.4 → codemap_python-0.1.6}/security_utils.py +0 -0
- {codemap_python-0.1.4 → codemap_python-0.1.6}/setup.cfg +0 -0
- {codemap_python-0.1.4 → codemap_python-0.1.6}/tests/test_cache_retention.py +0 -0
- {codemap_python-0.1.4 → codemap_python-0.1.6}/tests/test_no_key_persistence.py +0 -0
- {codemap_python-0.1.4 → codemap_python-0.1.6}/tests/test_registry_session_mode.py +0 -0
- {codemap_python-0.1.4 → codemap_python-0.1.6}/tests/test_security_redaction.py +0 -0
- {codemap_python-0.1.4 → codemap_python-0.1.6}/tests/test_symbol_explain_cache.py +0 -0
- {codemap_python-0.1.4 → codemap_python-0.1.6}/tests/test_symbol_info_endpoint.py +0 -0
- {codemap_python-0.1.4 → codemap_python-0.1.6}/tests/test_ui_private_mode_security.py +0 -0
- {codemap_python-0.1.4 → codemap_python-0.1.6}/tests/test_ui_retention_controls.py +0 -0
- {codemap_python-0.1.4 → codemap_python-0.1.6}/ui/__init__.py +0 -0
- {codemap_python-0.1.4 → codemap_python-0.1.6}/ui/device_id.py +0 -0
- {codemap_python-0.1.4 → codemap_python-0.1.6}/ui/utils/__init__.py +0 -0
- {codemap_python-0.1.4 → codemap_python-0.1.6}/ui/utils/registry_manager.py +0 -0
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
from collections import defaultdict
|
|
4
4
|
from typing import Dict, Any, Optional, Set
|
|
5
|
+
from analysis.graph.entrypoint_detector import detect_entry_points
|
|
5
6
|
|
|
6
7
|
|
|
7
8
|
def _kind_for_fqn(fqn: str, repo_prefix: str) -> str:
|
|
@@ -52,6 +53,7 @@ def _infer_repo_prefix(nodes: Set[str]) -> str:
|
|
|
52
53
|
def compute_architecture_metrics(
|
|
53
54
|
callgraph,
|
|
54
55
|
symbol_index,
|
|
56
|
+
repo_dir: Optional[str] = None,
|
|
55
57
|
repo_prefix: Optional[str] = None,
|
|
56
58
|
top_k: int = 25,
|
|
57
59
|
fanout_threshold: int = 10,
|
|
@@ -134,6 +136,8 @@ def compute_architecture_metrics(
|
|
|
134
136
|
"edges": int(edges_per_file.get(fp, 0)),
|
|
135
137
|
}
|
|
136
138
|
|
|
139
|
+
entry_points = detect_entry_points(repo_dir=repo_dir, repo_prefix=prefix) if repo_dir else []
|
|
140
|
+
|
|
137
141
|
return {
|
|
138
142
|
"ok": True,
|
|
139
143
|
"repo_prefix": prefix,
|
|
@@ -142,6 +146,7 @@ def compute_architecture_metrics(
|
|
|
142
146
|
"dead_symbols": sorted(dead_symbols),
|
|
143
147
|
"orchestrators": sorted(orchestrators),
|
|
144
148
|
"critical_symbols": sorted(critical),
|
|
149
|
+
"entry_points": entry_points,
|
|
145
150
|
"top_fan_in": top_fan_in,
|
|
146
151
|
"top_fan_out": top_fan_out,
|
|
147
152
|
},
|
|
@@ -85,8 +85,11 @@ class FunctionCallVisitor(ast.NodeVisitor):
|
|
|
85
85
|
def extract_function_calls(file_path):
    """Read and parse ``file_path``, then return the function calls found in it.

    Reading and parsing are delegated to ``read_source_file`` and
    ``parse_source_to_ast``; the call extraction itself is done by
    ``extract_function_calls_from_tree``.
    """
    parsed = parse_source_to_ast(read_source_file(file_path), file_path=file_path)
    return extract_function_calls_from_tree(parsed, file_path)
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def extract_function_calls_from_tree(tree, file_path):
    """Return the calls collected from an already-parsed AST.

    Lets callers that already hold a parsed tree avoid re-reading and
    re-parsing the file. The result is whatever ``FunctionCallVisitor``
    accumulates in its ``calls`` attribute while visiting ``tree``.
    """
    collector = FunctionCallVisitor(file_path)
    collector.visit(tree)
    return collector.calls
|
|
@@ -3,43 +3,47 @@
|
|
|
3
3
|
|
|
4
4
|
import ast
|
|
5
5
|
from analysis.utils.bom_handler import read_source_file, parse_source_to_ast
|
|
6
|
-
|
|
7
|
-
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def extract_imports_from_tree(tree, file_path):
    """Return one record per imported name found in an already-parsed AST.

    ``import x`` statements produce records of type ``"import"``;
    ``from x import y`` statements produce records of type ``"from_import"``,
    which additionally carry the relative-import ``level``
    (0 = absolute, >0 = relative). Records appear in ``ast.walk`` order.
    """
    records = []

    for node in ast.walk(tree):
        if isinstance(node, ast.Import):
            # import module [as alias]
            records.extend(
                {
                    "type": "import",
                    "module": item.name,
                    "name": None,
                    "alias": item.asname,
                    "line": node.lineno,
                    "file": file_path,
                }
                for item in node.names
            )
        elif isinstance(node, ast.ImportFrom):
            # from module import name [as alias]
            records.extend(
                {
                    "type": "from_import",
                    "module": node.module,
                    "name": item.name,
                    "alias": item.asname,
                    "level": node.level,
                    "line": node.lineno,
                    "file": file_path,
                }
                for item in node.names
            )

    return records
|
|
43
|
+
|
|
44
|
+
|
|
8
45
|
def extract_imports(file_path):
    """Extract imports from a Python file.

    The file is read with ``read_source_file`` and parsed with
    ``parse_source_to_ast`` (the project's encoding/BOM-aware helpers); the
    resulting tree is handed to ``extract_imports_from_tree``.
    """
    parsed = parse_source_to_ast(read_source_file(file_path), file_path=file_path)
    return extract_imports_from_tree(parsed, file_path)
|
|
@@ -7,22 +7,24 @@ from typing import Optional, Dict, Any
|
|
|
7
7
|
|
|
8
8
|
import json
|
|
9
9
|
import os
|
|
10
|
-
|
|
11
|
-
from analysis.indexing.symbol_index import SymbolIndex, SymbolInfo
|
|
12
|
-
from analysis.graph.callgraph_index import CallGraphIndex, CallSite
|
|
13
|
-
from analysis.explain.docstring_extractor import extract_docstrings
|
|
14
|
-
from analysis.explain.signature_extractor import extract_signatures
|
|
15
|
-
from analysis.explain.return_analyzer import analyze_returns
|
|
16
|
-
from analysis.explain.summary_generator import generate_symbol_summary
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
10
|
+
|
|
11
|
+
from analysis.indexing.symbol_index import SymbolIndex, SymbolInfo
|
|
12
|
+
from analysis.graph.callgraph_index import CallGraphIndex, CallSite
|
|
13
|
+
from analysis.explain.docstring_extractor import extract_docstrings
|
|
14
|
+
from analysis.explain.signature_extractor import extract_signatures
|
|
15
|
+
from analysis.explain.return_analyzer import analyze_returns
|
|
16
|
+
from analysis.explain.summary_generator import generate_symbol_summary
|
|
17
|
+
from analysis.utils.repo_walk import filter_skipped_dirs
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def collect_python_files(root_dir: str):
    """Return paths of ``.py`` files under ``root_dir``.

    Directories rejected by ``filter_skipped_dirs`` are pruned from the walk,
    and files whose name starts with ``__`` are left out.
    """
    collected = []
    for current, subdirs, names in os.walk(root_dir):
        # In-place assignment so os.walk does not descend into skipped dirs.
        subdirs[:] = filter_skipped_dirs(subdirs)
        collected.extend(
            os.path.join(current, name)
            for name in names
            if name.endswith(".py") and not name.startswith("__")
        )
    return collected
|
|
26
28
|
|
|
27
29
|
|
|
28
30
|
def parse_ast(file_path: str):
|
|
@@ -78,7 +80,11 @@ def merge_maps(dst: dict, src: dict):
|
|
|
78
80
|
dst[k].update(src.get(k, {}))
|
|
79
81
|
|
|
80
82
|
|
|
81
|
-
def run(
|
|
83
|
+
def run(
|
|
84
|
+
repo_dir: Optional[str] = None,
|
|
85
|
+
output_dir: Optional[str] = None,
|
|
86
|
+
symbol_snapshot: Optional[list] = None,
|
|
87
|
+
) -> Dict[str, Any]:
|
|
82
88
|
"""
|
|
83
89
|
Callable explain pipeline (Phase-5/6), suitable for CLI/VS Code.
|
|
84
90
|
|
|
@@ -114,23 +120,25 @@ def run(repo_dir: Optional[str] = None, output_dir: Optional[str] = None) -> Dic
|
|
|
114
120
|
# 2) Collect repo python files
|
|
115
121
|
python_files = collect_python_files(repo_dir)
|
|
116
122
|
|
|
117
|
-
# 3) Build symbol index + extractors across repo
|
|
118
|
-
symbol_index = SymbolIndex()
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
123
|
+
# 3) Build symbol index + extractors across repo
|
|
124
|
+
symbol_index = SymbolIndex()
|
|
125
|
+
loaded_snapshot = False
|
|
126
|
+
if isinstance(symbol_snapshot, list) and symbol_snapshot:
|
|
127
|
+
symbol_index.load_snapshot(symbol_snapshot)
|
|
128
|
+
loaded_snapshot = True
|
|
129
|
+
|
|
130
|
+
repo_docstrings = {"module": None, "classes": {}, "functions": {}, "methods": {}}
|
|
131
|
+
repo_signatures = {"functions": {}, "methods": {}}
|
|
132
|
+
repo_returns = {"functions": {}, "methods": {}}
|
|
133
|
+
|
|
134
|
+
for file_path in python_files:
|
|
135
|
+
tree = parse_ast(file_path)
|
|
136
|
+
if not loaded_snapshot:
|
|
137
|
+
module_path = file_to_module(file_path, repo_dir)
|
|
138
|
+
symbol_index.index_file(tree, module_path, file_path)
|
|
139
|
+
|
|
140
|
+
# extract per-file and merge
|
|
141
|
+
merge_maps(repo_docstrings, extract_docstrings(tree))
|
|
134
142
|
|
|
135
143
|
sigs = extract_signatures(tree)
|
|
136
144
|
repo_signatures["functions"].update(sigs.get("functions", {}))
|
|
@@ -11,11 +11,14 @@ from analysis.indexing.symbol_index import SymbolInfo
|
|
|
11
11
|
from analysis.graph.callgraph_index import CallGraphIndex
|
|
12
12
|
|
|
13
13
|
|
|
14
|
-
def _first_line(text: Optional[str]) -> Optional[str]:
|
|
15
|
-
if not text:
|
|
16
|
-
return None
|
|
17
|
-
|
|
18
|
-
|
|
14
|
+
def _first_line(text: Optional[str]) -> Optional[str]:
    """Return the first non-blank line of ``text`` with surrounding whitespace removed.

    Returns ``None`` when ``text`` is empty, ``None``, or whitespace only.
    """
    trimmed = text.strip() if text else ""
    if trimmed:
        # Leading whitespace is already gone, so the first split line is the
        # first line that carries content.
        head = trimmed.splitlines()[0].strip()
        if head:
            return head
    return None
|
|
19
22
|
|
|
20
23
|
|
|
21
24
|
def _humanize_name(name: str) -> str:
|
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import ast
|
|
4
|
+
import os
|
|
5
|
+
from typing import Any, Dict, List, Optional, Set, Tuple
|
|
6
|
+
|
|
7
|
+
from analysis.utils.bom_handler import read_and_parse_python_file
|
|
8
|
+
from analysis.utils.repo_walk import filter_skipped_dirs
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
# Final attribute names of decorators treated as web-route registrations
# (compared against the lower-cased last segment of the decorator's dotted name).
_ROUTE_DECORATORS = {
    "get",
    "post",
    "put",
    "delete",
    "patch",
    "options",
    "head",
    "websocket",
    "route",
    "api_route",
}

# Dotted-name suffixes of decorators treated as command-line registrations.
_CLI_DECORATOR_SUFFIXES = {
    ".command",
    ".group",
    ".callback",
}
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _collect_python_files(repo_dir: str) -> List[str]:
    """Return the sorted paths of ``.py`` files under ``repo_dir``.

    Directories rejected by ``filter_skipped_dirs`` are pruned. Dunder-named
    files are ignored, except ``__main__.py`` which is kept.
    """
    found: List[str] = []
    for current, subdirs, names in os.walk(repo_dir):
        # In-place assignment so os.walk does not descend into skipped dirs.
        subdirs[:] = filter_skipped_dirs(subdirs)
        found.extend(
            os.path.join(current, name)
            for name in names
            if name.endswith(".py")
            and (name == "__main__.py" or not name.startswith("__"))
        )
    found.sort()
    return found
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def _file_to_module(file_path: str, repo_root: str, repo_prefix: str) -> str:
    """Turn a file path into a dotted module name rooted at the repo prefix.

    The path is made relative to ``repo_root``, separators become dots and a
    trailing ``.py`` is dropped. When ``repo_prefix`` is empty, the base name
    of ``repo_root`` is used as the prefix instead.
    """
    root_abs = os.path.abspath(repo_root)
    relative = os.path.relpath(os.path.abspath(file_path), root_abs)
    dotted = relative.replace(os.sep, ".")
    if dotted.endswith(".py"):
        dotted = dotted[: -len(".py")]

    package = str(repo_prefix or os.path.basename(root_abs.rstrip("\\/"))).strip()
    if not package:
        return dotted
    return f"{package}.{dotted}"
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _dotted_name(node: ast.AST) -> str:
    """Return the dotted name an expression refers to, or ``""`` if it has none.

    Calls are unwrapped to the callee, attribute chains are joined with dots,
    and any other kind of node ends the chain (so ``x[0].attr`` yields
    ``"attr"``).
    """
    segments: List[str] = []
    current = node
    while True:
        if isinstance(current, ast.Call):
            current = current.func
        elif isinstance(current, ast.Attribute):
            segments.append(current.attr)
            current = current.value
        elif isinstance(current, ast.Name):
            segments.append(current.id)
            break
        else:
            break
    # Segments were gathered from the rightmost attribute inwards.
    return ".".join(reversed(segments))
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def _str_constant(node: Optional[ast.AST]) -> str:
    """Return the value of a string-literal node, or ``""`` for anything else."""
    if not isinstance(node, ast.Constant):
        return ""
    literal = node.value
    return literal if isinstance(literal, str) else ""
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def _main_guard(node: ast.AST) -> bool:
    """Return True when ``node`` is an ``if __name__ == "__main__":`` statement.

    Both operand orders are accepted (``__name__ == "__main__"`` and
    ``"__main__" == __name__``). Only a single ``==`` comparison counts;
    chained comparisons and other operators are rejected.
    """
    if not isinstance(node, ast.If):
        return False
    test = node.test
    if not isinstance(test, ast.Compare) or len(test.ops) != 1 or len(test.comparators) != 1:
        return False
    if not isinstance(test.ops[0], ast.Eq):
        return False

    def _is_dunder_name(expr: ast.AST) -> bool:
        return isinstance(expr, ast.Name) and expr.id == "__name__"

    def _is_main_literal(expr: ast.AST) -> bool:
        return isinstance(expr, ast.Constant) and expr.value == "__main__"

    left = test.left
    right = test.comparators[0]
    # Equality is symmetric, so the guard may be written either way round.
    return (_is_dunder_name(left) and _is_main_literal(right)) or (
        _is_main_literal(left) and _is_dunder_name(right)
    )
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
class _EntryPointVisitor(ast.NodeVisitor):
    """Collect likely entry points from one module's AST.

    Three kinds of entry are recorded in ``entries``:

    * ``api_route``    - functions decorated with a route-style decorator,
    * ``cli_command``  - functions decorated with a CLI-style decorator,
    * ``script_start`` - a top-level ``main`` function, a ``__main__`` guard,
      and functions called by bare name inside that guard's body.
    """

    def __init__(self, module: str, file_path: str):
        self.module = module
        self.file_path = file_path
        # Names of the classes currently being visited, outermost first.
        self.class_stack: List[str] = []
        self.entries: List[Dict[str, Any]] = []
        # (kind, fqn-or-title, line) keys of entries already recorded.
        self._seen: Set[Tuple[str, str, int]] = set()

    def _fqn_for(self, name: str) -> str:
        """Return the fully qualified name of ``name`` in the current class context."""
        if self.class_stack:
            return f"{self.module}.{'.'.join(self.class_stack)}.{name}"
        return f"{self.module}.{name}"

    def _add_entry(
        self,
        *,
        kind: str,
        title: str,
        reason: str,
        line: int,
        fqn: Optional[str] = None,
    ) -> None:
        """Append an entry unless an identical (kind, fqn/title, line) one exists."""
        key = (str(kind), str(fqn or title), int(line or 1))
        if key in self._seen:
            return
        self._seen.add(key)
        self.entries.append(
            {
                "kind": kind,
                "title": title,
                "reason": reason,
                "fqn": fqn or "",
                "file": self.file_path,
                "line": int(line or 1),
            }
        )

    def _record_function_entrypoints(self, node: ast.AST, name: str, decorators: List[ast.AST]) -> None:
        """Record route, CLI and ``main`` entries for one function definition."""
        fqn = self._fqn_for(name)
        line = getattr(node, "lineno", 1)
        for decorator in decorators:
            dotted = _dotted_name(decorator)
            if not dotted:
                continue
            last = dotted.split(".")[-1].lower()
            if last in _ROUTE_DECORATORS:
                path = ""
                # The first positional argument is taken as the route path
                # when it is a string literal.
                if isinstance(decorator, ast.Call) and decorator.args:
                    path = _str_constant(decorator.args[0])
                method_label = "Web route" if last in {"route", "api_route"} else last.upper()
                title = f"{method_label} {path}".strip()
                reason = "This function looks like a web request entry point."
                self._add_entry(
                    kind="api_route",
                    title=title,
                    reason=reason,
                    line=line,
                    fqn=fqn,
                )
            if dotted.endswith(tuple(_CLI_DECORATOR_SUFFIXES)) or dotted.startswith(("click.", "typer.")):
                self._add_entry(
                    kind="cli_command",
                    title=f"CLI command: {name}",
                    reason="This function looks like a command-line entry point.",
                    line=line,
                    fqn=fqn,
                )

        if not self.class_stack and name == "main":
            self._add_entry(
                kind="script_start",
                title="Script start: main()",
                reason="This is a common starting function for running the file directly.",
                line=line,
                fqn=fqn,
            )

    def visit_ClassDef(self, node: ast.ClassDef) -> None:
        """Track the class name while visiting its body so method FQNs include it."""
        self.class_stack.append(node.name)
        self.generic_visit(node)
        self.class_stack.pop()

    def visit_FunctionDef(self, node: ast.FunctionDef) -> None:
        """Record entry points for a function, then visit its body."""
        self._record_function_entrypoints(node, node.name, list(node.decorator_list or []))
        self.generic_visit(node)

    def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef) -> None:
        """Record entry points for an async function, then visit its body."""
        self._record_function_entrypoints(node, node.name, list(node.decorator_list or []))
        self.generic_visit(node)

    def visit_If(self, node: ast.If) -> None:
        """Record a ``__main__`` guard and the bare-name calls made inside its body."""
        if _main_guard(node):
            module_fqn = f"{self.module}.<module>"
            self._add_entry(
                kind="script_start",
                title="Run this file directly",
                reason="This file has a __main__ block, so it can start execution directly.",
                line=getattr(node, "lineno", 1),
                fqn=module_fqn,
            )
            # Only the guard's body runs when the file is executed directly.
            # The else/elif branch runs on import, so its calls must not be
            # reported as __main__ calls. Wrapping the body in a synthetic
            # Module keeps ast.walk's breadth-first order across statements.
            guarded = ast.Module(body=list(node.body), type_ignores=[])
            for child in ast.walk(guarded):
                if isinstance(child, ast.Call) and isinstance(child.func, ast.Name):
                    called_name = child.func.id
                    if called_name:
                        self._add_entry(
                            kind="script_start",
                            title=f"Script start: {called_name}()",
                            reason="This function is called from the file's __main__ block.",
                            line=getattr(child, "lineno", getattr(node, "lineno", 1)),
                            fqn=f"{self.module}.{called_name}",
                        )
        self.generic_visit(node)
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def detect_entry_points(repo_dir: str, repo_prefix: str = "") -> List[Dict[str, Any]]:
    """Scan a repository and return up to 50 likely entry points.

    Each Python file found by ``_collect_python_files`` is parsed and visited
    with ``_EntryPointVisitor``; files that fail to read or parse are skipped.
    Results are ordered by kind (API routes, CLI commands, script starts),
    then by file path and line number.
    """
    repo_root = os.path.abspath(repo_dir)
    prefix = str(repo_prefix or os.path.basename(repo_root.rstrip("\\/"))).strip()
    kind_rank = {"api_route": 0, "cli_command": 1, "script_start": 2}

    def _sort_key(entry: Dict[str, Any]):
        return (
            kind_rank.get(str(entry.get("kind", "")), 99),
            str(entry.get("file", "")),
            int(entry.get("line", 1)),
        )

    found: List[Dict[str, Any]] = []
    for file_path in _collect_python_files(repo_root):
        try:
            tree = read_and_parse_python_file(file_path)
        except Exception:
            # Best effort: an unreadable or unparsable file contributes nothing.
            continue
        collector = _EntryPointVisitor(
            module=_file_to_module(file_path, repo_root, prefix),
            file_path=file_path,
        )
        collector.visit(tree)
        found += collector.entries

    return sorted(found, key=_sort_key)[:50]
|
|
@@ -5,25 +5,25 @@ from typing import Optional, Dict, Any, List
|
|
|
5
5
|
|
|
6
6
|
import os
|
|
7
7
|
import json
|
|
8
|
-
from analysis.indexing.symbol_index import SymbolIndex
|
|
9
|
-
from analysis.indexing.import_resolver import ImportResolver
|
|
10
|
-
from analysis.call_graph.cross_file_resolver import CrossFileResolver
|
|
11
|
-
from analysis.call_graph.call_extractor import
|
|
12
|
-
from analysis.core.import_extractor import
|
|
13
|
-
from analysis.graph.callgraph_index import build_caller_fqn
|
|
8
|
+
from analysis.indexing.symbol_index import SymbolIndex
|
|
9
|
+
from analysis.indexing.import_resolver import ImportResolver
|
|
10
|
+
from analysis.call_graph.cross_file_resolver import CrossFileResolver
|
|
11
|
+
from analysis.call_graph.call_extractor import extract_function_calls_from_tree
|
|
12
|
+
from analysis.core.import_extractor import extract_imports_from_tree
|
|
13
|
+
from analysis.graph.callgraph_index import build_caller_fqn
|
|
14
|
+
from analysis.utils.repo_walk import filter_skipped_dirs
|
|
14
15
|
|
|
15
16
|
|
|
16
17
|
PROJECT_ROOT = os.path.dirname(os.path.dirname(__file__))
|
|
17
18
|
|
|
18
19
|
|
|
19
|
-
def collect_python_files(root_dir: str) -> List[str]:
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
py_files.append(os.path.join(root, file))
|
|
20
|
+
def collect_python_files(root_dir: str) -> List[str]:
    """Return paths of ``.py`` files under ``root_dir``.

    Directories rejected by ``filter_skipped_dirs`` are pruned from the walk,
    and files whose name starts with ``__`` are left out.
    """
    collected: List[str] = []
    for current, subdirs, names in os.walk(root_dir):
        # In-place assignment so os.walk does not descend into skipped dirs.
        subdirs[:] = filter_skipped_dirs(subdirs)
        collected.extend(
            os.path.join(current, name)
            for name in names
            if name.endswith(".py") and not name.startswith("__")
        )
    return collected
|
|
28
28
|
|
|
29
29
|
|
|
@@ -75,25 +75,27 @@ def run(repo_dir: Optional[str] = None, output_dir: Optional[str] = None, force_
|
|
|
75
75
|
|
|
76
76
|
os.makedirs(output_dir, exist_ok=True)
|
|
77
77
|
|
|
78
|
-
python_files = collect_python_files(repo_dir)
|
|
79
|
-
symbol_index = SymbolIndex()
|
|
80
|
-
file_module_map: Dict[str, str] = {}
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
78
|
+
python_files = collect_python_files(repo_dir)
|
|
79
|
+
symbol_index = SymbolIndex()
|
|
80
|
+
file_module_map: Dict[str, str] = {}
|
|
81
|
+
parsed_trees: Dict[str, Any] = {}
|
|
82
|
+
|
|
83
|
+
for file_path in python_files:
|
|
84
|
+
module_path = file_to_module(file_path, repo_dir)
|
|
85
|
+
file_module_map[file_path] = module_path
|
|
86
|
+
tree = parse_ast(file_path)
|
|
87
|
+
parsed_trees[file_path] = tree
|
|
88
|
+
symbol_index.index_file(tree, module_path, file_path)
|
|
89
|
+
|
|
90
|
+
import_resolver = ImportResolver(symbol_index)
|
|
91
|
+
for file_path in python_files:
|
|
92
|
+
module_path = file_module_map[file_path]
|
|
93
|
+
imports = extract_imports_from_tree(parsed_trees[file_path], file_path)
|
|
94
|
+
import_resolver.index_module_imports(module_path, imports)
|
|
95
|
+
|
|
96
|
+
all_calls = []
|
|
97
|
+
for file_path in python_files:
|
|
98
|
+
all_calls.extend(extract_function_calls_from_tree(parsed_trees[file_path], file_path))
|
|
97
99
|
|
|
98
100
|
cross_resolver = CrossFileResolver(symbol_index, import_resolver)
|
|
99
101
|
resolved_calls = []
|
|
@@ -5,15 +5,15 @@ import json
|
|
|
5
5
|
import os
|
|
6
6
|
import shutil
|
|
7
7
|
import tempfile
|
|
8
|
-
from datetime import datetime, timezone
|
|
9
|
-
from threading import RLock
|
|
10
|
-
from typing import Any, Dict, List, Optional, Tuple
|
|
11
|
-
|
|
12
|
-
from
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
8
|
+
from datetime import datetime, timezone
|
|
9
|
+
from threading import RLock
|
|
10
|
+
from typing import Any, Dict, List, Optional, Tuple
|
|
11
|
+
|
|
12
|
+
from analysis.utils.repo_walk import filter_skipped_dirs
|
|
13
|
+
from security_utils import redact_secrets
|
|
14
|
+
|
|
15
|
+
_LOCK = RLock()
|
|
16
|
+
_SENSITIVE_KEYS = ("api_key", "token", "authorization", "bearer", "basic", "secret", "password")
|
|
17
17
|
|
|
18
18
|
|
|
19
19
|
def _project_root() -> str:
|
|
@@ -195,14 +195,14 @@ def save_policy(policy: Dict[str, Any], base_dir: Optional[str] = None) -> Dict[
|
|
|
195
195
|
|
|
196
196
|
def collect_fingerprints(repo_dir: str) -> Dict[str, Dict[str, int]]:
|
|
197
197
|
repo_root = os.path.abspath(repo_dir)
|
|
198
|
-
out: Dict[str, Dict[str, int]] = {}
|
|
199
|
-
if not os.path.isdir(repo_root):
|
|
200
|
-
return out
|
|
201
|
-
for root, dirs, files in os.walk(repo_root):
|
|
202
|
-
dirs[:] =
|
|
203
|
-
for name in files:
|
|
204
|
-
if not name.endswith(".py"):
|
|
205
|
-
continue
|
|
198
|
+
out: Dict[str, Dict[str, int]] = {}
|
|
199
|
+
if not os.path.isdir(repo_root):
|
|
200
|
+
return out
|
|
201
|
+
for root, dirs, files in os.walk(repo_root):
|
|
202
|
+
dirs[:] = filter_skipped_dirs(dirs)
|
|
203
|
+
for name in files:
|
|
204
|
+
if not name.endswith(".py"):
|
|
205
|
+
continue
|
|
206
206
|
fp = os.path.join(root, name)
|
|
207
207
|
try:
|
|
208
208
|
st = os.stat(fp)
|
|
@@ -250,16 +250,21 @@ def save_manifest(repo_dir: str, manifest: Dict[str, Any], base_dir: Optional[st
|
|
|
250
250
|
_atomic_json_write(_manifest_path(repo_dir, base_dir), _scrub_payload(payload))
|
|
251
251
|
|
|
252
252
|
|
|
253
|
-
def should_rebuild(
|
|
253
|
+
def should_rebuild(
    repo_dir: str,
    analysis_version: str = "2.2",
    base_dir: Optional[str] = None,
    current_fingerprints: Optional[Dict[str, Any]] = None,
) -> bool:
    """Return True when the cached analysis for ``repo_dir`` must be rebuilt.

    A rebuild is needed when no manifest is stored, when the stored
    ``analysis_version`` differs from the requested one, or when the
    fingerprint diff reports a non-zero ``changed_count``. Callers that have
    already collected fingerprints can pass them as ``current_fingerprints``
    to avoid a second scan; any non-dict value triggers a fresh collection.
    """
    manifest = load_manifest(repo_dir, base_dir=base_dir)
    if not manifest:
        return True

    stored_version = str(manifest.get("analysis_version", "") or "")
    if stored_version != str(analysis_version or ""):
        return True

    recorded = manifest.get("fingerprints")
    if not isinstance(recorded, dict):
        recorded = {}

    if isinstance(current_fingerprints, dict):
        live = current_fingerprints
    else:
        live = collect_fingerprints(repo_dir)

    return bool(diff_fingerprints(recorded, live).get("changed_count", 0))
|
|
263
268
|
|
|
264
269
|
|
|
265
270
|
def _default_metadata(repo_hash: str) -> Dict[str, Any]:
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
"""Shared repository walking rules for source analysis."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Iterable
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
# Directory names that source analysis never descends into: VCS metadata,
# caches, virtual environments and installed third-party packages.
SKIP_DIR_NAMES = {
    ".git",
    "__pycache__",
    ".codemap_cache",
    ".venv",
    "venv",
    "env",
    "ENV",
    ".env",
    "node_modules",
    "site-packages",
    "dist-packages",
    ".tox",
    ".nox",
}


def filter_skipped_dirs(dir_names: Iterable[str]) -> list[str]:
    """Return, in their original order, the names not listed in ``SKIP_DIR_NAMES``.

    Matching is exact and case-sensitive.
    """
    kept = []
    for entry in dir_names:
        if entry in SKIP_DIR_NAMES:
            continue
        kept.append(entry)
    return kept
|