codemap-python 0.1.3__tar.gz → 0.1.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {codemap_python-0.1.3 → codemap_python-0.1.5}/PKG-INFO +24 -12
- {codemap_python-0.1.3 → codemap_python-0.1.5}/README.md +22 -9
- {codemap_python-0.1.3 → codemap_python-0.1.5}/analysis/call_graph/call_extractor.py +15 -16
- {codemap_python-0.1.3 → codemap_python-0.1.5}/analysis/core/ast_parser.py +7 -13
- {codemap_python-0.1.3 → codemap_python-0.1.5}/analysis/core/import_extractor.py +46 -46
- {codemap_python-0.1.3 → codemap_python-0.1.5}/analysis/explain/explain_runner.py +50 -47
- {codemap_python-0.1.3 → codemap_python-0.1.5}/analysis/explain/summary_generator.py +8 -5
- {codemap_python-0.1.3 → codemap_python-0.1.5}/analysis/runners/phase4_runner.py +41 -44
- codemap_python-0.1.5/analysis/utils/bom_handler.py +119 -0
- {codemap_python-0.1.3 → codemap_python-0.1.5}/analysis/utils/cache_manager.py +26 -21
- codemap_python-0.1.5/analysis/utils/progress_spinner.py +85 -0
- codemap_python-0.1.5/analysis/utils/repo_walk.py +27 -0
- codemap_python-0.1.3/codemap_cli.py → codemap_python-0.1.5/cli.py +11 -11
- codemap_python-0.1.3/cli.py → codemap_python-0.1.5/codemap_app.py +203 -132
- codemap_python-0.1.5/codemap_cli.py +11 -0
- {codemap_python-0.1.3 → codemap_python-0.1.5}/codemap_python.egg-info/PKG-INFO +24 -12
- {codemap_python-0.1.3 → codemap_python-0.1.5}/codemap_python.egg-info/SOURCES.txt +8 -0
- {codemap_python-0.1.3 → codemap_python-0.1.5}/codemap_python.egg-info/top_level.txt +1 -1
- {codemap_python-0.1.3 → codemap_python-0.1.5}/pyproject.toml +3 -4
- {codemap_python-0.1.3 → codemap_python-0.1.5}/tests/test_cache_cli_commands.py +35 -30
- codemap_python-0.1.5/tests/test_cli_invalid_escape_warnings.py +35 -0
- codemap_python-0.1.5/tests/test_codemap_cli_entrypoint.py +12 -0
- codemap_python-0.1.5/tests/test_explain_runner_collection.py +37 -0
- codemap_python-0.1.5/tests/test_repo_walk_filters.py +57 -0
- {codemap_python-0.1.3 → codemap_python-0.1.5}/tests/test_security_cli_integration.py +6 -6
- codemap_python-0.1.5/tests/test_summary_generator.py +12 -0
- {codemap_python-0.1.3 → codemap_python-0.1.5}/ui/app.py +10 -6
- {codemap_python-0.1.3 → codemap_python-0.1.5}/ui/static/app.js +69 -10
- codemap_python-0.1.3/analysis/utils/bom_handler.py +0 -55
- {codemap_python-0.1.3 → codemap_python-0.1.5}/analysis/__init__.py +0 -0
- {codemap_python-0.1.3 → codemap_python-0.1.5}/analysis/architecture/__init__.py +0 -0
- {codemap_python-0.1.3 → codemap_python-0.1.5}/analysis/architecture/architecture_engine.py +0 -0
- {codemap_python-0.1.3 → codemap_python-0.1.5}/analysis/architecture/dependency_cycles.py +0 -0
- {codemap_python-0.1.3 → codemap_python-0.1.5}/analysis/architecture/risk_radar.py +0 -0
- {codemap_python-0.1.3 → codemap_python-0.1.5}/analysis/call_graph/__init__.py +0 -0
- {codemap_python-0.1.3 → codemap_python-0.1.5}/analysis/call_graph/call_graph_builder.py +0 -0
- {codemap_python-0.1.3 → codemap_python-0.1.5}/analysis/call_graph/call_resolver.py +0 -0
- {codemap_python-0.1.3 → codemap_python-0.1.5}/analysis/call_graph/context_models.py +0 -0
- {codemap_python-0.1.3 → codemap_python-0.1.5}/analysis/call_graph/cross_file_resolver.py +0 -0
- {codemap_python-0.1.3 → codemap_python-0.1.5}/analysis/call_graph/execution_tracker.py +0 -0
- {codemap_python-0.1.3 → codemap_python-0.1.5}/analysis/call_graph/flow_builder.py +0 -0
- {codemap_python-0.1.3 → codemap_python-0.1.5}/analysis/call_graph/models.py +0 -0
- {codemap_python-0.1.3 → codemap_python-0.1.5}/analysis/core/__init__.py +0 -0
- {codemap_python-0.1.3 → codemap_python-0.1.5}/analysis/core/ast_context.py +0 -0
- {codemap_python-0.1.3 → codemap_python-0.1.5}/analysis/core/class_extractor.py +0 -0
- {codemap_python-0.1.3 → codemap_python-0.1.5}/analysis/core/function_extractor.py +0 -0
- {codemap_python-0.1.3 → codemap_python-0.1.5}/analysis/explain/__init__.py +0 -0
- {codemap_python-0.1.3 → codemap_python-0.1.5}/analysis/explain/docstring_extractor.py +0 -0
- {codemap_python-0.1.3 → codemap_python-0.1.5}/analysis/explain/repo_summary_generator.py +0 -0
- {codemap_python-0.1.3 → codemap_python-0.1.5}/analysis/explain/return_analyzer.py +0 -0
- {codemap_python-0.1.3 → codemap_python-0.1.5}/analysis/explain/risk_flags.py +0 -0
- {codemap_python-0.1.3 → codemap_python-0.1.5}/analysis/explain/signature_extractor.py +0 -0
- {codemap_python-0.1.3 → codemap_python-0.1.5}/analysis/graph/__init__.py +0 -0
- {codemap_python-0.1.3 → codemap_python-0.1.5}/analysis/graph/callgraph_index.py +0 -0
- {codemap_python-0.1.3 → codemap_python-0.1.5}/analysis/graph/entrypoint_detector.py +0 -0
- {codemap_python-0.1.3 → codemap_python-0.1.5}/analysis/graph/impact_analyzer.py +0 -0
- {codemap_python-0.1.3 → codemap_python-0.1.5}/analysis/indexing/__init__.py +0 -0
- {codemap_python-0.1.3 → codemap_python-0.1.5}/analysis/indexing/import_resolver.py +0 -0
- {codemap_python-0.1.3 → codemap_python-0.1.5}/analysis/indexing/symbol_index.py +0 -0
- {codemap_python-0.1.3 → codemap_python-0.1.5}/analysis/runners/__init__.py +0 -0
- {codemap_python-0.1.3 → codemap_python-0.1.5}/analysis/utils/__init__.py +0 -0
- {codemap_python-0.1.3 → codemap_python-0.1.5}/analysis/utils/ast_helpers.py +0 -0
- {codemap_python-0.1.3 → codemap_python-0.1.5}/analysis/utils/path_resolver.py +0 -0
- {codemap_python-0.1.3 → codemap_python-0.1.5}/analysis/utils/repo_fetcher.py +0 -0
- {codemap_python-0.1.3 → codemap_python-0.1.5}/codemap_python.egg-info/dependency_links.txt +0 -0
- {codemap_python-0.1.3 → codemap_python-0.1.5}/codemap_python.egg-info/entry_points.txt +0 -0
- {codemap_python-0.1.3 → codemap_python-0.1.5}/codemap_python.egg-info/requires.txt +0 -0
- {codemap_python-0.1.3 → codemap_python-0.1.5}/security_utils.py +0 -0
- {codemap_python-0.1.3 → codemap_python-0.1.5}/setup.cfg +0 -0
- {codemap_python-0.1.3 → codemap_python-0.1.5}/tests/test_cache_retention.py +0 -0
- {codemap_python-0.1.3 → codemap_python-0.1.5}/tests/test_no_key_persistence.py +0 -0
- {codemap_python-0.1.3 → codemap_python-0.1.5}/tests/test_registry_session_mode.py +0 -0
- {codemap_python-0.1.3 → codemap_python-0.1.5}/tests/test_security_redaction.py +0 -0
- {codemap_python-0.1.3 → codemap_python-0.1.5}/tests/test_symbol_explain_cache.py +0 -0
- {codemap_python-0.1.3 → codemap_python-0.1.5}/tests/test_symbol_info_endpoint.py +0 -0
- {codemap_python-0.1.3 → codemap_python-0.1.5}/tests/test_ui_private_mode_security.py +0 -0
- {codemap_python-0.1.3 → codemap_python-0.1.5}/tests/test_ui_retention_controls.py +0 -0
- {codemap_python-0.1.3 → codemap_python-0.1.5}/ui/__init__.py +0 -0
- {codemap_python-0.1.3 → codemap_python-0.1.5}/ui/device_id.py +0 -0
- {codemap_python-0.1.3 → codemap_python-0.1.5}/ui/static/styles.css +0 -0
- {codemap_python-0.1.3 → codemap_python-0.1.5}/ui/templates/index.html +0 -0
- {codemap_python-0.1.3 → codemap_python-0.1.5}/ui/utils/__init__.py +0 -0
- {codemap_python-0.1.3 → codemap_python-0.1.5}/ui/utils/registry_manager.py +0 -0
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: codemap-python
|
|
3
|
-
Version: 0.1.
|
|
3
|
+
Version: 0.1.5
|
|
4
4
|
Summary: Local Python code analysis tool - understand architecture, dependencies, and call graphs
|
|
5
5
|
Author-email: ADITYA <aditykushwaha69@gmail.com>
|
|
6
|
-
License: MIT
|
|
6
|
+
License-Expression: MIT
|
|
7
7
|
Project-URL: Homepage, https://github.com/ADITYA-kus/codemap_ai
|
|
8
8
|
Project-URL: Repository, https://github.com/ADITYA-kus/codemap_ai.git
|
|
9
9
|
Project-URL: Issues, https://github.com/ADITYA-kus/codemap_ai/issues
|
|
@@ -11,7 +11,6 @@ Project-URL: Documentation, https://github.com/ADITYA-kus/codemap_ai#readme
|
|
|
11
11
|
Keywords: code-analysis,python,architecture,call-graph,cli,dashboard,local,privacy
|
|
12
12
|
Classifier: Development Status :: 4 - Beta
|
|
13
13
|
Classifier: Intended Audience :: Developers
|
|
14
|
-
Classifier: License :: OSI Approved :: MIT License
|
|
15
14
|
Classifier: Programming Language :: Python :: 3
|
|
16
15
|
Classifier: Programming Language :: Python :: 3.10
|
|
17
16
|
Classifier: Programming Language :: Python :: 3.11
|
|
@@ -159,22 +158,35 @@ codemap open --port 8000
|
|
|
159
158
|
|
|
160
159
|
### Cache Management
|
|
161
160
|
```bash
|
|
162
|
-
# List all analyzed repositories
|
|
161
|
+
# 📋 List all analyzed repositories and their cache info
|
|
163
162
|
codemap cache list
|
|
164
163
|
|
|
165
|
-
# Show cache
|
|
166
|
-
codemap cache info <
|
|
164
|
+
# 📊 Show detailed cache information for a specific repository
|
|
165
|
+
codemap cache info --path <repo_directory>
|
|
167
166
|
|
|
168
|
-
#
|
|
169
|
-
codemap cache
|
|
167
|
+
# ⏱️ Set cache retention policy (automatically clean old caches)
|
|
168
|
+
codemap cache retention --path <repo_directory> --days 30 --yes
|
|
170
169
|
|
|
171
|
-
#
|
|
172
|
-
codemap cache
|
|
170
|
+
# 🧹 Preview what would be cleaned (safe, no deletion)
|
|
171
|
+
codemap cache sweep --dry-run
|
|
173
172
|
|
|
174
|
-
#
|
|
175
|
-
codemap cache sweep
|
|
173
|
+
# 🧹 Actually clean up expired caches (requires --yes confirmation)
|
|
174
|
+
codemap cache sweep --yes
|
|
175
|
+
|
|
176
|
+
# 🗑️ Clear cache for a specific repository (preview first)
|
|
177
|
+
codemap cache clear --path <repo_directory> --dry-run
|
|
178
|
+
|
|
179
|
+
# 🗑️ Actually delete a repository's cache (requires --yes confirmation)
|
|
180
|
+
codemap cache clear --path <repo_directory> --yes
|
|
176
181
|
```
|
|
177
182
|
|
|
183
|
+
**Cache Management Tips:**
|
|
184
|
+
- ✅ Always use `--dry-run` first to preview changes
|
|
185
|
+
- ✅ Add `--yes` flag to skip confirmation (useful in scripts)
|
|
186
|
+
- ✅ Default retention is 14 days; adjust with `--days <number>`
|
|
187
|
+
- ✅ Cache is stored in: `~/.codemap_cache/` (varies by OS)
|
|
188
|
+
- ✅ Use `cache list` to see all cached repositories and their sizes
|
|
189
|
+
|
|
178
190
|
**Get GitHub Token (for private repos):**
|
|
179
191
|
1. Go to https://github.com/settings/tokens
|
|
180
192
|
2. Click "Generate new token" → "Generate new token (classic)"
|
|
@@ -131,22 +131,35 @@ codemap open --port 8000
|
|
|
131
131
|
|
|
132
132
|
### Cache Management
|
|
133
133
|
```bash
|
|
134
|
-
# List all analyzed repositories
|
|
134
|
+
# 📋 List all analyzed repositories and their cache info
|
|
135
135
|
codemap cache list
|
|
136
136
|
|
|
137
|
-
# Show cache
|
|
138
|
-
codemap cache info <
|
|
137
|
+
# 📊 Show detailed cache information for a specific repository
|
|
138
|
+
codemap cache info --path <repo_directory>
|
|
139
139
|
|
|
140
|
-
#
|
|
141
|
-
codemap cache
|
|
140
|
+
# ⏱️ Set cache retention policy (automatically clean old caches)
|
|
141
|
+
codemap cache retention --path <repo_directory> --days 30 --yes
|
|
142
142
|
|
|
143
|
-
#
|
|
144
|
-
codemap cache
|
|
143
|
+
# 🧹 Preview what would be cleaned (safe, no deletion)
|
|
144
|
+
codemap cache sweep --dry-run
|
|
145
145
|
|
|
146
|
-
#
|
|
147
|
-
codemap cache sweep
|
|
146
|
+
# 🧹 Actually clean up expired caches (requires --yes confirmation)
|
|
147
|
+
codemap cache sweep --yes
|
|
148
|
+
|
|
149
|
+
# 🗑️ Clear cache for a specific repository (preview first)
|
|
150
|
+
codemap cache clear --path <repo_directory> --dry-run
|
|
151
|
+
|
|
152
|
+
# 🗑️ Actually delete a repository's cache (requires --yes confirmation)
|
|
153
|
+
codemap cache clear --path <repo_directory> --yes
|
|
148
154
|
```
|
|
149
155
|
|
|
156
|
+
**Cache Management Tips:**
|
|
157
|
+
- ✅ Always use `--dry-run` first to preview changes
|
|
158
|
+
- ✅ Add `--yes` flag to skip confirmation (useful in scripts)
|
|
159
|
+
- ✅ Default retention is 14 days; adjust with `--days <number>`
|
|
160
|
+
- ✅ Cache is stored in: `~/.codemap_cache/` (varies by OS)
|
|
161
|
+
- ✅ Use `cache list` to see all cached repositories and their sizes
|
|
162
|
+
|
|
150
163
|
**Get GitHub Token (for private repos):**
|
|
151
164
|
1. Go to https://github.com/settings/tokens
|
|
152
165
|
2. Click "Generate new token" → "Generate new token (classic)"
|
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
# AST Call detection
|
|
2
|
-
|
|
3
|
-
import ast
|
|
4
|
-
from analysis.utils.bom_handler import
|
|
1
|
+
# AST Call detection
|
|
2
|
+
|
|
3
|
+
import ast
|
|
4
|
+
from analysis.utils.bom_handler import read_source_file, parse_source_to_ast
|
|
5
5
|
|
|
6
6
|
class FunctionCallVisitor(ast.NodeVisitor):
|
|
7
7
|
def __init__(self, file_path):
|
|
@@ -82,15 +82,14 @@ class FunctionCallVisitor(ast.NodeVisitor):
|
|
|
82
82
|
return None
|
|
83
83
|
|
|
84
84
|
|
|
85
|
-
def extract_function_calls(file_path):
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
visitor
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
return visitor.calls
|
|
85
|
+
def extract_function_calls(file_path):
    """Read and parse *file_path*, then return the calls found in it.

    The file is loaded with ``read_source_file`` and parsed with
    ``parse_source_to_ast``; the resulting tree is handed to
    ``extract_function_calls_from_tree``.

    Args:
        file_path: Path of the Python file to analyse.

    Returns:
        Whatever ``extract_function_calls_from_tree`` returns for the file.
    """
    parsed = parse_source_to_ast(read_source_file(file_path), file_path=file_path)
    return extract_function_calls_from_tree(parsed, file_path)
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def extract_function_calls_from_tree(tree, file_path):
    """Collect calls from an already-parsed AST.

    Args:
        tree: AST to traverse.
        file_path: Path passed to ``FunctionCallVisitor`` on construction.

    Returns:
        The ``calls`` attribute of the visitor after it has visited *tree*.
    """
    call_visitor = FunctionCallVisitor(file_path)
    call_visitor.visit(tree)
    return call_visitor.calls
|
|
@@ -1,13 +1,12 @@
|
|
|
1
|
-
# AST Parser Module
|
|
2
|
-
import
|
|
3
|
-
from analysis.utils.bom_handler import remove_bom
|
|
1
|
+
# AST Parser Module
|
|
2
|
+
from analysis.utils.bom_handler import read_and_parse_python_file
|
|
4
3
|
|
|
5
4
|
|
|
6
|
-
def parse_python_file(file_path):
|
|
7
|
-
"""Parse a Python file
|
|
5
|
+
def parse_python_file(file_path):
|
|
6
|
+
"""Parse a Python file with automatic encoding and BOM handling.
|
|
8
7
|
|
|
9
8
|
This function:
|
|
10
|
-
1. Reads the file with UTF-8
|
|
9
|
+
1. Reads the file with automatic encoding detection (UTF-8 → Latin-1)
|
|
11
10
|
2. Removes any BOM characters automatically
|
|
12
11
|
3. Parses the cleaned source code
|
|
13
12
|
|
|
@@ -20,12 +19,7 @@ def parse_python_file(file_path):
|
|
|
20
19
|
Raises:
|
|
21
20
|
SyntaxError: If source code has syntax errors
|
|
22
21
|
FileNotFoundError: If file doesn't exist
|
|
22
|
+
ValueError: If file encoding cannot be determined
|
|
23
23
|
"""
|
|
24
|
-
|
|
25
|
-
source = f.read()
|
|
26
|
-
|
|
27
|
-
# Remove BOM if present (handles files from Windows editors, etc.)
|
|
28
|
-
source = remove_bom(source)
|
|
29
|
-
|
|
30
|
-
return ast.parse(source)
|
|
24
|
+
return read_and_parse_python_file(file_path)
|
|
31
25
|
|
|
@@ -1,49 +1,49 @@
|
|
|
1
1
|
# Import Extractor Module
|
|
2
2
|
# analysis/import_extractor.py
|
|
3
3
|
|
|
4
|
-
import ast
|
|
5
|
-
from analysis.utils.bom_handler import
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
def
|
|
9
|
-
"""Extract imports from
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
return
|
|
4
|
+
import ast
|
|
5
|
+
from analysis.utils.bom_handler import read_source_file, parse_source_to_ast
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def extract_imports_from_tree(tree, file_path):
    """Return one record per imported name found anywhere in *tree*.

    Nodes are visited in ``ast.walk`` order. Each alias of an ``import``
    statement yields a record with ``type == "import"`` and ``name`` set to
    ``None``; each alias of a ``from ... import`` statement yields a record
    with ``type == "from_import"`` plus the statement's relative ``level``
    (0 = absolute, >0 = relative).

    Args:
        tree: Parsed AST to scan.
        file_path: Value stored under the ``"file"`` key of every record.

    Returns:
        A list of dicts describing the imports.
    """
    found = []

    for stmt in ast.walk(tree):
        # import module
        if isinstance(stmt, ast.Import):
            found.extend(
                {
                    "type": "import",
                    "module": item.name,
                    "name": None,
                    "alias": item.asname,
                    "line": stmt.lineno,
                    "file": file_path,
                }
                for item in stmt.names
            )
            continue

        if not isinstance(stmt, ast.ImportFrom):
            continue

        # from module import name
        found.extend(
            {
                "type": "from_import",
                "module": stmt.module,
                "name": item.name,
                "alias": item.asname,
                "level": stmt.level,
                "line": stmt.lineno,
                "file": file_path,
            }
            for item in stmt.names
        )

    return found
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def extract_imports(file_path):
    """Load *file_path* and return the import records found in it.

    The source is read with ``read_source_file``, parsed with
    ``parse_source_to_ast`` and then scanned by ``extract_imports_from_tree``.

    Args:
        file_path: Path of the Python file to analyse.

    Returns:
        The list produced by ``extract_imports_from_tree``.
    """
    parsed = parse_source_to_ast(read_source_file(file_path), file_path=file_path)
    return extract_imports_from_tree(parsed, file_path)
|
|
@@ -5,35 +5,32 @@ from __future__ import annotations
|
|
|
5
5
|
|
|
6
6
|
from typing import Optional, Dict, Any
|
|
7
7
|
|
|
8
|
-
import
|
|
9
|
-
import
|
|
10
|
-
|
|
11
|
-
from analysis.
|
|
12
|
-
|
|
13
|
-
from analysis.
|
|
14
|
-
from analysis.
|
|
15
|
-
from analysis.explain.
|
|
16
|
-
from analysis.explain.
|
|
17
|
-
from analysis.
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
for file in files:
|
|
25
|
-
if file.endswith(".py") and not file.startswith("__"):
|
|
26
|
-
py_files.append(os.path.join(root, file))
|
|
27
|
-
return py_files
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
def parse_ast(file_path: str)
|
|
31
|
-
"""Parse a Python file
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
# Remove BOM if present
|
|
35
|
-
source = remove_bom(source)
|
|
36
|
-
return ast.parse(source)
|
|
8
|
+
import json
|
|
9
|
+
import os
|
|
10
|
+
|
|
11
|
+
from analysis.indexing.symbol_index import SymbolIndex, SymbolInfo
|
|
12
|
+
from analysis.graph.callgraph_index import CallGraphIndex, CallSite
|
|
13
|
+
from analysis.explain.docstring_extractor import extract_docstrings
|
|
14
|
+
from analysis.explain.signature_extractor import extract_signatures
|
|
15
|
+
from analysis.explain.return_analyzer import analyze_returns
|
|
16
|
+
from analysis.explain.summary_generator import generate_symbol_summary
|
|
17
|
+
from analysis.utils.repo_walk import filter_skipped_dirs
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def collect_python_files(root_dir: str):
    """Gather the paths of Python files below *root_dir*.

    Directories are pruned in place with ``filter_skipped_dirs`` so that
    ``os.walk`` does not descend into them. A file is kept when its name
    ends with ``.py`` and does not start with ``__``.

    Args:
        root_dir: Directory to walk.

    Returns:
        List of joined paths, in ``os.walk`` order.
    """
    collected = []
    for current_dir, subdirs, filenames in os.walk(root_dir):
        # Slice assignment mutates the list os.walk iterates over.
        subdirs[:] = filter_skipped_dirs(subdirs)
        collected.extend(
            os.path.join(current_dir, filename)
            for filename in filenames
            if filename.endswith(".py") and not filename.startswith("__")
        )
    return collected
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def parse_ast(file_path: str):
    """Return the AST of *file_path*.

    Delegates to ``read_and_parse_python_file`` from
    ``analysis.utils.bom_handler``, imported at call time.
    """
    from analysis.utils.bom_handler import read_and_parse_python_file as load_tree

    return load_tree(file_path)
|
|
37
34
|
|
|
38
35
|
|
|
39
36
|
def file_to_module(file_path: str, repo_root: str) -> str:
|
|
@@ -83,7 +80,11 @@ def merge_maps(dst: dict, src: dict):
|
|
|
83
80
|
dst[k].update(src.get(k, {}))
|
|
84
81
|
|
|
85
82
|
|
|
86
|
-
def run(
|
|
83
|
+
def run(
|
|
84
|
+
repo_dir: Optional[str] = None,
|
|
85
|
+
output_dir: Optional[str] = None,
|
|
86
|
+
symbol_snapshot: Optional[list] = None,
|
|
87
|
+
) -> Dict[str, Any]:
|
|
87
88
|
"""
|
|
88
89
|
Callable explain pipeline (Phase-5/6), suitable for CLI/VS Code.
|
|
89
90
|
|
|
@@ -119,23 +120,25 @@ def run(repo_dir: Optional[str] = None, output_dir: Optional[str] = None) -> Dic
|
|
|
119
120
|
# 2) Collect repo python files
|
|
120
121
|
python_files = collect_python_files(repo_dir)
|
|
121
122
|
|
|
122
|
-
# 3) Build symbol index + extractors across repo
|
|
123
|
-
symbol_index = SymbolIndex()
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
123
|
+
# 3) Build symbol index + extractors across repo
|
|
124
|
+
symbol_index = SymbolIndex()
|
|
125
|
+
loaded_snapshot = False
|
|
126
|
+
if isinstance(symbol_snapshot, list) and symbol_snapshot:
|
|
127
|
+
symbol_index.load_snapshot(symbol_snapshot)
|
|
128
|
+
loaded_snapshot = True
|
|
129
|
+
|
|
130
|
+
repo_docstrings = {"module": None, "classes": {}, "functions": {}, "methods": {}}
|
|
131
|
+
repo_signatures = {"functions": {}, "methods": {}}
|
|
132
|
+
repo_returns = {"functions": {}, "methods": {}}
|
|
133
|
+
|
|
134
|
+
for file_path in python_files:
|
|
135
|
+
tree = parse_ast(file_path)
|
|
136
|
+
if not loaded_snapshot:
|
|
137
|
+
module_path = file_to_module(file_path, repo_dir)
|
|
138
|
+
symbol_index.index_file(tree, module_path, file_path)
|
|
139
|
+
|
|
140
|
+
# extract per-file and merge
|
|
141
|
+
merge_maps(repo_docstrings, extract_docstrings(tree))
|
|
139
142
|
|
|
140
143
|
sigs = extract_signatures(tree)
|
|
141
144
|
repo_signatures["functions"].update(sigs.get("functions", {}))
|
|
@@ -11,11 +11,14 @@ from analysis.indexing.symbol_index import SymbolInfo
|
|
|
11
11
|
from analysis.graph.callgraph_index import CallGraphIndex
|
|
12
12
|
|
|
13
13
|
|
|
14
|
-
def _first_line(text: Optional[str]) -> Optional[str]:
|
|
15
|
-
if not text:
|
|
16
|
-
return None
|
|
17
|
-
|
|
18
|
-
|
|
14
|
+
def _first_line(text: Optional[str]) -> Optional[str]:
    """Return the first non-blank line of *text*, stripped.

    Args:
        text: Text to inspect; may be ``None`` or empty.

    Returns:
        The first line after surrounding whitespace is removed, or ``None``
        when *text* is falsy or contains only whitespace.
    """
    body = (text or "").strip()
    if not body:
        return None
    head = body.splitlines()[0].strip()
    return head if head else None
|
|
19
22
|
|
|
20
23
|
|
|
21
24
|
def _humanize_name(name: str) -> str:
|
|
@@ -3,39 +3,34 @@ from __future__ import annotations
|
|
|
3
3
|
|
|
4
4
|
from typing import Optional, Dict, Any, List
|
|
5
5
|
|
|
6
|
-
import os
|
|
7
|
-
import
|
|
8
|
-
import
|
|
9
|
-
from analysis.indexing.
|
|
10
|
-
from analysis.
|
|
11
|
-
from analysis.call_graph.
|
|
12
|
-
from analysis.
|
|
13
|
-
from analysis.
|
|
14
|
-
from analysis.
|
|
15
|
-
from analysis.utils.bom_handler import remove_bom
|
|
6
|
+
import os
|
|
7
|
+
import json
|
|
8
|
+
from analysis.indexing.symbol_index import SymbolIndex
|
|
9
|
+
from analysis.indexing.import_resolver import ImportResolver
|
|
10
|
+
from analysis.call_graph.cross_file_resolver import CrossFileResolver
|
|
11
|
+
from analysis.call_graph.call_extractor import extract_function_calls_from_tree
|
|
12
|
+
from analysis.core.import_extractor import extract_imports_from_tree
|
|
13
|
+
from analysis.graph.callgraph_index import build_caller_fqn
|
|
14
|
+
from analysis.utils.repo_walk import filter_skipped_dirs
|
|
16
15
|
|
|
17
16
|
|
|
18
17
|
PROJECT_ROOT = os.path.dirname(os.path.dirname(__file__))
|
|
19
18
|
|
|
20
19
|
|
|
21
|
-
def collect_python_files(root_dir: str) -> List[str]:
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
py_files.append(os.path.join(root, file))
|
|
20
|
+
def collect_python_files(root_dir: str) -> List[str]:
    """Gather the paths of Python files below *root_dir*.

    Directories are pruned in place with ``filter_skipped_dirs`` so that
    ``os.walk`` does not descend into them. A file is kept when its name
    ends with ``.py`` and does not start with ``__``.

    Args:
        root_dir: Directory to walk.

    Returns:
        List of joined paths, in ``os.walk`` order.
    """
    collected: List[str] = []
    for current_dir, subdirs, filenames in os.walk(root_dir):
        # Slice assignment mutates the list os.walk iterates over.
        subdirs[:] = filter_skipped_dirs(subdirs)
        collected.extend(
            os.path.join(current_dir, filename)
            for filename in filenames
            if filename.endswith(".py") and not filename.startswith("__")
        )
    return collected
|
|
30
28
|
|
|
31
29
|
|
|
32
|
-
def parse_ast(file_path: str):
|
|
33
|
-
"""Parse a Python file, automatically handling UTF-8 BOM."""
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
# Remove BOM if present
|
|
37
|
-
source = remove_bom(source)
|
|
38
|
-
return ast.parse(source)
|
|
30
|
+
def parse_ast(file_path: str):
    """Return the AST of *file_path*.

    Delegates to ``read_and_parse_python_file`` from
    ``analysis.utils.bom_handler``, imported at call time.
    """
    from analysis.utils.bom_handler import read_and_parse_python_file as load_tree

    return load_tree(file_path)
|
|
39
34
|
|
|
40
35
|
|
|
41
36
|
def file_to_module(file_path: str, repo_root: str) -> str:
|
|
@@ -80,25 +75,27 @@ def run(repo_dir: Optional[str] = None, output_dir: Optional[str] = None, force_
|
|
|
80
75
|
|
|
81
76
|
os.makedirs(output_dir, exist_ok=True)
|
|
82
77
|
|
|
83
|
-
python_files = collect_python_files(repo_dir)
|
|
84
|
-
symbol_index = SymbolIndex()
|
|
85
|
-
file_module_map: Dict[str, str] = {}
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
78
|
+
python_files = collect_python_files(repo_dir)
|
|
79
|
+
symbol_index = SymbolIndex()
|
|
80
|
+
file_module_map: Dict[str, str] = {}
|
|
81
|
+
parsed_trees: Dict[str, Any] = {}
|
|
82
|
+
|
|
83
|
+
for file_path in python_files:
|
|
84
|
+
module_path = file_to_module(file_path, repo_dir)
|
|
85
|
+
file_module_map[file_path] = module_path
|
|
86
|
+
tree = parse_ast(file_path)
|
|
87
|
+
parsed_trees[file_path] = tree
|
|
88
|
+
symbol_index.index_file(tree, module_path, file_path)
|
|
89
|
+
|
|
90
|
+
import_resolver = ImportResolver(symbol_index)
|
|
91
|
+
for file_path in python_files:
|
|
92
|
+
module_path = file_module_map[file_path]
|
|
93
|
+
imports = extract_imports_from_tree(parsed_trees[file_path], file_path)
|
|
94
|
+
import_resolver.index_module_imports(module_path, imports)
|
|
95
|
+
|
|
96
|
+
all_calls = []
|
|
97
|
+
for file_path in python_files:
|
|
98
|
+
all_calls.extend(extract_function_calls_from_tree(parsed_trees[file_path], file_path))
|
|
102
99
|
|
|
103
100
|
cross_resolver = CrossFileResolver(symbol_index, import_resolver)
|
|
104
101
|
resolved_calls = []
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
"""BOM (Byte Order Mark), encoding, and AST parsing utilities for CodeMap.
|
|
2
|
+
|
|
3
|
+
This module provides utilities to handle:
|
|
4
|
+
1. UTF-8 BOM (Byte Order Mark) characters added by certain editors
|
|
5
|
+
2. Non-UTF-8 encoded files (e.g., Latin-1, Windows-1252)
|
|
6
|
+
|
|
7
|
+
Issues handled:
|
|
8
|
+
- BOM (U+FEFF): invisible character causing "invalid non-printable character U+FEFF"
|
|
9
|
+
- Non-UTF-8: files with different encodings causing UnicodeDecodeError
|
|
10
|
+
|
|
11
|
+
Solution: Detect encoding with fallback chain, strip BOM, and parse quietly.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
import ast
|
|
15
|
+
import warnings
|
|
16
|
+
from typing import Tuple
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def remove_bom(source: str) -> str:
    """Strip a single leading BOM character (U+FEFF) from *source*.

    Args:
        source: Python source code as a string.

    Returns:
        *source* without its first character when that character is U+FEFF;
        otherwise *source* unchanged. Only one leading BOM is removed.

    Example:
        >>> remove_bom('\\ufeffdef hello(): pass')
        'def hello(): pass'
    """
    return source.removeprefix('\ufeff')
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def detect_encoding(file_path: str) -> Tuple[str, bool]:
    """Detect a file's encoding by trial decoding of its bytes.

    The file is read once in binary mode and the same buffer is decoded with
    each candidate, in this order:
    1. UTF-8
    2. The encoding reported by ``sys.getdefaultencoding()``
    3. Latin-1 / ISO-8859-1 (accepts any byte sequence)

    Args:
        file_path: Path to the file to inspect.

    Returns:
        Tuple of ``(encoding_name, is_fallback)``. ``is_fallback`` is True
        only when Latin-1 had to be used.

    Raises:
        FileNotFoundError: If the file doesn't exist.
    """
    import sys

    # Read once up front: the bytes don't change between candidates, so there
    # is no reason to reopen the file for every encoding.
    with open(file_path, 'rb') as f:
        raw = f.read()

    encodings_to_try = [
        ('utf-8', False),
        (sys.getdefaultencoding(), False),
        ('latin-1', True),  # Latin-1 accepts any byte sequence
    ]

    for encoding, is_fallback in encodings_to_try:
        try:
            raw.decode(encoding)
        except (UnicodeDecodeError, LookupError):
            # Wrong encoding, or a codec name this interpreter doesn't know.
            continue
        return (encoding, is_fallback)

    # Should never reach here since Latin-1 accepts all bytes
    return ('latin-1', True)
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def read_source_file(file_path: str) -> str:
    """Return the text of *file_path* with any leading BOM removed.

    The encoding is chosen by ``detect_encoding``; the file is then opened in
    text mode with that encoding (``errors='replace'``) and the result is
    passed through ``remove_bom``.

    Args:
        file_path: Path to the Python file to read.

    Returns:
        The decoded source code without a leading BOM.

    Raises:
        FileNotFoundError: If the file doesn't exist.
    """
    codec, _ = detect_encoding(file_path)
    with open(file_path, 'r', encoding=codec, errors='replace') as handle:
        return remove_bom(handle.read())
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def parse_source_to_ast(source: str, file_path: str = "<unknown>") -> ast.AST:
    """Parse source code while suppressing noisy invalid-escape warnings.

    Some user repositories contain regular string literals like ``"\\S"`` or
    ``"\\["``. Python can warn about the invalid escape sequence while parsing
    those files even though analysis can continue normally. For CodeMap,
    these warnings are implementation noise, so we suppress them here.

    The warning category depends on the interpreter: Python 3.12+ emits
    ``SyntaxWarning``, while Python 3.6-3.11 emit ``DeprecationWarning``.
    Both are silenced, and the filters only apply for the duration of the
    parse.

    Args:
        source: Python source code to parse.
        file_path: Name passed to ``ast.parse`` as ``filename``.

    Returns:
        The parsed AST.

    Raises:
        SyntaxError: If *source* is not valid Python.
    """
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=SyntaxWarning)
        # Older interpreters report invalid escapes as DeprecationWarning;
        # match on the message so unrelated deprecations are not hidden.
        warnings.filterwarnings(
            "ignore",
            message=r"invalid escape sequence",
            category=DeprecationWarning,
        )
        return ast.parse(source, filename=file_path)
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def read_and_parse_python_file(file_path: str) -> ast.AST:
    """Load *file_path* via ``read_source_file`` and parse it into an AST.

    Args:
        file_path: Path to the Python file.

    Returns:
        The tree produced by ``parse_source_to_ast`` for the file's source.
    """
    return parse_source_to_ast(read_source_file(file_path), file_path=file_path)
|