codeanalyzer-python 0.1.8__py3-none-any.whl → 0.1.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codeanalyzer/__main__.py +40 -14
- codeanalyzer/config/config.py +2 -2
- codeanalyzer/core.py +317 -32
- codeanalyzer/schema/__init__.py +10 -0
- codeanalyzer/schema/py_schema.py +12 -3
- codeanalyzer/syntactic_analysis/__init__.py +16 -0
- codeanalyzer/syntactic_analysis/exceptions.py +15 -0
- codeanalyzer/syntactic_analysis/symbol_table_builder.py +104 -167
- {codeanalyzer_python-0.1.8.dist-info → codeanalyzer_python-0.1.10.dist-info}/METADATA +38 -28
- {codeanalyzer_python-0.1.8.dist-info → codeanalyzer_python-0.1.10.dist-info}/RECORD +14 -13
- {codeanalyzer_python-0.1.8.dist-info → codeanalyzer_python-0.1.10.dist-info}/WHEEL +0 -0
- {codeanalyzer_python-0.1.8.dist-info → codeanalyzer_python-0.1.10.dist-info}/entry_points.txt +0 -0
- {codeanalyzer_python-0.1.8.dist-info → codeanalyzer_python-0.1.10.dist-info}/licenses/LICENSE +0 -0
- {codeanalyzer_python-0.1.8.dist-info → codeanalyzer_python-0.1.10.dist-info}/licenses/NOTICE +0 -0
codeanalyzer/__main__.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
from pathlib import Path
|
|
2
|
-
from typing import
|
|
2
|
+
from typing import Optional, Annotated
|
|
3
3
|
|
|
4
4
|
import typer
|
|
5
5
|
|
|
@@ -7,7 +7,6 @@ from codeanalyzer.core import Codeanalyzer
|
|
|
7
7
|
from codeanalyzer.utils import _set_log_level, logger
|
|
8
8
|
from codeanalyzer.config import OutputFormat
|
|
9
9
|
|
|
10
|
-
|
|
11
10
|
def main(
|
|
12
11
|
input: Annotated[
|
|
13
12
|
Path, typer.Option("-i", "--input", help="Path to the project root directory.")
|
|
@@ -32,6 +31,12 @@ def main(
|
|
|
32
31
|
using_codeql: Annotated[
|
|
33
32
|
bool, typer.Option("--codeql/--no-codeql", help="Enable CodeQL-based analysis.")
|
|
34
33
|
] = False,
|
|
34
|
+
using_ray: Annotated[
|
|
35
|
+
bool,
|
|
36
|
+
typer.Option(
|
|
37
|
+
"--ray/--no-ray", help="Enable Ray for distributed analysis."
|
|
38
|
+
),
|
|
39
|
+
] = False,
|
|
35
40
|
rebuild_analysis: Annotated[
|
|
36
41
|
bool,
|
|
37
42
|
typer.Option(
|
|
@@ -39,18 +44,32 @@ def main(
|
|
|
39
44
|
help="Enable eager or lazy analysis. Defaults to lazy.",
|
|
40
45
|
),
|
|
41
46
|
] = False,
|
|
47
|
+
skip_tests: Annotated[
|
|
48
|
+
bool,
|
|
49
|
+
typer.Option(
|
|
50
|
+
"--skip-tests/--include-tests",
|
|
51
|
+
help="Skip test files in analysis.",
|
|
52
|
+
),
|
|
53
|
+
] = True,
|
|
54
|
+
file_name: Annotated[
|
|
55
|
+
Optional[Path],
|
|
56
|
+
typer.Option(
|
|
57
|
+
"--file-name",
|
|
58
|
+
help="Analyze only the specified file (relative to input directory).",
|
|
59
|
+
),
|
|
60
|
+
] = None,
|
|
42
61
|
cache_dir: Annotated[
|
|
43
62
|
Optional[Path],
|
|
44
63
|
typer.Option(
|
|
45
64
|
"-c",
|
|
46
65
|
"--cache-dir",
|
|
47
|
-
help="Directory to store analysis cache.",
|
|
66
|
+
help="Directory to store analysis cache. Defaults to '.codeanalyzer' in the input directory.",
|
|
48
67
|
),
|
|
49
68
|
] = None,
|
|
50
69
|
clear_cache: Annotated[
|
|
51
70
|
bool,
|
|
52
|
-
typer.Option("--clear-cache/--keep-cache", help="Clear cache after analysis."),
|
|
53
|
-
] =
|
|
71
|
+
typer.Option("--clear-cache/--keep-cache", help="Clear cache after analysis. By default, cache is retained."),
|
|
72
|
+
] = False,
|
|
54
73
|
verbosity: Annotated[
|
|
55
74
|
int, typer.Option("-v", count=True, help="Increase verbosity: -v, -vv, -vvv")
|
|
56
75
|
] = 0,
|
|
@@ -62,21 +81,28 @@ def main(
|
|
|
62
81
|
logger.error(f"Input path '{input}' does not exist.")
|
|
63
82
|
raise typer.Exit(code=1)
|
|
64
83
|
|
|
84
|
+
# Validate file_name if provided
|
|
85
|
+
if file_name is not None:
|
|
86
|
+
full_file_path = input / file_name
|
|
87
|
+
if not full_file_path.exists():
|
|
88
|
+
logger.error(f"Specified file '{file_name}' does not exist in '{input}'.")
|
|
89
|
+
raise typer.Exit(code=1)
|
|
90
|
+
if not full_file_path.is_file():
|
|
91
|
+
logger.error(f"Specified path '{file_name}' is not a file.")
|
|
92
|
+
raise typer.Exit(code=1)
|
|
93
|
+
if not str(file_name).endswith('.py'):
|
|
94
|
+
logger.error(f"Specified file '{file_name}' is not a Python file (.py).")
|
|
95
|
+
raise typer.Exit(code=1)
|
|
96
|
+
|
|
65
97
|
with Codeanalyzer(
|
|
66
|
-
input, analysis_level, using_codeql, rebuild_analysis, cache_dir, clear_cache
|
|
98
|
+
input, analysis_level, skip_tests, using_codeql, rebuild_analysis, cache_dir, clear_cache, using_ray, file_name
|
|
67
99
|
) as analyzer:
|
|
68
100
|
artifacts = analyzer.analyze()
|
|
69
101
|
|
|
70
102
|
# Handle output based on format
|
|
71
103
|
if output is None:
|
|
72
104
|
# Output to stdout (only for JSON)
|
|
73
|
-
|
|
74
|
-
print(artifacts.model_dump_json(separators=(",", ":")))
|
|
75
|
-
else:
|
|
76
|
-
logger.error(
|
|
77
|
-
f"Format '{format.value}' requires an output directory (use -o/--output)"
|
|
78
|
-
)
|
|
79
|
-
raise typer.Exit(code=1)
|
|
105
|
+
print(artifacts.json(separators=(",", ":")))
|
|
80
106
|
else:
|
|
81
107
|
# Output to file
|
|
82
108
|
output.mkdir(parents=True, exist_ok=True)
|
|
@@ -88,7 +114,7 @@ def _write_output(artifacts, output_dir: Path, format: OutputFormat):
|
|
|
88
114
|
if format == OutputFormat.JSON:
|
|
89
115
|
output_file = output_dir / "analysis.json"
|
|
90
116
|
# Use Pydantic's json() with separators for compact output
|
|
91
|
-
json_str = artifacts.
|
|
117
|
+
json_str = artifacts.json(indent=None)
|
|
92
118
|
with output_file.open("w") as f:
|
|
93
119
|
f.write(json_str)
|
|
94
120
|
logger.info(f"Analysis saved to {output_file}")
|
codeanalyzer/config/config.py
CHANGED
codeanalyzer/core.py
CHANGED
|
@@ -4,13 +4,39 @@ import shutil
|
|
|
4
4
|
import subprocess
|
|
5
5
|
import sys
|
|
6
6
|
from pathlib import Path
|
|
7
|
-
from typing import Any, Dict, Optional, Union
|
|
7
|
+
from typing import Any, Dict, Optional, Union, List
|
|
8
8
|
|
|
9
|
-
|
|
9
|
+
import ray
|
|
10
|
+
from codeanalyzer.utils import logger
|
|
11
|
+
from codeanalyzer.schema import PyApplication, PyModule
|
|
10
12
|
from codeanalyzer.semantic_analysis.codeql import CodeQLLoader
|
|
11
13
|
from codeanalyzer.semantic_analysis.codeql.codeql_exceptions import CodeQLExceptions
|
|
14
|
+
from codeanalyzer.syntactic_analysis.exceptions import SymbolTableBuilderRayError
|
|
12
15
|
from codeanalyzer.syntactic_analysis.symbol_table_builder import SymbolTableBuilder
|
|
13
|
-
from codeanalyzer.utils import
|
|
16
|
+
from codeanalyzer.utils import ProgressBar
|
|
17
|
+
|
|
18
|
+
@ray.remote
|
|
19
|
+
def _process_file_with_ray(py_file: Union[Path, str], project_dir: Union[Path, str], virtualenv: Union[Path, str, None]) -> Dict[str, PyModule]:
|
|
20
|
+
"""Processes files in the project directory using Ray for distributed processing.
|
|
21
|
+
|
|
22
|
+
Args:
|
|
23
|
+
py_file (Union[Path, str]): Path to the Python file to process.
|
|
24
|
+
project_dir (Union[Path, str]): Path to the project directory.
|
|
25
|
+
virtualenv (Union[Path, str, None]): Path to the virtual environment directory.
|
|
26
|
+
Returns:
|
|
27
|
+
Dict[str, PyModule]: A dictionary mapping file paths to PyModule objects.
|
|
28
|
+
"""
|
|
29
|
+
from rich.console import Console
|
|
30
|
+
console = Console()
|
|
31
|
+
module_map: Dict[str, PyModule] = {}
|
|
32
|
+
try:
|
|
33
|
+
py_file = Path(py_file)
|
|
34
|
+
symbol_table_builder = SymbolTableBuilder(project_dir, virtualenv)
|
|
35
|
+
module_map[str(py_file)] = symbol_table_builder.build_pymodule_from_file(py_file)
|
|
36
|
+
except Exception as e:
|
|
37
|
+
console.log(f"❌ Failed to process {py_file}: {e}")
|
|
38
|
+
raise SymbolTableBuilderRayError(f"Ray processing error for {py_file}: {e}")
|
|
39
|
+
return module_map
|
|
14
40
|
|
|
15
41
|
|
|
16
42
|
class Codeanalyzer:
|
|
@@ -28,14 +54,18 @@ class Codeanalyzer:
|
|
|
28
54
|
def __init__(
|
|
29
55
|
self,
|
|
30
56
|
project_dir: Union[str, Path],
|
|
31
|
-
analysis_depth: int
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
57
|
+
analysis_depth: int,
|
|
58
|
+
skip_tests: bool,
|
|
59
|
+
using_codeql: bool,
|
|
60
|
+
rebuild_analysis: bool,
|
|
61
|
+
cache_dir: Optional[Path],
|
|
62
|
+
clear_cache: bool,
|
|
63
|
+
using_ray: bool,
|
|
64
|
+
file_name: Optional[Path] = None,
|
|
36
65
|
) -> None:
|
|
37
66
|
self.analysis_depth = analysis_depth
|
|
38
67
|
self.project_dir = Path(project_dir).resolve()
|
|
68
|
+
self.skip_tests = skip_tests
|
|
39
69
|
self.using_codeql = using_codeql
|
|
40
70
|
self.rebuild_analysis = rebuild_analysis
|
|
41
71
|
self.cache_dir = (
|
|
@@ -45,10 +75,12 @@ class Codeanalyzer:
|
|
|
45
75
|
self.db_path: Optional[Path] = None
|
|
46
76
|
self.codeql_bin: Optional[Path] = None
|
|
47
77
|
self.virtualenv: Optional[Path] = None
|
|
78
|
+
self.using_ray: bool = using_ray
|
|
79
|
+
self.file_name: Optional[Path] = file_name
|
|
48
80
|
|
|
49
81
|
@staticmethod
|
|
50
82
|
def _cmd_exec_helper(
|
|
51
|
-
cmd:
|
|
83
|
+
cmd: List[str],
|
|
52
84
|
cwd: Optional[Path] = None,
|
|
53
85
|
capture_output: bool = True,
|
|
54
86
|
check: bool = True,
|
|
@@ -126,7 +158,8 @@ class Codeanalyzer:
|
|
|
126
158
|
# We're inside a virtual environment; need to find the base interpreter
|
|
127
159
|
|
|
128
160
|
# First, check if user explicitly set SYSTEM_PYTHON
|
|
129
|
-
|
|
161
|
+
system_python = os.getenv("SYSTEM_PYTHON")
|
|
162
|
+
if system_python:
|
|
130
163
|
system_python_path = Path(system_python)
|
|
131
164
|
if system_python_path.exists() and system_python_path.is_file():
|
|
132
165
|
return system_python_path
|
|
@@ -142,14 +175,16 @@ class Codeanalyzer:
|
|
|
142
175
|
|
|
143
176
|
# Use shutil.which to find python3 and python in PATH
|
|
144
177
|
for python_name in ["python3", "python"]:
|
|
145
|
-
|
|
178
|
+
python_path = shutil.which(python_name)
|
|
179
|
+
if python_path:
|
|
146
180
|
candidate = Path(python_path)
|
|
147
181
|
# Skip if this is the current virtual environment's python
|
|
148
182
|
if not str(candidate).startswith(sys.prefix):
|
|
149
183
|
python_candidates.append(candidate)
|
|
150
184
|
|
|
151
185
|
# Check pyenv installation
|
|
152
|
-
|
|
186
|
+
pyenv_root = os.getenv("PYENV_ROOT")
|
|
187
|
+
if pyenv_root:
|
|
153
188
|
pyenv_python = Path(pyenv_root) / "shims" / "python"
|
|
154
189
|
if pyenv_python.exists():
|
|
155
190
|
python_candidates.append(pyenv_python)
|
|
@@ -160,15 +195,17 @@ class Codeanalyzer:
|
|
|
160
195
|
python_candidates.append(home_pyenv)
|
|
161
196
|
|
|
162
197
|
# Check conda base environment
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
conda_python = Path(conda_prefix) / "bin" / "python"
|
|
198
|
+
conda_base = os.getenv("CONDA_PREFIX")
|
|
199
|
+
if conda_base:
|
|
200
|
+
conda_python = Path(conda_base) / "bin" / "python"
|
|
167
201
|
if conda_python.exists():
|
|
168
202
|
python_candidates.append(conda_python)
|
|
169
203
|
|
|
170
204
|
# Check asdf
|
|
171
|
-
|
|
205
|
+
asdf_dir = os.getenv("ASDF_DIR")
|
|
206
|
+
# If ASDF_DIR is set, use its shims directory
|
|
207
|
+
# Otherwise, check if asdf is installed in the default location
|
|
208
|
+
if asdf_dir:
|
|
172
209
|
asdf_python = Path(asdf_dir) / "shims" / "python"
|
|
173
210
|
if asdf_python.exists():
|
|
174
211
|
python_candidates.append(asdf_python)
|
|
@@ -211,14 +248,61 @@ class Codeanalyzer:
|
|
|
211
248
|
# Find python in the virtual environment
|
|
212
249
|
venv_python = venv_path / "bin" / "python"
|
|
213
250
|
|
|
214
|
-
#
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
251
|
+
# First, install dependencies from various dependency files
|
|
252
|
+
dependency_files = [
|
|
253
|
+
("requirements.txt", ["-r"]),
|
|
254
|
+
("requirements-dev.txt", ["-r"]),
|
|
255
|
+
("dev-requirements.txt", ["-r"]),
|
|
256
|
+
("test-requirements.txt", ["-r"]),
|
|
257
|
+
]
|
|
258
|
+
|
|
259
|
+
for dep_file, pip_args in dependency_files:
|
|
260
|
+
if (self.project_dir / dep_file).exists():
|
|
261
|
+
logger.info(f"Installing dependencies from {dep_file}")
|
|
262
|
+
self._cmd_exec_helper(
|
|
263
|
+
[str(venv_python), "-m", "pip", "install", "-U"] + pip_args + [str(self.project_dir / dep_file)],
|
|
264
|
+
cwd=self.project_dir,
|
|
265
|
+
check=True,
|
|
266
|
+
)
|
|
267
|
+
|
|
268
|
+
# Handle Pipenv files
|
|
269
|
+
if (self.project_dir / "Pipfile").exists():
|
|
270
|
+
logger.info("Installing dependencies from Pipfile")
|
|
271
|
+
# Note: This would require pipenv to be installed
|
|
272
|
+
self._cmd_exec_helper(
|
|
273
|
+
[str(venv_python), "-m", "pip", "install", "pipenv"],
|
|
274
|
+
cwd=self.project_dir,
|
|
275
|
+
check=True,
|
|
276
|
+
)
|
|
277
|
+
self._cmd_exec_helper(
|
|
278
|
+
["pipenv", "install", "--dev"],
|
|
279
|
+
cwd=self.project_dir,
|
|
280
|
+
check=True,
|
|
281
|
+
)
|
|
282
|
+
|
|
283
|
+
# Handle conda environment files
|
|
284
|
+
conda_files = ["conda.yml", "environment.yml"]
|
|
285
|
+
for conda_file in conda_files:
|
|
286
|
+
if (self.project_dir / conda_file).exists():
|
|
287
|
+
logger.info(f"Found {conda_file} - note that conda environments should be handled outside this tool")
|
|
288
|
+
break
|
|
289
|
+
|
|
290
|
+
# Now install the project itself in editable mode (only if package definition exists)
|
|
291
|
+
package_definition_files = [
|
|
292
|
+
"pyproject.toml", # Modern Python packaging (PEP 518/621)
|
|
293
|
+
"setup.py", # Traditional setuptools
|
|
294
|
+
"setup.cfg", # Setup configuration
|
|
295
|
+
]
|
|
296
|
+
|
|
297
|
+
if any((self.project_dir / file).exists() for file in package_definition_files):
|
|
298
|
+
logger.info("Installing project in editable mode")
|
|
299
|
+
self._cmd_exec_helper(
|
|
300
|
+
[str(venv_python), "-m", "pip", "install", "-e", str(self.project_dir)],
|
|
301
|
+
cwd=self.project_dir,
|
|
302
|
+
check=True,
|
|
303
|
+
)
|
|
304
|
+
else:
|
|
305
|
+
logger.warning("No package definition files found, skipping editable installation")
|
|
222
306
|
|
|
223
307
|
if self.using_codeql:
|
|
224
308
|
logger.info(f"(Re-)initializing CodeQL analysis for {self.project_dir}")
|
|
@@ -280,14 +364,95 @@ class Codeanalyzer:
|
|
|
280
364
|
|
|
281
365
|
return self
|
|
282
366
|
|
|
283
|
-
def __exit__(self,
|
|
367
|
+
def __exit__(self, *args, **kwargs) -> None:
|
|
284
368
|
if self.clear_cache and self.cache_dir.exists():
|
|
285
369
|
logger.info(f"Clearing cache directory: {self.cache_dir}")
|
|
286
370
|
shutil.rmtree(self.cache_dir)
|
|
287
371
|
|
|
288
372
|
def analyze(self) -> PyApplication:
|
|
289
|
-
"""
|
|
290
|
-
|
|
373
|
+
"""Analyze the project and return a PyApplication with symbol table.
|
|
374
|
+
|
|
375
|
+
Uses caching to avoid re-analyzing unchanged files.
|
|
376
|
+
"""
|
|
377
|
+
cache_file = self.cache_dir / "analysis_cache.json"
|
|
378
|
+
|
|
379
|
+
# Try to load existing cached analysis
|
|
380
|
+
cached_pyapplication = None
|
|
381
|
+
if not self.rebuild_analysis and cache_file.exists():
|
|
382
|
+
try:
|
|
383
|
+
cached_pyapplication = self._load_pyapplication_from_cache(cache_file)
|
|
384
|
+
logger.info("Loaded cached analysis")
|
|
385
|
+
except Exception as e:
|
|
386
|
+
logger.warning(f"Failed to load cache: {e}. Rebuilding analysis.")
|
|
387
|
+
cached_pyapplication = None
|
|
388
|
+
|
|
389
|
+
# Build symbol table from cached application if available (if no available, the build a new one)
|
|
390
|
+
symbol_table = self._build_symbol_table(cached_pyapplication.symbol_table if cached_pyapplication else {})
|
|
391
|
+
|
|
392
|
+
# Recreate pyapplication
|
|
393
|
+
app = PyApplication.builder().symbol_table(symbol_table).build()
|
|
394
|
+
|
|
395
|
+
# Save to cache
|
|
396
|
+
self._save_analysis_cache(app, cache_file)
|
|
397
|
+
|
|
398
|
+
return app
|
|
399
|
+
|
|
400
|
+
def _load_pyapplication_from_cache(self, cache_file: Path) -> PyApplication:
|
|
401
|
+
"""Load cached analysis from file.
|
|
402
|
+
|
|
403
|
+
Args:
|
|
404
|
+
cache_file: Path to the cache file
|
|
405
|
+
|
|
406
|
+
Returns:
|
|
407
|
+
PyApplication: The cached application data
|
|
408
|
+
"""
|
|
409
|
+
with cache_file.open('r') as f:
|
|
410
|
+
data = f.read()
|
|
411
|
+
return PyApplication.parse_raw(data)
|
|
412
|
+
|
|
413
|
+
def _save_analysis_cache(self, app: PyApplication, cache_file: Path) -> None:
|
|
414
|
+
"""Save analysis to cache file.
|
|
415
|
+
|
|
416
|
+
Args:
|
|
417
|
+
app: The PyApplication to cache
|
|
418
|
+
cache_file: Path to save the cache file
|
|
419
|
+
"""
|
|
420
|
+
# Ensure cache directory exists
|
|
421
|
+
cache_file.parent.mkdir(parents=True, exist_ok=True)
|
|
422
|
+
|
|
423
|
+
with cache_file.open('w') as f:
|
|
424
|
+
f.write(app.json(indent=2))
|
|
425
|
+
|
|
426
|
+
logger.info(f"Analysis cached to {cache_file}")
|
|
427
|
+
|
|
428
|
+
def _file_unchanged(self, file_path: Path, cached_module: PyModule) -> bool:
|
|
429
|
+
"""Check if a file has changed since it was cached.
|
|
430
|
+
|
|
431
|
+
Args:
|
|
432
|
+
file_path: Path to the file to check
|
|
433
|
+
cached_module: The cached PyModule for this file
|
|
434
|
+
|
|
435
|
+
Returns:
|
|
436
|
+
bool: True if file is unchanged, False otherwise
|
|
437
|
+
"""
|
|
438
|
+
try:
|
|
439
|
+
# Check last modified time and file size
|
|
440
|
+
if (cached_module.last_modified is not None and
|
|
441
|
+
cached_module.file_size is not None and
|
|
442
|
+
cached_module.last_modified == file_path.stat().st_mtime and
|
|
443
|
+
cached_module.file_size == file_path.stat().st_size):
|
|
444
|
+
return True
|
|
445
|
+
# Also check content hash for extra safety
|
|
446
|
+
if cached_module.content_hash is not None:
|
|
447
|
+
content_hash = hashlib.sha256(file_path.read_bytes()).hexdigest()
|
|
448
|
+
return content_hash == cached_module.content_hash
|
|
449
|
+
|
|
450
|
+
# No cached metadata mismatch, assume file changed
|
|
451
|
+
return False
|
|
452
|
+
|
|
453
|
+
except Exception as e:
|
|
454
|
+
logger.debug(f"Error checking file {file_path}: {e}")
|
|
455
|
+
return False
|
|
291
456
|
|
|
292
457
|
def _compute_checksum(self, root: Path) -> str:
|
|
293
458
|
"""Compute SHA256 checksum of all Python source files in a project directory. If somethings changes, the
|
|
@@ -304,11 +469,131 @@ class Codeanalyzer:
|
|
|
304
469
|
sha256.update(py_file.read_bytes())
|
|
305
470
|
return sha256.hexdigest()
|
|
306
471
|
|
|
307
|
-
def _build_symbol_table(self) -> Dict[str, PyModule]:
|
|
308
|
-
"""
|
|
309
|
-
|
|
472
|
+
def _build_symbol_table(self, cached_symbol_table: Optional[Dict[str, PyModule]] = None) -> Dict[str, PyModule]:
|
|
473
|
+
"""Builds the symbol table for the project.
|
|
474
|
+
|
|
475
|
+
This method scans the project directory, identifies Python files,
|
|
476
|
+
and constructs a symbol table containing information about classes,
|
|
477
|
+
functions, and variables defined in those files.
|
|
478
|
+
|
|
479
|
+
Args:
|
|
480
|
+
cached_app: Previously cached PyApplication to reuse unchanged files
|
|
481
|
+
|
|
482
|
+
Returns:
|
|
483
|
+
Dict[str, PyModule]: A dictionary mapping file paths to PyModule objects.
|
|
484
|
+
"""
|
|
485
|
+
symbol_table: Dict[str, PyModule] = {}
|
|
486
|
+
|
|
487
|
+
# Handle single file analysis
|
|
488
|
+
if self.file_name is not None:
|
|
489
|
+
single_file = self.project_dir / self.file_name
|
|
490
|
+
logger.info(f"Analyzing single file: {single_file}")
|
|
491
|
+
|
|
492
|
+
# Check if file is in cache and unchanged
|
|
493
|
+
file_key = str(single_file)
|
|
494
|
+
if file_key in cached_symbol_table and not self.rebuild_analysis:
|
|
495
|
+
# Compute file checksum to see if it changed
|
|
496
|
+
if self._file_unchanged(single_file, cached_symbol_table[file_key]):
|
|
497
|
+
logger.info(f"Using cached analysis for {single_file}")
|
|
498
|
+
symbol_table[file_key] = cached_symbol_table[file_key]
|
|
499
|
+
return symbol_table
|
|
500
|
+
|
|
501
|
+
# File is new or changed, analyze it
|
|
502
|
+
try:
|
|
503
|
+
symbol_table_builder = SymbolTableBuilder(self.project_dir, self.virtualenv)
|
|
504
|
+
py_module = symbol_table_builder.build_pymodule_from_file(single_file)
|
|
505
|
+
symbol_table[file_key] = py_module
|
|
506
|
+
logger.info("✅ Single file analysis complete.")
|
|
507
|
+
return symbol_table
|
|
508
|
+
except Exception as e:
|
|
509
|
+
logger.error(f"Failed to process {single_file}: {e}")
|
|
510
|
+
return symbol_table
|
|
511
|
+
|
|
512
|
+
# Get all Python files first to show accurate progress
|
|
513
|
+
py_files = []
|
|
514
|
+
for py_file in self.project_dir.rglob("*.py"):
|
|
515
|
+
rel_path = py_file.relative_to(self.project_dir)
|
|
516
|
+
path_parts = rel_path.parts
|
|
517
|
+
filename = py_file.name
|
|
518
|
+
|
|
519
|
+
# Skip directories we don't care about
|
|
520
|
+
if (
|
|
521
|
+
"site-packages" in path_parts
|
|
522
|
+
or ".venv" in path_parts
|
|
523
|
+
or ".codeanalyzer" in path_parts
|
|
524
|
+
):
|
|
525
|
+
continue
|
|
526
|
+
|
|
527
|
+
# Skip test files if enabled
|
|
528
|
+
if self.skip_tests and (
|
|
529
|
+
"test" in path_parts
|
|
530
|
+
or "tests" in path_parts
|
|
531
|
+
or filename.startswith("test_")
|
|
532
|
+
or filename.endswith("_test.py")
|
|
533
|
+
):
|
|
534
|
+
continue
|
|
535
|
+
|
|
536
|
+
py_files.append(py_file)
|
|
537
|
+
|
|
538
|
+
if self.using_ray:
|
|
539
|
+
logger.info("Using Ray for distributed symbol table generation.")
|
|
540
|
+
# Separate files into cached and new/changed
|
|
541
|
+
files_to_process = []
|
|
542
|
+
for py_file in py_files:
|
|
543
|
+
file_key = str(py_file)
|
|
544
|
+
if file_key in cached_symbol_table and not self.rebuild_analysis:
|
|
545
|
+
if self._file_unchanged(py_file, cached_symbol_table[file_key]):
|
|
546
|
+
# Use cached version
|
|
547
|
+
symbol_table[file_key] = cached_symbol_table[file_key]
|
|
548
|
+
continue
|
|
549
|
+
files_to_process.append(py_file)
|
|
550
|
+
|
|
551
|
+
# Process only new/changed files with Ray
|
|
552
|
+
if files_to_process:
|
|
553
|
+
futures = [_process_file_with_ray.remote(py_file, self.project_dir, str(self.virtualenv) if self.virtualenv else None) for py_file in files_to_process]
|
|
554
|
+
|
|
555
|
+
with ProgressBar(len(futures), "Building symbol table (parallel)") as progress:
|
|
556
|
+
pending = futures[:]
|
|
557
|
+
while pending:
|
|
558
|
+
done, pending = ray.wait(pending, num_returns=1)
|
|
559
|
+
result = ray.get(done[0])
|
|
560
|
+
if result:
|
|
561
|
+
symbol_table.update(result)
|
|
562
|
+
progress.advance()
|
|
563
|
+
else:
|
|
564
|
+
logger.info("Building symbol table serially.")
|
|
565
|
+
symbol_table_builder = SymbolTableBuilder(self.project_dir, self.virtualenv)
|
|
566
|
+
files_processed = 0
|
|
567
|
+
files_from_cache = 0
|
|
568
|
+
|
|
569
|
+
with ProgressBar(len(py_files), "Building symbol table") as progress:
|
|
570
|
+
for py_file in py_files:
|
|
571
|
+
file_key = str(py_file)
|
|
572
|
+
|
|
573
|
+
# Check if file is cached and unchanged
|
|
574
|
+
if file_key in cached_symbol_table and not self.rebuild_analysis:
|
|
575
|
+
if self._file_unchanged(py_file, cached_symbol_table[file_key]):
|
|
576
|
+
symbol_table[file_key] = cached_symbol_table[file_key]
|
|
577
|
+
files_from_cache += 1
|
|
578
|
+
progress.advance()
|
|
579
|
+
continue
|
|
580
|
+
|
|
581
|
+
# File is new or changed, analyze it
|
|
582
|
+
try:
|
|
583
|
+
py_module = symbol_table_builder.build_pymodule_from_file(py_file)
|
|
584
|
+
symbol_table[file_key] = py_module
|
|
585
|
+
files_processed += 1
|
|
586
|
+
except Exception as e:
|
|
587
|
+
logger.error(f"Failed to process {py_file}: {e}")
|
|
588
|
+
progress.advance()
|
|
589
|
+
|
|
590
|
+
if files_from_cache > 0:
|
|
591
|
+
logger.info(f"Reused {files_from_cache} files from cache, processed {files_processed} new/changed files")
|
|
592
|
+
|
|
593
|
+
logger.info("✅ Symbol table generation complete.")
|
|
594
|
+
return symbol_table
|
|
310
595
|
|
|
311
596
|
def _get_call_graph(self) -> Dict[str, Any]:
|
|
312
597
|
"""Retrieve call graph from CodeQL database."""
|
|
313
598
|
logger.warning("Call graph extraction not yet implemented.")
|
|
314
|
-
return {}
|
|
599
|
+
return {}
|
codeanalyzer/schema/__init__.py
CHANGED
|
@@ -21,3 +21,13 @@ __all__ = [
|
|
|
21
21
|
"PyClassAttribute",
|
|
22
22
|
"PyCallableParameter",
|
|
23
23
|
]
|
|
24
|
+
|
|
25
|
+
# Resolve forward references
|
|
26
|
+
PyCallable.update_forward_refs(PyClass=PyClass)
|
|
27
|
+
PyClass.update_forward_refs(PyCallable=PyCallable)
|
|
28
|
+
PyModule.update_forward_refs(PyCallable=PyCallable, PyClass=PyClass)
|
|
29
|
+
PyApplication.update_forward_refs(
|
|
30
|
+
PyCallable=PyCallable,
|
|
31
|
+
PyClass=PyClass,
|
|
32
|
+
PyModule=PyModule
|
|
33
|
+
)
|
codeanalyzer/schema/py_schema.py
CHANGED
|
@@ -19,7 +19,7 @@
|
|
|
19
19
|
This module defines the data models used to represent Python code structures
|
|
20
20
|
for static analysis purposes.
|
|
21
21
|
"""
|
|
22
|
-
|
|
22
|
+
from __future__ import annotations
|
|
23
23
|
import inspect
|
|
24
24
|
from pathlib import Path
|
|
25
25
|
from typing import Any, Dict, List, Optional
|
|
@@ -148,7 +148,8 @@ def builder(cls):
|
|
|
148
148
|
|
|
149
149
|
method.__name__ = f"{f}"
|
|
150
150
|
method.__annotations__ = {"value": t, "return": builder_name}
|
|
151
|
-
|
|
151
|
+
# Check if 't' has '__name__' attribute, otherwise use a fallback
|
|
152
|
+
method.__doc__ = f"Set {f} ({getattr(t, '__name__', str(t))})"
|
|
152
153
|
return method
|
|
153
154
|
|
|
154
155
|
namespace[f"{field}"] = make_method()
|
|
@@ -275,12 +276,16 @@ class PyCallable(BaseModel):
|
|
|
275
276
|
code_start_line: int = -1
|
|
276
277
|
accessed_symbols: List[PySymbol] = []
|
|
277
278
|
call_sites: List[PyCallsite] = []
|
|
279
|
+
inner_callables: Dict[str, "PyCallable"] = {}
|
|
280
|
+
inner_classes: Dict[str, "PyClass"] = {}
|
|
278
281
|
local_variables: List[PyVariableDeclaration] = []
|
|
279
282
|
cyclomatic_complexity: int = 0
|
|
280
283
|
|
|
281
284
|
def __hash__(self) -> int:
|
|
282
285
|
"""Generate a hash based on the callable's signature."""
|
|
283
286
|
return hash(self.signature)
|
|
287
|
+
|
|
288
|
+
|
|
284
289
|
|
|
285
290
|
|
|
286
291
|
@builder
|
|
@@ -328,6 +333,10 @@ class PyModule(BaseModel):
|
|
|
328
333
|
classes: Dict[str, PyClass] = {}
|
|
329
334
|
functions: Dict[str, PyCallable] = {}
|
|
330
335
|
variables: List[PyVariableDeclaration] = []
|
|
336
|
+
# Metadata for caching
|
|
337
|
+
content_hash: Optional[str] = None
|
|
338
|
+
last_modified: Optional[float] = None
|
|
339
|
+
file_size: Optional[int] = None
|
|
331
340
|
|
|
332
341
|
|
|
333
342
|
@builder
|
|
@@ -335,4 +344,4 @@ class PyModule(BaseModel):
|
|
|
335
344
|
class PyApplication(BaseModel):
|
|
336
345
|
"""Represents a Python application."""
|
|
337
346
|
|
|
338
|
-
symbol_table:
|
|
347
|
+
symbol_table: Dict[str, PyModule]
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
from codeanalyzer.syntactic_analysis.exceptions import (
|
|
2
|
+
SymbolTableBuilderException,
|
|
3
|
+
SymbolTableBuilderFileNotFoundError,
|
|
4
|
+
SymbolTableBuilderParsingError,
|
|
5
|
+
SymbolTableBuilderRayError,
|
|
6
|
+
)
|
|
7
|
+
|
|
8
|
+
from codeanalyzer.syntactic_analysis.symbol_table_builder import SymbolTableBuilder
|
|
9
|
+
|
|
10
|
+
__all__ = [
|
|
11
|
+
"SymbolTableBuilder",
|
|
12
|
+
"SymbolTableBuilderException",
|
|
13
|
+
"SymbolTableBuilderFileNotFoundError",
|
|
14
|
+
"SymbolTableBuilderParsingError",
|
|
15
|
+
"SymbolTableBuilderRayError",
|
|
16
|
+
]
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
class SymbolTableBuilderException(Exception):
|
|
2
|
+
"""Base exception for symbol table builder errors."""
|
|
3
|
+
pass
|
|
4
|
+
|
|
5
|
+
class SymbolTableBuilderFileNotFoundError(SymbolTableBuilderException):
|
|
6
|
+
"""Exception raised when a source file is not found."""
|
|
7
|
+
pass
|
|
8
|
+
|
|
9
|
+
class SymbolTableBuilderParsingError(SymbolTableBuilderException):
|
|
10
|
+
"""Exception raised when a source file cannot be parsed."""
|
|
11
|
+
pass
|
|
12
|
+
|
|
13
|
+
class SymbolTableBuilderRayError(SymbolTableBuilderException):
|
|
14
|
+
"""Exception raised when there is an error in Ray processing."""
|
|
15
|
+
pass
|
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
import ast
|
|
2
|
+
import hashlib
|
|
2
3
|
import tokenize
|
|
3
4
|
from ast import AST, ClassDef
|
|
4
5
|
from io import StringIO
|
|
5
6
|
from pathlib import Path
|
|
6
|
-
from typing import Dict, List, Optional
|
|
7
|
+
from typing import Dict, List, Optional, Union
|
|
7
8
|
|
|
8
9
|
import jedi
|
|
9
10
|
from jedi.api import Script
|
|
@@ -21,14 +22,12 @@ from codeanalyzer.schema.py_schema import (
|
|
|
21
22
|
PySymbol,
|
|
22
23
|
PyVariableDeclaration,
|
|
23
24
|
)
|
|
24
|
-
from codeanalyzer.utils import logger
|
|
25
|
-
from codeanalyzer.utils.progress_bar import ProgressBar
|
|
26
25
|
|
|
27
26
|
|
|
28
27
|
class SymbolTableBuilder:
|
|
29
28
|
"""A class for building a symbol table for a Python project."""
|
|
30
29
|
|
|
31
|
-
def __init__(self, project_dir: Path
|
|
30
|
+
def __init__(self, project_dir: Union[Path, str], virtualenv: Union[Path, str, None]) -> None:
|
|
32
31
|
self.project_dir = Path(project_dir)
|
|
33
32
|
if virtualenv is None:
|
|
34
33
|
# If no virtual environment is provided, create a jedi project without an environment.
|
|
@@ -72,7 +71,7 @@ class SymbolTableBuilder:
|
|
|
72
71
|
pass
|
|
73
72
|
return None
|
|
74
73
|
|
|
75
|
-
def
|
|
74
|
+
def build_pymodule_from_file(self, py_file: Path) -> PyModule:
|
|
76
75
|
"""Builds a PyModule from a Python file.
|
|
77
76
|
|
|
78
77
|
Args:
|
|
@@ -83,18 +82,17 @@ class SymbolTableBuilder:
|
|
|
83
82
|
"""
|
|
84
83
|
# Get the raw source code from the file
|
|
85
84
|
source = py_file.read_text(encoding="utf-8")
|
|
85
|
+
|
|
86
|
+
# Get file metadata for caching
|
|
87
|
+
stat = py_file.stat()
|
|
88
|
+
file_size = stat.st_size
|
|
89
|
+
last_modified = stat.st_mtime
|
|
90
|
+
content_hash = hashlib.sha256(source.encode('utf-8')).hexdigest()
|
|
91
|
+
|
|
86
92
|
# Create a Jedi script for the file
|
|
87
93
|
script: Script = Script(path=str(py_file), project=self.jedi_project)
|
|
88
94
|
module = ast.parse(source, filename=str(py_file))
|
|
89
|
-
|
|
90
|
-
classes = {}
|
|
91
|
-
functions = {}
|
|
92
|
-
for node in ast.iter_child_nodes(module):
|
|
93
|
-
if isinstance(node, ClassDef):
|
|
94
|
-
classes.update(self._add_class(node, script))
|
|
95
|
-
elif isinstance(node, ast.FunctionDef):
|
|
96
|
-
functions.update(self._callables(node, script))
|
|
97
|
-
|
|
95
|
+
|
|
98
96
|
return (
|
|
99
97
|
PyModule.builder()
|
|
100
98
|
.file_path(str(py_file))
|
|
@@ -102,8 +100,11 @@ class SymbolTableBuilder:
|
|
|
102
100
|
.comments(self._pycomments(module, source))
|
|
103
101
|
.imports(self._imports(module))
|
|
104
102
|
.variables(self._module_variables(module, script))
|
|
105
|
-
.classes(
|
|
106
|
-
.functions(
|
|
103
|
+
.classes(self._add_class(module, script))
|
|
104
|
+
.functions(self._callables(module, script))
|
|
105
|
+
.content_hash(content_hash)
|
|
106
|
+
.last_modified(last_modified)
|
|
107
|
+
.file_size(file_size)
|
|
107
108
|
.build()
|
|
108
109
|
)
|
|
109
110
|
|
|
@@ -156,144 +157,112 @@ class SymbolTableBuilder:
|
|
|
156
157
|
|
|
157
158
|
return imports
|
|
158
159
|
|
|
159
|
-
def _add_class(
|
|
160
|
-
|
|
161
|
-
) -> Dict[str, PyClass]:
|
|
162
|
-
"""Builds a PyClass from a class definition node.
|
|
163
|
-
|
|
164
|
-
Args:
|
|
165
|
-
class_node (ast.ClassDef): The AST node representing the class.
|
|
166
|
-
script (Script): The Jedi script object for the module.
|
|
160
|
+
def _add_class(self, node: AST, script: Script, prefix: str = "") -> Dict[str, PyClass]:
|
|
161
|
+
classes: Dict[str, PyClass] = {}
|
|
167
162
|
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
# Try resolving full signature with Jedi
|
|
172
|
-
try:
|
|
173
|
-
definitions = script.goto(
|
|
174
|
-
line=class_node.lineno, column=class_node.col_offset
|
|
175
|
-
)
|
|
176
|
-
signature = next(
|
|
177
|
-
(d.full_name for d in definitions if d.type == "class"),
|
|
178
|
-
f"{script.path.__str__().replace('/', '.').replace('.py', '')}.{class_node.name}",
|
|
179
|
-
)
|
|
180
|
-
except Exception:
|
|
181
|
-
signature = (
|
|
182
|
-
f"{script.path.__str__().replace('/', '.').replace('.py', '')}.{class_node.name}",
|
|
183
|
-
)
|
|
163
|
+
for child in ast.iter_child_nodes(node):
|
|
164
|
+
if not isinstance(child, ast.ClassDef):
|
|
165
|
+
continue
|
|
184
166
|
|
|
185
|
-
|
|
167
|
+
class_name = child.name
|
|
168
|
+
start_line = child.lineno
|
|
169
|
+
end_line = getattr(child, "end_lineno", start_line + len(child.body))
|
|
170
|
+
code = ast.unparse(child).strip()
|
|
186
171
|
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
172
|
+
# Try resolving full signature with Jedi
|
|
173
|
+
if prefix:
|
|
174
|
+
signature = f"{prefix}.{class_name}"
|
|
175
|
+
else:
|
|
176
|
+
try:
|
|
177
|
+
definitions = script.goto(line=start_line, column=child.col_offset)
|
|
178
|
+
signature = next(
|
|
179
|
+
(d.full_name for d in definitions if d.type == "class"),
|
|
180
|
+
f"{Path(script.path).relative_to(self.project_dir).__str__().replace('/', '.').replace('.py', '')}.{class_name}"
|
|
181
|
+
)
|
|
182
|
+
except Exception:
|
|
183
|
+
signature = f"{Path(script.path).relative_to(self.project_dir).__str__().replace('/', '.').replace('.py', '')}.{class_name}"
|
|
184
|
+
py_class = (
|
|
185
|
+
PyClass.builder()
|
|
186
|
+
.name(class_name)
|
|
187
|
+
.signature(signature)
|
|
188
|
+
.start_line(start_line)
|
|
189
|
+
.end_line(end_line)
|
|
190
|
+
.code(code)
|
|
191
|
+
.comments(self._pycomments(child, code))
|
|
192
|
+
.base_classes([
|
|
201
193
|
ast.unparse(base)
|
|
202
|
-
for base in
|
|
194
|
+
for base in child.bases
|
|
203
195
|
if isinstance(base, ast.expr)
|
|
204
|
-
]
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
{
|
|
210
|
-
k: v
|
|
211
|
-
for child in class_node.body
|
|
212
|
-
if isinstance(child, ast.ClassDef)
|
|
213
|
-
for k, v in self._add_class(child, script).items()
|
|
214
|
-
}
|
|
196
|
+
])
|
|
197
|
+
.methods(self._callables(child, script, prefix=signature)) # Pass class signature as prefix
|
|
198
|
+
.attributes(self._class_attributes(child, script))
|
|
199
|
+
.inner_classes(self._add_class(child, script, prefix=signature)) # Pass class signature as prefix
|
|
200
|
+
.build()
|
|
215
201
|
)
|
|
216
|
-
.build()
|
|
217
|
-
)
|
|
218
202
|
|
|
219
|
-
|
|
203
|
+
classes[signature] = py_class
|
|
220
204
|
|
|
221
|
-
|
|
222
|
-
"""
|
|
223
|
-
Builds PyCallable objects from any AST node that may contain functions.
|
|
205
|
+
return classes
|
|
224
206
|
|
|
225
|
-
Args:
|
|
226
|
-
node (AST): The AST node to process (e.g., Module, ClassDef, FunctionDef).
|
|
227
|
-
script (Script): The Jedi script object for the module.
|
|
228
207
|
|
|
229
|
-
|
|
230
|
-
Dict[str, PyCallable]: A dictionary mapping function/method names to PyCallable objects.
|
|
231
|
-
"""
|
|
208
|
+
def _callables(self, node: AST, script: Script, prefix: str = "") -> Dict[str, PyCallable]:
|
|
232
209
|
callables: Dict[str, PyCallable] = {}
|
|
233
|
-
module_path: str = script.path or "<unknown_module>"
|
|
234
|
-
module_name: str = Path(module_path).stem if module_path else "<unknown>"
|
|
235
|
-
|
|
236
|
-
def visit(n: AST, class_prefix: str = ""):
|
|
237
|
-
for child in ast.iter_child_nodes(n):
|
|
238
|
-
if isinstance(child, ast.FunctionDef):
|
|
239
|
-
method_name = child.name
|
|
240
|
-
start_line = child.lineno
|
|
241
|
-
end_line = getattr(
|
|
242
|
-
child, "end_lineno", start_line + len(child.body)
|
|
243
|
-
)
|
|
244
|
-
code_start_line = child.body[0].lineno if child.body else start_line
|
|
245
|
-
code: str = ast.unparse(child).strip()
|
|
246
|
-
decorators = [ast.unparse(d) for d in child.decorator_list]
|
|
247
210
|
|
|
211
|
+
for child in ast.iter_child_nodes(node):
|
|
212
|
+
if isinstance(child, (ast.FunctionDef, ast.AsyncFunctionDef)):
|
|
213
|
+
method_name = child.name # Keep the actual method name unchanged
|
|
214
|
+
start_line = child.lineno
|
|
215
|
+
end_line = getattr(child, "end_lineno", start_line + len(child.body))
|
|
216
|
+
code = ast.unparse(child).strip()
|
|
217
|
+
decorators = [ast.unparse(d) for d in child.decorator_list]
|
|
218
|
+
|
|
219
|
+
if prefix:
|
|
220
|
+
# We're in a nested context - build signature with prefix
|
|
221
|
+
signature = f"{prefix}.{method_name}"
|
|
222
|
+
else:
|
|
223
|
+
# Top-level function - try Jedi first, fall back to relative path-based
|
|
248
224
|
try:
|
|
249
|
-
definitions = script.goto(
|
|
250
|
-
|
|
225
|
+
definitions = script.goto(line=start_line, column=child.col_offset)
|
|
226
|
+
signature = next(
|
|
227
|
+
(d.full_name for d in definitions if d.type == "function"),
|
|
228
|
+
None
|
|
251
229
|
)
|
|
252
230
|
except Exception:
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
signature
|
|
256
|
-
|
|
257
|
-
|
|
231
|
+
signature = None
|
|
232
|
+
|
|
233
|
+
# If Jedi didn't provide a signature, build one relative to project_dir
|
|
234
|
+
if not signature:
|
|
235
|
+
relative_path = Path(script.path).relative_to(self.project_dir)
|
|
236
|
+
signature = f"{str(relative_path).replace('/', '.').replace('.py', '')}.{method_name}"
|
|
237
|
+
py_callable = (
|
|
238
|
+
PyCallable.builder()
|
|
239
|
+
.name(method_name) # Use the actual method name, not the full signature
|
|
240
|
+
.path(str(script.path))
|
|
241
|
+
.signature(signature) # Use the full signature here
|
|
242
|
+
.decorators(decorators)
|
|
243
|
+
.code(code)
|
|
244
|
+
.start_line(start_line)
|
|
245
|
+
.end_line(end_line)
|
|
246
|
+
.code_start_line(child.body[0].lineno if child.body else start_line)
|
|
247
|
+
.accessed_symbols(self._accessed_symbols(child, script))
|
|
248
|
+
.call_sites(self._call_sites(child, script))
|
|
249
|
+
.local_variables(self._local_variables(child, script))
|
|
250
|
+
.cyclomatic_complexity(self._cyclomatic_complexity(child))
|
|
251
|
+
.parameters(self._callable_parameters(child, script))
|
|
252
|
+
.return_type(
|
|
253
|
+
ast.unparse(child.returns)
|
|
254
|
+
if child.returns else self._infer_type(script, child.lineno, child.col_offset)
|
|
258
255
|
)
|
|
256
|
+
.comments(self._pycomments(child, code))
|
|
257
|
+
.inner_callables(self._callables(child, script, signature)) # Pass current signature as prefix
|
|
258
|
+
.inner_classes(self._add_class(child, script, signature)) # Pass current signature as prefix
|
|
259
|
+
.build()
|
|
260
|
+
)
|
|
259
261
|
|
|
260
|
-
|
|
261
|
-
PyCallable.builder()
|
|
262
|
-
.name(method_name)
|
|
263
|
-
.path(script.path.__str__())
|
|
264
|
-
.signature(signature)
|
|
265
|
-
.decorators(decorators)
|
|
266
|
-
.code(code)
|
|
267
|
-
.start_line(start_line)
|
|
268
|
-
.end_line(end_line)
|
|
269
|
-
.code_start_line(code_start_line)
|
|
270
|
-
.accessed_symbols(self._accessed_symbols(child, script))
|
|
271
|
-
.call_sites(self._call_sites(child, script))
|
|
272
|
-
.local_variables(self._local_variables(child, script))
|
|
273
|
-
.cyclomatic_complexity(self._cyclomatic_complexity(child))
|
|
274
|
-
.parameters(self._callable_parameters(child, script))
|
|
275
|
-
.return_type(
|
|
276
|
-
ast.unparse(child.returns)
|
|
277
|
-
if child.returns
|
|
278
|
-
else self._infer_type(
|
|
279
|
-
script, child.lineno, child.col_offset
|
|
280
|
-
)
|
|
281
|
-
)
|
|
282
|
-
.comments(self._pycomments(child, code))
|
|
283
|
-
.build()
|
|
284
|
-
)
|
|
285
|
-
|
|
286
|
-
visit(child, class_prefix + method_name + ".")
|
|
287
|
-
|
|
288
|
-
elif isinstance(child, ast.ClassDef):
|
|
289
|
-
visit(child, class_prefix + child.name + ".")
|
|
290
|
-
|
|
291
|
-
elif hasattr(child, "body"):
|
|
292
|
-
visit(child, class_prefix)
|
|
262
|
+
callables[method_name] = py_callable # Key by method name, not full signature
|
|
293
263
|
|
|
294
|
-
visit(node)
|
|
295
264
|
return callables
|
|
296
|
-
|
|
265
|
+
|
|
297
266
|
def _pycomments(self, node: ast.AST, source: str) -> List[PyComment]:
|
|
298
267
|
"""
|
|
299
268
|
Extracts all PyComment instances (docstring and # comments) from within a specific AST node's body.
|
|
@@ -868,35 +837,3 @@ class SymbolTableBuilder:
|
|
|
868
837
|
.col_offset(col_offset)
|
|
869
838
|
.build()
|
|
870
839
|
)
|
|
871
|
-
|
|
872
|
-
def build(self) -> Dict[str, PyModule]:
|
|
873
|
-
"""Builds the symbol table for the project.
|
|
874
|
-
|
|
875
|
-
This method scans the project directory, identifies Python files,
|
|
876
|
-
and constructs a symbol table containing information about classes,
|
|
877
|
-
functions, and variables defined in those files.
|
|
878
|
-
"""
|
|
879
|
-
symbol_table: Dict[str, PyModule] = {}
|
|
880
|
-
# Get all Python files first to show accurate progress
|
|
881
|
-
py_files = [
|
|
882
|
-
py_file
|
|
883
|
-
for py_file in self.project_dir.rglob("*.py")
|
|
884
|
-
if "site-packages"
|
|
885
|
-
not in py_file.resolve().__str__() # exclude site-packages
|
|
886
|
-
and ".venv"
|
|
887
|
-
not in py_file.resolve().__str__() # exclude virtual environments
|
|
888
|
-
and ".codeanalyzer"
|
|
889
|
-
not in py_file.resolve().__str__() # exclude internal cache directories
|
|
890
|
-
]
|
|
891
|
-
|
|
892
|
-
with ProgressBar(len(py_files), "Building symbol table") as progress:
|
|
893
|
-
for py_file in py_files:
|
|
894
|
-
try:
|
|
895
|
-
py_module = self._module(py_file)
|
|
896
|
-
symbol_table[str(py_file)] = py_module
|
|
897
|
-
except Exception as e:
|
|
898
|
-
logger.error(f"Failed to process {py_file}: {e}")
|
|
899
|
-
progress.advance()
|
|
900
|
-
progress.finish("✅ Symbol table generation complete.")
|
|
901
|
-
|
|
902
|
-
return symbol_table
|
|
@@ -1,20 +1,22 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: codeanalyzer-python
|
|
3
|
-
Version: 0.1.
|
|
3
|
+
Version: 0.1.10
|
|
4
4
|
Summary: Static Analysis on Python source code using Jedi, CodeQL and Treesitter.
|
|
5
5
|
Author-email: Rahul Krishna <i.m.ralk@gmail.com>
|
|
6
6
|
License-File: LICENSE
|
|
7
7
|
License-File: NOTICE
|
|
8
|
-
Requires-Python: >=3.
|
|
9
|
-
Requires-Dist: jedi
|
|
10
|
-
Requires-Dist:
|
|
11
|
-
Requires-Dist:
|
|
12
|
-
Requires-Dist:
|
|
13
|
-
Requires-Dist: pandas
|
|
14
|
-
Requires-Dist: pydantic
|
|
15
|
-
Requires-Dist:
|
|
16
|
-
Requires-Dist:
|
|
17
|
-
Requires-Dist:
|
|
8
|
+
Requires-Python: >=3.9
|
|
9
|
+
Requires-Dist: jedi<0.20.0,>=0.18.0
|
|
10
|
+
Requires-Dist: msgpack<1.0.7,>=1.0.0
|
|
11
|
+
Requires-Dist: networkx<3.2.0,>=2.6.0
|
|
12
|
+
Requires-Dist: numpy<1.24.0,>=1.21.0
|
|
13
|
+
Requires-Dist: pandas<2.0.0,>=1.3.0
|
|
14
|
+
Requires-Dist: pydantic<2.0.0,>=1.8.0
|
|
15
|
+
Requires-Dist: ray<3.0.0,>=2.0.0
|
|
16
|
+
Requires-Dist: requests<3.0.0,>=2.20.0
|
|
17
|
+
Requires-Dist: rich<14.0.0,>=12.6.0
|
|
18
|
+
Requires-Dist: typer<1.0.0,>=0.9.0
|
|
19
|
+
Requires-Dist: typing-extensions>=4.0.0
|
|
18
20
|
Description-Content-Type: text/markdown
|
|
19
21
|
|
|
20
22
|

|
|
@@ -95,17 +97,18 @@ To view the available options and commands, run `codeanalyzer --help`. You shoul
|
|
|
95
97
|
Static Analysis on Python source code using Jedi, CodeQL and Tree sitter.
|
|
96
98
|
|
|
97
99
|
|
|
98
|
-
╭─ Options
|
|
99
|
-
│ * --input -i PATH
|
|
100
|
-
│ --output -o PATH
|
|
101
|
-
│ --
|
|
102
|
-
│ --
|
|
103
|
-
│ --
|
|
104
|
-
│ --
|
|
105
|
-
│ --
|
|
106
|
-
│
|
|
107
|
-
│
|
|
108
|
-
|
|
100
|
+
╭─ Options ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
|
|
101
|
+
│ * --input -i PATH Path to the project root directory. [default: None] [required] │
|
|
102
|
+
│ --output -o PATH Output directory for artifacts. [default: None] │
|
|
103
|
+
│ --format -f [json|msgpack] Output format: json or msgpack. [default: json] │
|
|
104
|
+
│ --analysis-level -a INTEGER 1: symbol table, 2: call graph. [default: 1] │
|
|
105
|
+
│ --codeql --no-codeql Enable CodeQL-based analysis. [default: no-codeql] │
|
|
106
|
+
│ --eager --lazy Enable eager or lazy analysis. Defaults to lazy. [default: lazy] │
|
|
107
|
+
│ --cache-dir -c PATH Directory to store analysis cache. [default: None] │
|
|
108
|
+
│ --clear-cache --keep-cache Clear cache after analysis. [default: clear-cache] │
|
|
109
|
+
│ -v INTEGER Increase verbosity: -v, -vv, -vvv [default: 0] │
|
|
110
|
+
│ --help Show this message and exit. │
|
|
111
|
+
╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
|
|
109
112
|
```
|
|
110
113
|
|
|
111
114
|
### Examples
|
|
@@ -123,7 +126,14 @@ To view the available options and commands, run `codeanalyzer --help`. You shoul
|
|
|
123
126
|
|
|
124
127
|
Now, you can find the analysis results in `analysis.json` in the specified directory.
|
|
125
128
|
|
|
126
|
-
2. **
|
|
129
|
+
2. **Change output format to msgpack:**
|
|
130
|
+
```bash
|
|
131
|
+
codeanalyzer --input ./my-python-project --output /path/to/analysis-results --format msgpack
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
This will save the analysis results in `analysis.msgpack` in the specified directory.
|
|
135
|
+
|
|
136
|
+
3. **Toggle analysis levels with `--analysis-level`:**
|
|
127
137
|
```bash
|
|
128
138
|
codeanalyzer --input ./my-python-project --analysis-level 1 # Symbol table only
|
|
129
139
|
```
|
|
@@ -133,7 +143,7 @@ To view the available options and commands, run `codeanalyzer --help`. You shoul
|
|
|
133
143
|
```
|
|
134
144
|
***Note: The `--analysis-level=2` is not yet implemented in this version.***
|
|
135
145
|
|
|
136
|
-
|
|
146
|
+
4. **Analysis with CodeQL enabled:**
|
|
137
147
|
```bash
|
|
138
148
|
codeanalyzer --input ./my-python-project --codeql
|
|
139
149
|
```
|
|
@@ -141,7 +151,7 @@ To view the available options and commands, run `codeanalyzer --help`. You shoul
|
|
|
141
151
|
|
|
142
152
|
***Note: Not yet fully implemented. Please refrain from using this option until further notice.***
|
|
143
153
|
|
|
144
|
-
|
|
154
|
+
5. **Eager analysis with custom cache directory:**
|
|
145
155
|
```bash
|
|
146
156
|
codeanalyzer --input ./my-python-project --eager --cache-dir /path/to/custom-cache
|
|
147
157
|
```
|
|
@@ -149,14 +159,14 @@ To view the available options and commands, run `codeanalyzer --help`. You shoul
|
|
|
149
159
|
|
|
150
160
|
If you provide --cache-dir, the cache will be stored in that directory. If not specified, it defaults to `.codeanalyzer` in the current working directory (`$PWD`).
|
|
151
161
|
|
|
152
|
-
|
|
162
|
+
6. **Quiet mode (minimal output):**
|
|
153
163
|
```bash
|
|
154
164
|
codeanalyzer --input /path/to/my-python-project --quiet
|
|
155
165
|
```
|
|
156
166
|
|
|
157
|
-
|
|
167
|
+
## Output
|
|
158
168
|
|
|
159
|
-
By default, analysis results are printed to stdout in JSON format. When using the `--output` option, results are saved to `analysis.json` in the specified directory.
|
|
169
|
+
By default, analysis results are printed to stdout in JSON format. When using the `--output` option, results are saved to `analysis.json` in the specified directory. If you use the `--format=msgpack` option, the results will be saved in `analysis.msgpack`, which is a binary format that can be more efficient for storage and transmission.
|
|
160
170
|
|
|
161
171
|
## Development
|
|
162
172
|
|
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
codeanalyzer/__init__.py,sha256=BZ3Kuwl-F_F-8H8cepLnVJ4Ku4NNUjjqg0Y6ujPQSsI,108
|
|
2
|
-
codeanalyzer/__main__.py,sha256=
|
|
3
|
-
codeanalyzer/core.py,sha256=
|
|
2
|
+
codeanalyzer/__main__.py,sha256=x2LNDxYndzV2LkauBz7-_0qR58seRx-yJ07-obs4o9Q,4855
|
|
3
|
+
codeanalyzer/core.py,sha256=RUJrtmtCFAxBLyI_eHp3Oi0tWAU09DeI7wUD7vfQwi0,24926
|
|
4
4
|
codeanalyzer/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
5
5
|
codeanalyzer/config/__init__.py,sha256=9XBxAn1oWGRuhg3bEBUuVGs3hFNXEAKrr-Ce7tq9a2k,61
|
|
6
|
-
codeanalyzer/config/config.py,sha256=
|
|
6
|
+
codeanalyzer/config/config.py,sha256=ZiKzc5uEUCIvih58-6BDtLLI1hPij41wGQjBcj9KNQM,188
|
|
7
7
|
codeanalyzer/jedi/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
8
8
|
codeanalyzer/jedi/jedi.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
9
|
-
codeanalyzer/schema/__init__.py,sha256=
|
|
10
|
-
codeanalyzer/schema/py_schema.py,sha256=
|
|
9
|
+
codeanalyzer/schema/__init__.py,sha256=k6N1AfXe1J7cSFBdRJlYo1FPVrr4HeXgzEmVy8MUhC4,694
|
|
10
|
+
codeanalyzer/schema/py_schema.py,sha256=04K19tDtmg2tPXjwu_8BcmVpenk1ibVwNv6bHWZHOLY,10851
|
|
11
11
|
codeanalyzer/semantic_analysis/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
12
12
|
codeanalyzer/semantic_analysis/codeql/__init__.py,sha256=ODMkdGvs3ebJdfIZle8T4VcHoCBhH_ZehWuWFpNh3NI,1022
|
|
13
13
|
codeanalyzer/semantic_analysis/codeql/codeql_analysis.py,sha256=-mVTm2JdQlfmV_9T4xLgjxwROKS83aP7lJAmQHh37xY,5312
|
|
@@ -15,14 +15,15 @@ codeanalyzer/semantic_analysis/codeql/codeql_exceptions.py,sha256=PnJOasW9rP68SE
|
|
|
15
15
|
codeanalyzer/semantic_analysis/codeql/codeql_loader.py,sha256=o0BW-6yHkN6kLG66rOYQQ_ToQUn5Ivl9h9ZdBM3_E_Q,2288
|
|
16
16
|
codeanalyzer/semantic_analysis/codeql/codeql_query_runner.py,sha256=QJtID1YZkO6Wyns_qTJFqOSiV238ArLXwgLv105B27E,6520
|
|
17
17
|
codeanalyzer/semantic_analysis/wala/__init__.py,sha256=JSDvkrpJ2U90Ikex34EluSHmoGutlmRhV2xvInt6tB8,743
|
|
18
|
-
codeanalyzer/syntactic_analysis/__init__.py,sha256=
|
|
19
|
-
codeanalyzer/syntactic_analysis/
|
|
18
|
+
codeanalyzer/syntactic_analysis/__init__.py,sha256=EUQkJEh6wHjWx2qTTKbTbUgwSbfKeNieKHNy7RknVXA,476
|
|
19
|
+
codeanalyzer/syntactic_analysis/exceptions.py,sha256=whs_n0vIu655Jkk1a7iOoXY6iIca4pZqJnU40V9Ejaw,537
|
|
20
|
+
codeanalyzer/syntactic_analysis/symbol_table_builder.py,sha256=0FE_ZdlyP77P1B70QXhPKO4AEPm2KvA3-llaAjIrOJU,34639
|
|
20
21
|
codeanalyzer/utils/__init__.py,sha256=hC6VWdR5rerSqBxzu9KQHTASWqwrrYJv-CMDwrTlzkc,137
|
|
21
22
|
codeanalyzer/utils/logging.py,sha256=0vTkGSl5EZN8yhhWa_5Mrn1n_twRCSW53rNwjzQ9RbI,601
|
|
22
23
|
codeanalyzer/utils/progress_bar.py,sha256=ZHJzGiCo5q4dyXq4CtsrJeq9Ip7sD84T3yZjNX7TBys,2443
|
|
23
|
-
codeanalyzer_python-0.1.
|
|
24
|
-
codeanalyzer_python-0.1.
|
|
25
|
-
codeanalyzer_python-0.1.
|
|
26
|
-
codeanalyzer_python-0.1.
|
|
27
|
-
codeanalyzer_python-0.1.
|
|
28
|
-
codeanalyzer_python-0.1.
|
|
24
|
+
codeanalyzer_python-0.1.10.dist-info/METADATA,sha256=hZtJ0nMdkzF9Bm1JTzaypIFLWpl9bOL1xrtrvdKBjRU,15261
|
|
25
|
+
codeanalyzer_python-0.1.10.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
26
|
+
codeanalyzer_python-0.1.10.dist-info/entry_points.txt,sha256=eUrB7Jq5Oav6RblMX_RYfVLSw_h15NbzC3fNSnGsPuM,59
|
|
27
|
+
codeanalyzer_python-0.1.10.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
28
|
+
codeanalyzer_python-0.1.10.dist-info/licenses/NOTICE,sha256=YU0Z9NDWqKY-2jfFcbxeZ6fbnzz0oZeKmnUcO8a-bcQ,901
|
|
29
|
+
codeanalyzer_python-0.1.10.dist-info/RECORD,,
|
|
File without changes
|
{codeanalyzer_python-0.1.8.dist-info → codeanalyzer_python-0.1.10.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
{codeanalyzer_python-0.1.8.dist-info → codeanalyzer_python-0.1.10.dist-info}/licenses/LICENSE
RENAMED
|
File without changes
|
{codeanalyzer_python-0.1.8.dist-info → codeanalyzer_python-0.1.10.dist-info}/licenses/NOTICE
RENAMED
|
File without changes
|