codeanalyzer-python 0.1.9__py3-none-any.whl → 0.1.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
codeanalyzer/__main__.py CHANGED
@@ -1,12 +1,12 @@
1
1
  from pathlib import Path
2
- from typing import Annotated, Optional
2
+ from typing import Optional, Annotated
3
3
 
4
4
  import typer
5
5
 
6
6
  from codeanalyzer.core import Codeanalyzer
7
7
  from codeanalyzer.utils import _set_log_level, logger
8
8
  from codeanalyzer.config import OutputFormat
9
-
9
+ from codeanalyzer.schema import model_dump_json
10
10
 
11
11
  def main(
12
12
  input: Annotated[
@@ -32,6 +32,12 @@ def main(
32
32
  using_codeql: Annotated[
33
33
  bool, typer.Option("--codeql/--no-codeql", help="Enable CodeQL-based analysis.")
34
34
  ] = False,
35
+ using_ray: Annotated[
36
+ bool,
37
+ typer.Option(
38
+ "--ray/--no-ray", help="Enable Ray for distributed analysis."
39
+ ),
40
+ ] = False,
35
41
  rebuild_analysis: Annotated[
36
42
  bool,
37
43
  typer.Option(
@@ -39,18 +45,32 @@ def main(
39
45
  help="Enable eager or lazy analysis. Defaults to lazy.",
40
46
  ),
41
47
  ] = False,
48
+ skip_tests: Annotated[
49
+ bool,
50
+ typer.Option(
51
+ "--skip-tests/--include-tests",
52
+ help="Skip test files in analysis.",
53
+ ),
54
+ ] = True,
55
+ file_name: Annotated[
56
+ Optional[Path],
57
+ typer.Option(
58
+ "--file-name",
59
+ help="Analyze only the specified file (relative to input directory).",
60
+ ),
61
+ ] = None,
42
62
  cache_dir: Annotated[
43
63
  Optional[Path],
44
64
  typer.Option(
45
65
  "-c",
46
66
  "--cache-dir",
47
- help="Directory to store analysis cache.",
67
+ help="Directory to store analysis cache. Defaults to '.codeanalyzer' in the input directory.",
48
68
  ),
49
69
  ] = None,
50
70
  clear_cache: Annotated[
51
71
  bool,
52
- typer.Option("--clear-cache/--keep-cache", help="Clear cache after analysis."),
53
- ] = True,
72
+ typer.Option("--clear-cache/--keep-cache", help="Clear cache after analysis. By default, cache is retained."),
73
+ ] = False,
54
74
  verbosity: Annotated[
55
75
  int, typer.Option("-v", count=True, help="Increase verbosity: -v, -vv, -vvv")
56
76
  ] = 0,
@@ -62,21 +82,28 @@ def main(
62
82
  logger.error(f"Input path '{input}' does not exist.")
63
83
  raise typer.Exit(code=1)
64
84
 
85
+ # Validate file_name if provided
86
+ if file_name is not None:
87
+ full_file_path = input / file_name
88
+ if not full_file_path.exists():
89
+ logger.error(f"Specified file '{file_name}' does not exist in '{input}'.")
90
+ raise typer.Exit(code=1)
91
+ if not full_file_path.is_file():
92
+ logger.error(f"Specified path '{file_name}' is not a file.")
93
+ raise typer.Exit(code=1)
94
+ if not str(file_name).endswith('.py'):
95
+ logger.error(f"Specified file '{file_name}' is not a Python file (.py).")
96
+ raise typer.Exit(code=1)
97
+
65
98
  with Codeanalyzer(
66
- input, analysis_level, using_codeql, rebuild_analysis, cache_dir, clear_cache
99
+ input, analysis_level, skip_tests, using_codeql, rebuild_analysis, cache_dir, clear_cache, using_ray, file_name
67
100
  ) as analyzer:
68
101
  artifacts = analyzer.analyze()
69
102
 
70
103
  # Handle output based on format
71
104
  if output is None:
72
105
  # Output to stdout (only for JSON)
73
- if format == OutputFormat.JSON:
74
- print(artifacts.model_dump_json(separators=(",", ":")))
75
- else:
76
- logger.error(
77
- f"Format '{format.value}' requires an output directory (use -o/--output)"
78
- )
79
- raise typer.Exit(code=1)
106
+ print(model_dump_json(artifacts, separators=(",", ":")))
80
107
  else:
81
108
  # Output to file
82
109
  output.mkdir(parents=True, exist_ok=True)
@@ -87,8 +114,8 @@ def _write_output(artifacts, output_dir: Path, format: OutputFormat):
87
114
  """Write artifacts to file in the specified format."""
88
115
  if format == OutputFormat.JSON:
89
116
  output_file = output_dir / "analysis.json"
90
- # Use Pydantic's json() with separators for compact output
91
- json_str = artifacts.model_dump_json(indent=None)
117
+ # Use Pydantic's model_dump_json() for compact output
118
+ json_str = model_dump_json(artifacts, indent=None)
92
119
  with output_file.open("w") as f:
93
120
  f.write(json_str)
94
121
  logger.info(f"Analysis saved to {output_file}")
codeanalyzer/core.py CHANGED
@@ -4,13 +4,39 @@ import shutil
4
4
  import subprocess
5
5
  import sys
6
6
  from pathlib import Path
7
- from typing import Any, Dict, Optional, Union
7
+ from typing import Any, Dict, Optional, Union, List
8
8
 
9
- from codeanalyzer.schema.py_schema import PyApplication, PyModule
9
+ import ray
10
+ from codeanalyzer.utils import logger
11
+ from codeanalyzer.schema import PyApplication, PyModule, model_dump_json, model_validate_json
10
12
  from codeanalyzer.semantic_analysis.codeql import CodeQLLoader
11
13
  from codeanalyzer.semantic_analysis.codeql.codeql_exceptions import CodeQLExceptions
14
+ from codeanalyzer.syntactic_analysis.exceptions import SymbolTableBuilderRayError
12
15
  from codeanalyzer.syntactic_analysis.symbol_table_builder import SymbolTableBuilder
13
- from codeanalyzer.utils import logger
16
+ from codeanalyzer.utils import ProgressBar
17
+
18
+ @ray.remote
19
+ def _process_file_with_ray(py_file: Union[Path, str], project_dir: Union[Path, str], virtualenv: Union[Path, str, None]) -> Dict[str, PyModule]:
20
+ """Processes files in the project directory using Ray for distributed processing.
21
+
22
+ Args:
23
+ py_file (Union[Path, str]): Path to the Python file to process.
24
+ project_dir (Union[Path, str]): Path to the project directory.
25
+ virtualenv (Union[Path, str, None]): Path to the virtual environment directory.
26
+ Returns:
27
+ Dict[str, PyModule]: A dictionary mapping file paths to PyModule objects.
28
+ """
29
+ from rich.console import Console
30
+ console = Console()
31
+ module_map: Dict[str, PyModule] = {}
32
+ try:
33
+ py_file = Path(py_file)
34
+ symbol_table_builder = SymbolTableBuilder(project_dir, virtualenv)
35
+ module_map[str(py_file)] = symbol_table_builder.build_pymodule_from_file(py_file)
36
+ except Exception as e:
37
+ console.log(f"❌ Failed to process {py_file}: {e}")
38
+ raise SymbolTableBuilderRayError(f"Ray processing error for {py_file}: {e}")
39
+ return module_map
14
40
 
15
41
 
16
42
  class Codeanalyzer:
@@ -28,14 +54,18 @@ class Codeanalyzer:
28
54
  def __init__(
29
55
  self,
30
56
  project_dir: Union[str, Path],
31
- analysis_depth: int = 1,
32
- using_codeql: bool = False,
33
- rebuild_analysis: bool = False,
34
- cache_dir: Optional[Path] = None,
35
- clear_cache: bool = True,
57
+ analysis_depth: int,
58
+ skip_tests: bool,
59
+ using_codeql: bool,
60
+ rebuild_analysis: bool,
61
+ cache_dir: Optional[Path],
62
+ clear_cache: bool,
63
+ using_ray: bool,
64
+ file_name: Optional[Path] = None,
36
65
  ) -> None:
37
66
  self.analysis_depth = analysis_depth
38
67
  self.project_dir = Path(project_dir).resolve()
68
+ self.skip_tests = skip_tests
39
69
  self.using_codeql = using_codeql
40
70
  self.rebuild_analysis = rebuild_analysis
41
71
  self.cache_dir = (
@@ -45,10 +75,12 @@ class Codeanalyzer:
45
75
  self.db_path: Optional[Path] = None
46
76
  self.codeql_bin: Optional[Path] = None
47
77
  self.virtualenv: Optional[Path] = None
78
+ self.using_ray: bool = using_ray
79
+ self.file_name: Optional[Path] = file_name
48
80
 
49
81
  @staticmethod
50
82
  def _cmd_exec_helper(
51
- cmd: list[str],
83
+ cmd: List[str],
52
84
  cwd: Optional[Path] = None,
53
85
  capture_output: bool = True,
54
86
  check: bool = True,
@@ -126,7 +158,8 @@ class Codeanalyzer:
126
158
  # We're inside a virtual environment; need to find the base interpreter
127
159
 
128
160
  # First, check if user explicitly set SYSTEM_PYTHON
129
- if system_python := os.getenv("SYSTEM_PYTHON"):
161
+ system_python = os.getenv("SYSTEM_PYTHON")
162
+ if system_python:
130
163
  system_python_path = Path(system_python)
131
164
  if system_python_path.exists() and system_python_path.is_file():
132
165
  return system_python_path
@@ -142,14 +175,16 @@ class Codeanalyzer:
142
175
 
143
176
  # Use shutil.which to find python3 and python in PATH
144
177
  for python_name in ["python3", "python"]:
145
- if python_path := shutil.which(python_name):
178
+ python_path = shutil.which(python_name)
179
+ if python_path:
146
180
  candidate = Path(python_path)
147
181
  # Skip if this is the current virtual environment's python
148
182
  if not str(candidate).startswith(sys.prefix):
149
183
  python_candidates.append(candidate)
150
184
 
151
185
  # Check pyenv installation
152
- if pyenv_root := os.getenv("PYENV_ROOT"):
186
+ pyenv_root = os.getenv("PYENV_ROOT")
187
+ if pyenv_root:
153
188
  pyenv_python = Path(pyenv_root) / "shims" / "python"
154
189
  if pyenv_python.exists():
155
190
  python_candidates.append(pyenv_python)
@@ -160,15 +195,17 @@ class Codeanalyzer:
160
195
  python_candidates.append(home_pyenv)
161
196
 
162
197
  # Check conda base environment
163
- if conda_prefix := os.getenv(
164
- "CONDA_PREFIX_1"
165
- ): # Original conda env before activation
166
- conda_python = Path(conda_prefix) / "bin" / "python"
198
+ conda_base = os.getenv("CONDA_PREFIX")
199
+ if conda_base:
200
+ conda_python = Path(conda_base) / "bin" / "python"
167
201
  if conda_python.exists():
168
202
  python_candidates.append(conda_python)
169
203
 
170
204
  # Check asdf
171
- if asdf_dir := os.getenv("ASDF_DIR"):
205
+ asdf_dir = os.getenv("ASDF_DIR")
206
+ # If ASDF_DIR is set, use its shims directory
207
+ # Otherwise, check if asdf is installed in the default location
208
+ if asdf_dir:
172
209
  asdf_python = Path(asdf_dir) / "shims" / "python"
173
210
  if asdf_python.exists():
174
211
  python_candidates.append(asdf_python)
@@ -211,14 +248,61 @@ class Codeanalyzer:
211
248
  # Find python in the virtual environment
212
249
  venv_python = venv_path / "bin" / "python"
213
250
 
214
- # Install the project itself (reads pyproject.toml)
215
- self._cmd_exec_helper(
216
- [str(venv_python), "-m", "pip", "install", "-U", f"{self.project_dir}"],
217
- cwd=self.project_dir,
218
- check=True,
219
- )
220
- # Install the project dependencies
221
- self.virtualenv = venv_path
251
+ # First, install dependencies from various dependency files
252
+ dependency_files = [
253
+ ("requirements.txt", ["-r"]),
254
+ ("requirements-dev.txt", ["-r"]),
255
+ ("dev-requirements.txt", ["-r"]),
256
+ ("test-requirements.txt", ["-r"]),
257
+ ]
258
+
259
+ for dep_file, pip_args in dependency_files:
260
+ if (self.project_dir / dep_file).exists():
261
+ logger.info(f"Installing dependencies from {dep_file}")
262
+ self._cmd_exec_helper(
263
+ [str(venv_python), "-m", "pip", "install", "-U"] + pip_args + [str(self.project_dir / dep_file)],
264
+ cwd=self.project_dir,
265
+ check=True,
266
+ )
267
+
268
+ # Handle Pipenv files
269
+ if (self.project_dir / "Pipfile").exists():
270
+ logger.info("Installing dependencies from Pipfile")
271
+ # Note: This would require pipenv to be installed
272
+ self._cmd_exec_helper(
273
+ [str(venv_python), "-m", "pip", "install", "pipenv"],
274
+ cwd=self.project_dir,
275
+ check=True,
276
+ )
277
+ self._cmd_exec_helper(
278
+ ["pipenv", "install", "--dev"],
279
+ cwd=self.project_dir,
280
+ check=True,
281
+ )
282
+
283
+ # Handle conda environment files
284
+ conda_files = ["conda.yml", "environment.yml"]
285
+ for conda_file in conda_files:
286
+ if (self.project_dir / conda_file).exists():
287
+ logger.info(f"Found {conda_file} - note that conda environments should be handled outside this tool")
288
+ break
289
+
290
+ # Now install the project itself in editable mode (only if package definition exists)
291
+ package_definition_files = [
292
+ "pyproject.toml", # Modern Python packaging (PEP 518/621)
293
+ "setup.py", # Traditional setuptools
294
+ "setup.cfg", # Setup configuration
295
+ ]
296
+
297
+ if any((self.project_dir / file).exists() for file in package_definition_files):
298
+ logger.info("Installing project in editable mode")
299
+ self._cmd_exec_helper(
300
+ [str(venv_python), "-m", "pip", "install", "-e", str(self.project_dir)],
301
+ cwd=self.project_dir,
302
+ check=True,
303
+ )
304
+ else:
305
+ logger.warning("No package definition files found, skipping editable installation")
222
306
 
223
307
  if self.using_codeql:
224
308
  logger.info(f"(Re-)initializing CodeQL analysis for {self.project_dir}")
@@ -280,14 +364,95 @@ class Codeanalyzer:
280
364
 
281
365
  return self
282
366
 
283
- def __exit__(self, exc_type, exc_val, exc_tb) -> None:
367
+ def __exit__(self, *args, **kwargs) -> None:
284
368
  if self.clear_cache and self.cache_dir.exists():
285
369
  logger.info(f"Clearing cache directory: {self.cache_dir}")
286
370
  shutil.rmtree(self.cache_dir)
287
371
 
288
372
  def analyze(self) -> PyApplication:
289
- """Return the path to the CodeQL database."""
290
- return PyApplication.builder().symbol_table(self._build_symbol_table()).build()
373
+ """Analyze the project and return a PyApplication with symbol table.
374
+
375
+ Uses caching to avoid re-analyzing unchanged files.
376
+ """
377
+ cache_file = self.cache_dir / "analysis_cache.json"
378
+
379
+ # Try to load existing cached analysis
380
+ cached_pyapplication = None
381
+ if not self.rebuild_analysis and cache_file.exists():
382
+ try:
383
+ cached_pyapplication = self._load_pyapplication_from_cache(cache_file)
384
+ logger.info("Loaded cached analysis")
385
+ except Exception as e:
386
+ logger.warning(f"Failed to load cache: {e}. Rebuilding analysis.")
387
+ cached_pyapplication = None
388
+
389
+ # Build the symbol table, reusing entries from the cached application if one was loaded (otherwise build a new one from scratch)
390
+ symbol_table = self._build_symbol_table(cached_pyapplication.symbol_table if cached_pyapplication else {})
391
+
392
+ # Recreate pyapplication
393
+ app = PyApplication.builder().symbol_table(symbol_table).build()
394
+
395
+ # Save to cache
396
+ self._save_analysis_cache(app, cache_file)
397
+
398
+ return app
399
+
400
+ def _load_pyapplication_from_cache(self, cache_file: Path) -> PyApplication:
401
+ """Load cached analysis from file.
402
+
403
+ Args:
404
+ cache_file: Path to the cache file
405
+
406
+ Returns:
407
+ PyApplication: The cached application data
408
+ """
409
+ with cache_file.open('r') as f:
410
+ data = f.read()
411
+ return model_validate_json(PyApplication, data)
412
+
413
+ def _save_analysis_cache(self, app: PyApplication, cache_file: Path) -> None:
414
+ """Save analysis to cache file.
415
+
416
+ Args:
417
+ app: The PyApplication to cache
418
+ cache_file: Path to save the cache file
419
+ """
420
+ # Ensure cache directory exists
421
+ cache_file.parent.mkdir(parents=True, exist_ok=True)
422
+
423
+ with cache_file.open('w') as f:
424
+ f.write(model_dump_json(app, indent=2))
425
+
426
+ logger.info(f"Analysis cached to {cache_file}")
427
+
428
+ def _file_unchanged(self, file_path: Path, cached_module: PyModule) -> bool:
429
+ """Check if a file has changed since it was cached.
430
+
431
+ Args:
432
+ file_path: Path to the file to check
433
+ cached_module: The cached PyModule for this file
434
+
435
+ Returns:
436
+ bool: True if file is unchanged, False otherwise
437
+ """
438
+ try:
439
+ # Check last modified time and file size
440
+ if (cached_module.last_modified is not None and
441
+ cached_module.file_size is not None and
442
+ cached_module.last_modified == file_path.stat().st_mtime and
443
+ cached_module.file_size == file_path.stat().st_size):
444
+ return True
445
+ # mtime/size did not both match (or were not recorded): fall back to comparing the content hash
446
+ if cached_module.content_hash is not None:
447
+ content_hash = hashlib.sha256(file_path.read_bytes()).hexdigest()
448
+ return content_hash == cached_module.content_hash
449
+
450
+ # mtime/size did not match and no content hash is cached; assume the file changed
451
+ return False
452
+
453
+ except Exception as e:
454
+ logger.debug(f"Error checking file {file_path}: {e}")
455
+ return False
291
456
 
292
457
  def _compute_checksum(self, root: Path) -> str:
293
458
  """Compute SHA256 checksum of all Python source files in a project directory. If somethings changes, the
@@ -304,11 +469,131 @@ class Codeanalyzer:
304
469
  sha256.update(py_file.read_bytes())
305
470
  return sha256.hexdigest()
306
471
 
307
- def _build_symbol_table(self) -> Dict[str, PyModule]:
308
- """Retrieve a symbol table of the whole project."""
309
- return SymbolTableBuilder(self.project_dir, self.virtualenv).build()
472
+ def _build_symbol_table(self, cached_symbol_table: Optional[Dict[str, PyModule]] = None) -> Dict[str, PyModule]:
473
+ """Builds the symbol table for the project.
474
+
475
+ This method scans the project directory, identifies Python files,
476
+ and constructs a symbol table containing information about classes,
477
+ functions, and variables defined in those files.
478
+
479
+ Args:
480
+ cached_symbol_table: Previously cached symbol table (file path -> PyModule) used to reuse entries for unchanged files
481
+
482
+ Returns:
483
+ Dict[str, PyModule]: A dictionary mapping file paths to PyModule objects.
484
+ """
485
+ symbol_table: Dict[str, PyModule] = {}
486
+
487
+ # Handle single file analysis
488
+ if self.file_name is not None:
489
+ single_file = self.project_dir / self.file_name
490
+ logger.info(f"Analyzing single file: {single_file}")
491
+
492
+ # Check if file is in cache and unchanged
493
+ file_key = str(single_file)
494
+ if file_key in cached_symbol_table and not self.rebuild_analysis:
495
+ # Reuse the cached module only if the file is unchanged on disk (mtime/size, then content hash)
496
+ if self._file_unchanged(single_file, cached_symbol_table[file_key]):
497
+ logger.info(f"Using cached analysis for {single_file}")
498
+ symbol_table[file_key] = cached_symbol_table[file_key]
499
+ return symbol_table
500
+
501
+ # File is new or changed, analyze it
502
+ try:
503
+ symbol_table_builder = SymbolTableBuilder(self.project_dir, self.virtualenv)
504
+ py_module = symbol_table_builder.build_pymodule_from_file(single_file)
505
+ symbol_table[file_key] = py_module
506
+ logger.info("✅ Single file analysis complete.")
507
+ return symbol_table
508
+ except Exception as e:
509
+ logger.error(f"Failed to process {single_file}: {e}")
510
+ return symbol_table
511
+
512
+ # Get all Python files first to show accurate progress
513
+ py_files = []
514
+ for py_file in self.project_dir.rglob("*.py"):
515
+ rel_path = py_file.relative_to(self.project_dir)
516
+ path_parts = rel_path.parts
517
+ filename = py_file.name
518
+
519
+ # Skip directories we don't care about
520
+ if (
521
+ "site-packages" in path_parts
522
+ or ".venv" in path_parts
523
+ or ".codeanalyzer" in path_parts
524
+ ):
525
+ continue
526
+
527
+ # Skip test files if enabled
528
+ if self.skip_tests and (
529
+ "test" in path_parts
530
+ or "tests" in path_parts
531
+ or filename.startswith("test_")
532
+ or filename.endswith("_test.py")
533
+ ):
534
+ continue
535
+
536
+ py_files.append(py_file)
537
+
538
+ if self.using_ray:
539
+ logger.info("Using Ray for distributed symbol table generation.")
540
+ # Separate files into cached and new/changed
541
+ files_to_process = []
542
+ for py_file in py_files:
543
+ file_key = str(py_file)
544
+ if file_key in cached_symbol_table and not self.rebuild_analysis:
545
+ if self._file_unchanged(py_file, cached_symbol_table[file_key]):
546
+ # Use cached version
547
+ symbol_table[file_key] = cached_symbol_table[file_key]
548
+ continue
549
+ files_to_process.append(py_file)
550
+
551
+ # Process only new/changed files with Ray
552
+ if files_to_process:
553
+ futures = [_process_file_with_ray.remote(py_file, self.project_dir, str(self.virtualenv) if self.virtualenv else None) for py_file in files_to_process]
554
+
555
+ with ProgressBar(len(futures), "Building symbol table (parallel)") as progress:
556
+ pending = futures[:]
557
+ while pending:
558
+ done, pending = ray.wait(pending, num_returns=1)
559
+ result = ray.get(done[0])
560
+ if result:
561
+ symbol_table.update(result)
562
+ progress.advance()
563
+ else:
564
+ logger.info("Building symbol table serially.")
565
+ symbol_table_builder = SymbolTableBuilder(self.project_dir, self.virtualenv)
566
+ files_processed = 0
567
+ files_from_cache = 0
568
+
569
+ with ProgressBar(len(py_files), "Building symbol table") as progress:
570
+ for py_file in py_files:
571
+ file_key = str(py_file)
572
+
573
+ # Check if file is cached and unchanged
574
+ if file_key in cached_symbol_table and not self.rebuild_analysis:
575
+ if self._file_unchanged(py_file, cached_symbol_table[file_key]):
576
+ symbol_table[file_key] = cached_symbol_table[file_key]
577
+ files_from_cache += 1
578
+ progress.advance()
579
+ continue
580
+
581
+ # File is new or changed, analyze it
582
+ try:
583
+ py_module = symbol_table_builder.build_pymodule_from_file(py_file)
584
+ symbol_table[file_key] = py_module
585
+ files_processed += 1
586
+ except Exception as e:
587
+ logger.error(f"Failed to process {py_file}: {e}")
588
+ progress.advance()
589
+
590
+ if files_from_cache > 0:
591
+ logger.info(f"Reused {files_from_cache} files from cache, processed {files_processed} new/changed files")
592
+
593
+ logger.info("✅ Symbol table generation complete.")
594
+ return symbol_table
310
595
 
311
596
  def _get_call_graph(self) -> Dict[str, Any]:
312
597
  """Retrieve call graph from CodeQL database."""
313
598
  logger.warning("Call graph extraction not yet implemented.")
314
- return {}
599
+ return {}
@@ -1,3 +1,6 @@
1
+ from importlib.metadata import version, PackageNotFoundError
2
+ from packaging.version import parse as parse_version
3
+
1
4
  from .py_schema import (
2
5
  PyApplication,
3
6
  PyCallable,
@@ -21,3 +24,49 @@ __all__ = [
21
24
  "PyClassAttribute",
22
25
  "PyCallableParameter",
23
26
  ]
27
+
28
+ try:
29
+ pydantic_version = version("pydantic")
30
+ except PackageNotFoundError:
31
+ pydantic_version = "0.0.0" # fallback or raise if appropriate
32
+
33
+ PYDANTIC_V2 = parse_version(pydantic_version) >= parse_version("2.0.0")
34
+
35
+ if not PYDANTIC_V2:
36
+ # Safe to pass localns
37
+ PyCallable.update_forward_refs(PyClass=PyClass)
38
+ PyClass.update_forward_refs(PyCallable=PyCallable)
39
+ PyModule.update_forward_refs(PyCallable=PyCallable, PyClass=PyClass)
40
+ PyApplication.update_forward_refs(
41
+ PyCallable=PyCallable,
42
+ PyClass=PyClass,
43
+ PyModule=PyModule
44
+ )
45
+
46
+ # Compatibility helpers for Pydantic v1/v2
47
+ def model_dump_json(model, **kwargs):
48
+ """Compatibility helper for JSON serialization."""
49
+ if PYDANTIC_V2:
50
+ return model.model_dump_json(**kwargs)
51
+ else:
52
+ # Map Pydantic v2 parameters to v1 equivalents
53
+ v1_kwargs = {}
54
+ if 'indent' in kwargs:
55
+ v1_kwargs['indent'] = kwargs['indent']
56
+ if 'separators' in kwargs:
57
+ # In v1, separators is passed to dumps_kwargs
58
+ v1_kwargs['separators'] = kwargs['separators']
59
+ return model.json(**v1_kwargs)
60
+
61
+ def model_validate_json(model_class, json_data):
62
+ """Compatibility helper for JSON deserialization."""
63
+ if PYDANTIC_V2:
64
+ return model_class.model_validate_json(json_data)
65
+ else:
66
+ return model_class.parse_raw(json_data)
67
+
68
+ __all__.extend([
69
+ "PYDANTIC_V2",
70
+ "model_dump_json",
71
+ "model_validate_json"
72
+ ])
@@ -19,7 +19,7 @@
19
19
  This module defines the data models used to represent Python code structures
20
20
  for static analysis purposes.
21
21
  """
22
-
22
+ from __future__ import annotations
23
23
  import inspect
24
24
  from pathlib import Path
25
25
  from typing import Any, Dict, List, Optional
@@ -148,7 +148,8 @@ def builder(cls):
148
148
 
149
149
  method.__name__ = f"{f}"
150
150
  method.__annotations__ = {"value": t, "return": builder_name}
151
- method.__doc__ = f"Set {f} ({t.__name__})"
151
+ # Check if 't' has '__name__' attribute, otherwise use a fallback
152
+ method.__doc__ = f"Set {f} ({getattr(t, '__name__', str(t))})"
152
153
  return method
153
154
 
154
155
  namespace[f"{field}"] = make_method()
@@ -275,12 +276,16 @@ class PyCallable(BaseModel):
275
276
  code_start_line: int = -1
276
277
  accessed_symbols: List[PySymbol] = []
277
278
  call_sites: List[PyCallsite] = []
279
+ inner_callables: Dict[str, "PyCallable"] = {}
280
+ inner_classes: Dict[str, "PyClass"] = {}
278
281
  local_variables: List[PyVariableDeclaration] = []
279
282
  cyclomatic_complexity: int = 0
280
283
 
281
284
  def __hash__(self) -> int:
282
285
  """Generate a hash based on the callable's signature."""
283
286
  return hash(self.signature)
287
+
288
+
284
289
 
285
290
 
286
291
  @builder
@@ -328,6 +333,10 @@ class PyModule(BaseModel):
328
333
  classes: Dict[str, PyClass] = {}
329
334
  functions: Dict[str, PyCallable] = {}
330
335
  variables: List[PyVariableDeclaration] = []
336
+ # Metadata for caching
337
+ content_hash: Optional[str] = None
338
+ last_modified: Optional[float] = None
339
+ file_size: Optional[int] = None
331
340
 
332
341
 
333
342
  @builder
@@ -335,4 +344,4 @@ class PyModule(BaseModel):
335
344
  class PyApplication(BaseModel):
336
345
  """Represents a Python application."""
337
346
 
338
- symbol_table: dict[Path, PyModule]
347
+ symbol_table: Dict[str, PyModule]
@@ -0,0 +1,16 @@
1
+ from codeanalyzer.syntactic_analysis.exceptions import (
2
+ SymbolTableBuilderException,
3
+ SymbolTableBuilderFileNotFoundError,
4
+ SymbolTableBuilderParsingError,
5
+ SymbolTableBuilderRayError,
6
+ )
7
+
8
+ from codeanalyzer.syntactic_analysis.symbol_table_builder import SymbolTableBuilder
9
+
10
+ __all__ = [
11
+ "SymbolTableBuilder",
12
+ "SymbolTableBuilderException",
13
+ "SymbolTableBuilderFileNotFoundError",
14
+ "SymbolTableBuilderParsingError",
15
+ "SymbolTableBuilderRayError",
16
+ ]
@@ -0,0 +1,15 @@
1
+ class SymbolTableBuilderException(Exception):
2
+ """Base exception for symbol table builder errors."""
3
+ pass
4
+
5
+ class SymbolTableBuilderFileNotFoundError(SymbolTableBuilderException):
6
+ """Exception raised when a source file is not found."""
7
+ pass
8
+
9
+ class SymbolTableBuilderParsingError(SymbolTableBuilderException):
10
+ """Exception raised when a source file cannot be parsed."""
11
+ pass
12
+
13
+ class SymbolTableBuilderRayError(SymbolTableBuilderException):
14
+ """Exception raised when there is an error in Ray processing."""
15
+ pass
@@ -1,9 +1,10 @@
1
1
  import ast
2
+ import hashlib
2
3
  import tokenize
3
4
  from ast import AST, ClassDef
4
5
  from io import StringIO
5
6
  from pathlib import Path
6
- from typing import Dict, List, Optional
7
+ from typing import Dict, List, Optional, Union
7
8
 
8
9
  import jedi
9
10
  from jedi.api import Script
@@ -21,14 +22,12 @@ from codeanalyzer.schema.py_schema import (
21
22
  PySymbol,
22
23
  PyVariableDeclaration,
23
24
  )
24
- from codeanalyzer.utils import logger
25
- from codeanalyzer.utils.progress_bar import ProgressBar
26
25
 
27
26
 
28
27
  class SymbolTableBuilder:
29
28
  """A class for building a symbol table for a Python project."""
30
29
 
31
- def __init__(self, project_dir: Path | str, virtualenv: Path | str | None) -> None:
30
+ def __init__(self, project_dir: Union[Path, str], virtualenv: Union[Path, str, None]) -> None:
32
31
  self.project_dir = Path(project_dir)
33
32
  if virtualenv is None:
34
33
  # If no virtual environment is provided, create a jedi project without an environment.
@@ -72,7 +71,7 @@ class SymbolTableBuilder:
72
71
  pass
73
72
  return None
74
73
 
75
- def _module(self, py_file: Path) -> PyModule:
74
+ def build_pymodule_from_file(self, py_file: Path) -> PyModule:
76
75
  """Builds a PyModule from a Python file.
77
76
 
78
77
  Args:
@@ -83,18 +82,17 @@ class SymbolTableBuilder:
83
82
  """
84
83
  # Get the raw source code from the file
85
84
  source = py_file.read_text(encoding="utf-8")
85
+
86
+ # Get file metadata for caching
87
+ stat = py_file.stat()
88
+ file_size = stat.st_size
89
+ last_modified = stat.st_mtime
90
+ content_hash = hashlib.sha256(source.encode('utf-8')).hexdigest()
91
+
86
92
  # Create a Jedi script for the file
87
93
  script: Script = Script(path=str(py_file), project=self.jedi_project)
88
94
  module = ast.parse(source, filename=str(py_file))
89
-
90
- classes = {}
91
- functions = {}
92
- for node in ast.iter_child_nodes(module):
93
- if isinstance(node, ClassDef):
94
- classes.update(self._add_class(node, script))
95
- elif isinstance(node, ast.FunctionDef):
96
- functions.update(self._callables(node, script))
97
-
95
+
98
96
  return (
99
97
  PyModule.builder()
100
98
  .file_path(str(py_file))
@@ -102,8 +100,11 @@ class SymbolTableBuilder:
102
100
  .comments(self._pycomments(module, source))
103
101
  .imports(self._imports(module))
104
102
  .variables(self._module_variables(module, script))
105
- .classes(classes)
106
- .functions(functions)
103
+ .classes(self._add_class(module, script))
104
+ .functions(self._callables(module, script))
105
+ .content_hash(content_hash)
106
+ .last_modified(last_modified)
107
+ .file_size(file_size)
107
108
  .build()
108
109
  )
109
110
 
@@ -156,144 +157,112 @@ class SymbolTableBuilder:
156
157
 
157
158
  return imports
158
159
 
159
- def _add_class(
160
- self, class_node: ast.ClassDef, script: Script
161
- ) -> Dict[str, PyClass]:
162
- """Builds a PyClass from a class definition node.
163
-
164
- Args:
165
- class_node (ast.ClassDef): The AST node representing the class.
166
- script (Script): The Jedi script object for the module.
160
+ def _add_class(self, node: AST, script: Script, prefix: str = "") -> Dict[str, PyClass]:
161
+ classes: Dict[str, PyClass] = {}
167
162
 
168
- Returns:
169
- Dict[str, PyClass]: Mapping of class signature to PyClass object.
170
- """
171
- # Try resolving full signature with Jedi
172
- try:
173
- definitions = script.goto(
174
- line=class_node.lineno, column=class_node.col_offset
175
- )
176
- signature = next(
177
- (d.full_name for d in definitions if d.type == "class"),
178
- f"{script.path.__str__().replace('/', '.').replace('.py', '')}.{class_node.name}",
179
- )
180
- except Exception:
181
- signature = (
182
- f"{script.path.__str__().replace('/', '.').replace('.py', '')}.{class_node.name}",
183
- )
163
+ for child in ast.iter_child_nodes(node):
164
+ if not isinstance(child, ast.ClassDef):
165
+ continue
184
166
 
185
- code: str = ast.unparse(class_node).strip()
167
+ class_name = child.name
168
+ start_line = child.lineno
169
+ end_line = getattr(child, "end_lineno", start_line + len(child.body))
170
+ code = ast.unparse(child).strip()
186
171
 
187
- py_class = (
188
- PyClass.builder()
189
- .name(class_node.name)
190
- .signature(signature)
191
- .start_line(class_node.lineno)
192
- .end_line(
193
- getattr(
194
- class_node, "end_lineno", class_node.lineno + len(class_node.body)
195
- )
196
- )
197
- .comments(self._pycomments(class_node, code))
198
- .code(code)
199
- .base_classes(
200
- [
172
+ # Try resolving full signature with Jedi
173
+ if prefix:
174
+ signature = f"{prefix}.{class_name}"
175
+ else:
176
+ try:
177
+ definitions = script.goto(line=start_line, column=child.col_offset)
178
+ signature = next(
179
+ (d.full_name for d in definitions if d.type == "class"),
180
+ f"{Path(script.path).relative_to(self.project_dir).__str__().replace('/', '.').replace('.py', '')}.{class_name}"
181
+ )
182
+ except Exception:
183
+ signature = f"{Path(script.path).relative_to(self.project_dir).__str__().replace('/', '.').replace('.py', '')}.{class_name}"
184
+ py_class = (
185
+ PyClass.builder()
186
+ .name(class_name)
187
+ .signature(signature)
188
+ .start_line(start_line)
189
+ .end_line(end_line)
190
+ .code(code)
191
+ .comments(self._pycomments(child, code))
192
+ .base_classes([
201
193
  ast.unparse(base)
202
- for base in class_node.bases
194
+ for base in child.bases
203
195
  if isinstance(base, ast.expr)
204
- ]
205
- )
206
- .methods(self._callables(class_node, script))
207
- .attributes(self._class_attributes(class_node, script))
208
- .inner_classes(
209
- {
210
- k: v
211
- for child in class_node.body
212
- if isinstance(child, ast.ClassDef)
213
- for k, v in self._add_class(child, script).items()
214
- }
196
+ ])
197
+ .methods(self._callables(child, script, prefix=signature)) # Pass class signature as prefix
198
+ .attributes(self._class_attributes(child, script))
199
+ .inner_classes(self._add_class(child, script, prefix=signature)) # Pass class signature as prefix
200
+ .build()
215
201
  )
216
- .build()
217
- )
218
202
 
219
- return {signature: py_class}
203
+ classes[signature] = py_class
220
204
 
221
- def _callables(self, node: AST, script: Script) -> Dict[str, PyCallable]:
222
- """
223
- Builds PyCallable objects from any AST node that may contain functions.
205
+ return classes
224
206
 
225
- Args:
226
- node (AST): The AST node to process (e.g., Module, ClassDef, FunctionDef).
227
- script (Script): The Jedi script object for the module.
228
207
 
229
- Returns:
230
- Dict[str, PyCallable]: A dictionary mapping function/method names to PyCallable objects.
231
- """
208
+ def _callables(self, node: AST, script: Script, prefix: str = "") -> Dict[str, PyCallable]:
232
209
  callables: Dict[str, PyCallable] = {}
233
- module_path: str = script.path or "<unknown_module>"
234
- module_name: str = Path(module_path).stem if module_path else "<unknown>"
235
-
236
- def visit(n: AST, class_prefix: str = ""):
237
- for child in ast.iter_child_nodes(n):
238
- if isinstance(child, ast.FunctionDef):
239
- method_name = child.name
240
- start_line = child.lineno
241
- end_line = getattr(
242
- child, "end_lineno", start_line + len(child.body)
243
- )
244
- code_start_line = child.body[0].lineno if child.body else start_line
245
- code: str = ast.unparse(child).strip()
246
- decorators = [ast.unparse(d) for d in child.decorator_list]
247
210
 
211
+ for child in ast.iter_child_nodes(node):
212
+ if isinstance(child, (ast.FunctionDef, ast.AsyncFunctionDef)):
213
+ method_name = child.name # Keep the actual method name unchanged
214
+ start_line = child.lineno
215
+ end_line = getattr(child, "end_lineno", start_line + len(child.body))
216
+ code = ast.unparse(child).strip()
217
+ decorators = [ast.unparse(d) for d in child.decorator_list]
218
+
219
+ if prefix:
220
+ # We're in a nested context - build signature with prefix
221
+ signature = f"{prefix}.{method_name}"
222
+ else:
223
+ # Top-level function - try Jedi first, fall back to relative path-based
248
224
  try:
249
- definitions = script.goto(
250
- line=start_line, column=child.col_offset
225
+ definitions = script.goto(line=start_line, column=child.col_offset)
226
+ signature = next(
227
+ (d.full_name for d in definitions if d.type == "function"),
228
+ None
251
229
  )
252
230
  except Exception:
253
- definitions = []
254
-
255
- signature = next(
256
- (d.full_name for d in definitions if d.type == "function"),
257
- f"{module_name}.{class_prefix}{method_name}",
231
+ signature = None
232
+
233
+ # If Jedi didn't provide a signature, build one relative to project_dir
234
+ if not signature:
235
+ relative_path = Path(script.path).relative_to(self.project_dir)
236
+ signature = f"{str(relative_path).replace('/', '.').replace('.py', '')}.{method_name}"
237
+ py_callable = (
238
+ PyCallable.builder()
239
+ .name(method_name) # Use the actual method name, not the full signature
240
+ .path(str(script.path))
241
+ .signature(signature) # Use the full signature here
242
+ .decorators(decorators)
243
+ .code(code)
244
+ .start_line(start_line)
245
+ .end_line(end_line)
246
+ .code_start_line(child.body[0].lineno if child.body else start_line)
247
+ .accessed_symbols(self._accessed_symbols(child, script))
248
+ .call_sites(self._call_sites(child, script))
249
+ .local_variables(self._local_variables(child, script))
250
+ .cyclomatic_complexity(self._cyclomatic_complexity(child))
251
+ .parameters(self._callable_parameters(child, script))
252
+ .return_type(
253
+ ast.unparse(child.returns)
254
+ if child.returns else self._infer_type(script, child.lineno, child.col_offset)
258
255
  )
256
+ .comments(self._pycomments(child, code))
257
+ .inner_callables(self._callables(child, script, signature)) # Pass current signature as prefix
258
+ .inner_classes(self._add_class(child, script, signature)) # Pass current signature as prefix
259
+ .build()
260
+ )
259
261
 
260
- callables[method_name] = (
261
- PyCallable.builder()
262
- .name(method_name)
263
- .path(script.path.__str__())
264
- .signature(signature)
265
- .decorators(decorators)
266
- .code(code)
267
- .start_line(start_line)
268
- .end_line(end_line)
269
- .code_start_line(code_start_line)
270
- .accessed_symbols(self._accessed_symbols(child, script))
271
- .call_sites(self._call_sites(child, script))
272
- .local_variables(self._local_variables(child, script))
273
- .cyclomatic_complexity(self._cyclomatic_complexity(child))
274
- .parameters(self._callable_parameters(child, script))
275
- .return_type(
276
- ast.unparse(child.returns)
277
- if child.returns
278
- else self._infer_type(
279
- script, child.lineno, child.col_offset
280
- )
281
- )
282
- .comments(self._pycomments(child, code))
283
- .build()
284
- )
285
-
286
- visit(child, class_prefix + method_name + ".")
287
-
288
- elif isinstance(child, ast.ClassDef):
289
- visit(child, class_prefix + child.name + ".")
290
-
291
- elif hasattr(child, "body"):
292
- visit(child, class_prefix)
262
+ callables[method_name] = py_callable # Key by method name, not full signature
293
263
 
294
- visit(node)
295
264
  return callables
296
-
265
+
297
266
  def _pycomments(self, node: ast.AST, source: str) -> List[PyComment]:
298
267
  """
299
268
  Extracts all PyComment instances (docstring and # comments) from within a specific AST node's body.
@@ -868,35 +837,3 @@ class SymbolTableBuilder:
868
837
  .col_offset(col_offset)
869
838
  .build()
870
839
  )
871
-
872
- def build(self) -> Dict[str, PyModule]:
873
- """Builds the symbol table for the project.
874
-
875
- This method scans the project directory, identifies Python files,
876
- and constructs a symbol table containing information about classes,
877
- functions, and variables defined in those files.
878
- """
879
- symbol_table: Dict[str, PyModule] = {}
880
- # Get all Python files first to show accurate progress
881
- py_files = [
882
- py_file
883
- for py_file in self.project_dir.rglob("*.py")
884
- if "site-packages"
885
- not in py_file.resolve().__str__() # exclude site-packages
886
- and ".venv"
887
- not in py_file.resolve().__str__() # exclude virtual environments
888
- and ".codeanalyzer"
889
- not in py_file.resolve().__str__() # exclude internal cache directories
890
- ]
891
-
892
- with ProgressBar(len(py_files), "Building symbol table") as progress:
893
- for py_file in py_files:
894
- try:
895
- py_module = self._module(py_file)
896
- symbol_table[str(py_file)] = py_module
897
- except Exception as e:
898
- logger.error(f"Failed to process {py_file}: {e}")
899
- progress.advance()
900
- progress.finish("✅ Symbol table generation complete.")
901
-
902
- return symbol_table
@@ -1,20 +1,34 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codeanalyzer-python
3
- Version: 0.1.9
3
+ Version: 0.1.11
4
4
  Summary: Static Analysis on Python source code using Jedi, CodeQL and Treesitter.
5
5
  Author-email: Rahul Krishna <i.m.ralk@gmail.com>
6
6
  License-File: LICENSE
7
7
  License-File: NOTICE
8
- Requires-Python: >=3.10
9
- Requires-Dist: jedi>=0.19.2
10
- Requires-Dist: loguru>=0.7.3
11
- Requires-Dist: msgpack>=1.1.1
12
- Requires-Dist: networkx>=3.4.2
13
- Requires-Dist: pandas>=2.3.1
14
- Requires-Dist: pydantic>=2.11.7
15
- Requires-Dist: requests>=2.32.4
16
- Requires-Dist: rich>=14.0.0
17
- Requires-Dist: typer>=0.16.0
8
+ Requires-Python: >=3.9
9
+ Requires-Dist: jedi<0.20.0,>=0.18.0; python_version < '3.11'
10
+ Requires-Dist: jedi<=0.19.2; python_version >= '3.11'
11
+ Requires-Dist: msgpack<1.0.7,>=1.0.0; python_version < '3.11'
12
+ Requires-Dist: msgpack<2.0.0,>=1.0.7; python_version >= '3.11'
13
+ Requires-Dist: networkx<3.2.0,>=2.6.0; python_version < '3.11'
14
+ Requires-Dist: networkx<4.0.0,>=3.0.0; python_version >= '3.11'
15
+ Requires-Dist: numpy<1.24.0,>=1.21.0; python_version < '3.11'
16
+ Requires-Dist: numpy<2.0.0,>=1.24.0; python_version >= '3.11' and python_version < '3.12'
17
+ Requires-Dist: numpy<2.0.0,>=1.26.0; python_version >= '3.12'
18
+ Requires-Dist: packaging>=25.0
19
+ Requires-Dist: pandas<2.0.0,>=1.3.0; python_version < '3.11'
20
+ Requires-Dist: pandas<3.0.0,>=2.0.0; python_version >= '3.11'
21
+ Requires-Dist: pydantic<2.0.0,>=1.8.0; python_version < '3.11'
22
+ Requires-Dist: pydantic<3.0.0,>=2.0.0; python_version >= '3.11'
23
+ Requires-Dist: ray<3.0.0,>=2.10.0; python_version >= '3.11'
24
+ Requires-Dist: ray==2.0.0; python_version < '3.11'
25
+ Requires-Dist: requests<3.0.0,>=2.20.0; python_version >= '3.11'
26
+ Requires-Dist: rich<14.0.0,>=12.6.0; python_version < '3.11'
27
+ Requires-Dist: rich<15.0.0,>=14.0.0; python_version >= '3.11'
28
+ Requires-Dist: typer<1.0.0,>=0.9.0; python_version < '3.11'
29
+ Requires-Dist: typer<2.0.0,>=0.9.0; python_version >= '3.11'
30
+ Requires-Dist: typing-extensions<5.0.0,>=4.0.0; python_version < '3.11'
31
+ Requires-Dist: typing-extensions<6.0.0,>=4.5.0; python_version >= '3.11'
18
32
  Description-Content-Type: text/markdown
19
33
 
20
34
  ![logo](https://github.com/codellm-devkit/codeanalyzer-python/blob/main/docs/assets/logo.png?raw=true)
@@ -95,17 +109,18 @@ To view the available options and commands, run `codeanalyzer --help`. You shoul
95
109
  Static Analysis on Python source code using Jedi, CodeQL and Tree sitter.
96
110
 
97
111
 
98
- ╭─ Options ──────────────────────────────────────────────────────────────────────────────────────────────────────────╮
99
- │ * --input -i PATH Path to the project root directory. [default: None] [required] │
100
- │ --output -o PATH Output directory for artifacts. [default: None] │
101
- │ --analysis-level -a INTEGER 1: symbol table, 2: call graph. [default: 1]
102
- │ --codeql --no-codeql Enable CodeQL-based analysis. [default: no-codeql]
103
- │ --eager --lazy Enable eager or lazy analysis. Defaults to lazy. [default: lazy]
104
- │ --cache-dir -c PATH Directory to store analysis cache. [default: None]
105
- │ --clear-cache --keep-cache Clear cache after analysis. [default: clear-cache] │
106
- -v INTEGER Increase verbosity: -v, -vv, -vvv [default: 0]
107
- --help Show this message and exit.
108
- ╰────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
112
+ ╭─ Options ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
113
+ │ * --input -i PATH Path to the project root directory. [default: None] [required] │
114
+ │ --output -o PATH Output directory for artifacts. [default: None] │
115
+ │ --format -f [json|msgpack] Output format: json or msgpack. [default: json]
116
+ │ --analysis-level -a INTEGER 1: symbol table, 2: call graph. [default: 1]
117
+ │ --codeql --no-codeql Enable CodeQL-based analysis. [default: no-codeql]
118
+ │ --eager --lazy Enable eager or lazy analysis. Defaults to lazy. [default: lazy]
119
+ │ --cache-dir -c PATH Directory to store analysis cache. [default: None] │
120
+ --clear-cache --keep-cache Clear cache after analysis. [default: clear-cache]
121
+ -v INTEGER Increase verbosity: -v, -vv, -vvv [default: 0]
122
+ │ --help Show this message and exit. │
123
+ ╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
109
124
  ```
110
125
 
111
126
  ### Examples
@@ -123,7 +138,14 @@ To view the available options and commands, run `codeanalyzer --help`. You shoul
123
138
 
124
139
  Now, you can find the analysis results in `analysis.json` in the specified directory.
125
140
 
126
- 2. **Toggle analysis levels with `--analysis-level`:**
141
+ 2. **Change output format to msgpack:**
142
+ ```bash
143
+ codeanalyzer --input ./my-python-project --output /path/to/analysis-results --format msgpack
144
+ ```
145
+
146
+ This will save the analysis results in `analysis.msgpack` in the specified directory.
147
+
148
+ 3. **Toggle analysis levels with `--analysis-level`:**
127
149
  ```bash
128
150
  codeanalyzer --input ./my-python-project --analysis-level 1 # Symbol table only
129
151
  ```
@@ -133,7 +155,7 @@ To view the available options and commands, run `codeanalyzer --help`. You shoul
133
155
  ```
134
156
  ***Note: The `--analysis-level=2` option is not yet implemented in this version.***
135
157
 
136
- 3. **Analysis with CodeQL enabled:**
158
+ 4. **Analysis with CodeQL enabled:**
137
159
  ```bash
138
160
  codeanalyzer --input ./my-python-project --codeql
139
161
  ```
@@ -141,7 +163,7 @@ To view the available options and commands, run `codeanalyzer --help`. You shoul
141
163
 
142
164
  ***Note: Not yet fully implemented. Please refrain from using this option until further notice.***
143
165
 
144
- 4. **Eager analysis with custom cache directory:**
166
+ 5. **Eager analysis with custom cache directory:**
145
167
  ```bash
146
168
  codeanalyzer --input ./my-python-project --eager --cache-dir /path/to/custom-cache
147
169
  ```
@@ -149,14 +171,14 @@ To view the available options and commands, run `codeanalyzer --help`. You shoul
149
171
 
150
172
  If you provide --cache-dir, the cache will be stored in that directory. If not specified, it defaults to `.codeanalyzer` in the current working directory (`$PWD`).
151
173
 
152
- 5. **Quiet mode (minimal output):**
174
+ 6. **Quiet mode (minimal output):**
153
175
  ```bash
154
176
  codeanalyzer --input /path/to/my-python-project --quiet
155
177
  ```
156
178
 
157
- ### Output
179
+ ## Output
158
180
 
159
- By default, analysis results are printed to stdout in JSON format. When using the `--output` option, results are saved to `analysis.json` in the specified directory.
181
+ By default, analysis results are printed to stdout in JSON format. When using the `--output` option, results are saved to `analysis.json` in the specified directory. If you use the `--format=msgpack` option, the results will be saved in `analysis.msgpack` instead; MessagePack is a binary format that can be more efficient for storage and transmission.
160
182
 
161
183
  ## Development
162
184
 
@@ -1,13 +1,13 @@
1
1
  codeanalyzer/__init__.py,sha256=BZ3Kuwl-F_F-8H8cepLnVJ4Ku4NNUjjqg0Y6ujPQSsI,108
2
- codeanalyzer/__main__.py,sha256=CHSa6A-AT5XtZ2GJvEEdjz8emr7Mr3xP4qG5xxRWX7k,3863
3
- codeanalyzer/core.py,sha256=BVlfCaMqIRfaUkOk9_5XrnuvzHZ0WBtAkjRj3yOMHRI,12431
2
+ codeanalyzer/__main__.py,sha256=e-AMzR5uR1IsUKhsfk17_qPJkwORRqe9tBxCXwwlxBY,4922
3
+ codeanalyzer/core.py,sha256=swFc-6ICs_rijj7P0bbJv3um2Gr-R8_roGK0P3qo1xc,24979
4
4
  codeanalyzer/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
5
  codeanalyzer/config/__init__.py,sha256=9XBxAn1oWGRuhg3bEBUuVGs3hFNXEAKrr-Ce7tq9a2k,61
6
6
  codeanalyzer/config/config.py,sha256=ZiKzc5uEUCIvih58-6BDtLLI1hPij41wGQjBcj9KNQM,188
7
7
  codeanalyzer/jedi/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
8
  codeanalyzer/jedi/jedi.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
- codeanalyzer/schema/__init__.py,sha256=bIwfGFvgqtNZ0mCAvDXSo9WVJHqhm15iZc7YOuWYSZ0,390
10
- codeanalyzer/schema/py_schema.py,sha256=YFd0EtJkTOiOruaVkO_ibbzTktSPOvKDvsxqcI3n9J8,10471
9
+ codeanalyzer/schema/__init__.py,sha256=HB7y4y-49dkEo-H9GREam1_9Cr1N-GF6MYwx9yoU878,1978
10
+ codeanalyzer/schema/py_schema.py,sha256=04K19tDtmg2tPXjwu_8BcmVpenk1ibVwNv6bHWZHOLY,10851
11
11
  codeanalyzer/semantic_analysis/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
12
12
  codeanalyzer/semantic_analysis/codeql/__init__.py,sha256=ODMkdGvs3ebJdfIZle8T4VcHoCBhH_ZehWuWFpNh3NI,1022
13
13
  codeanalyzer/semantic_analysis/codeql/codeql_analysis.py,sha256=-mVTm2JdQlfmV_9T4xLgjxwROKS83aP7lJAmQHh37xY,5312
@@ -15,14 +15,15 @@ codeanalyzer/semantic_analysis/codeql/codeql_exceptions.py,sha256=PnJOasW9rP68SE
15
15
  codeanalyzer/semantic_analysis/codeql/codeql_loader.py,sha256=o0BW-6yHkN6kLG66rOYQQ_ToQUn5Ivl9h9ZdBM3_E_Q,2288
16
16
  codeanalyzer/semantic_analysis/codeql/codeql_query_runner.py,sha256=QJtID1YZkO6Wyns_qTJFqOSiV238ArLXwgLv105B27E,6520
17
17
  codeanalyzer/semantic_analysis/wala/__init__.py,sha256=JSDvkrpJ2U90Ikex34EluSHmoGutlmRhV2xvInt6tB8,743
18
- codeanalyzer/syntactic_analysis/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
19
- codeanalyzer/syntactic_analysis/symbol_table_builder.py,sha256=kwmJiJV01olKcKPQKrZ8Mp3CKOMPdrM7dvb7pB6v3hg,36194
18
+ codeanalyzer/syntactic_analysis/__init__.py,sha256=EUQkJEh6wHjWx2qTTKbTbUgwSbfKeNieKHNy7RknVXA,476
19
+ codeanalyzer/syntactic_analysis/exceptions.py,sha256=whs_n0vIu655Jkk1a7iOoXY6iIca4pZqJnU40V9Ejaw,537
20
+ codeanalyzer/syntactic_analysis/symbol_table_builder.py,sha256=0FE_ZdlyP77P1B70QXhPKO4AEPm2KvA3-llaAjIrOJU,34639
20
21
  codeanalyzer/utils/__init__.py,sha256=hC6VWdR5rerSqBxzu9KQHTASWqwrrYJv-CMDwrTlzkc,137
21
22
  codeanalyzer/utils/logging.py,sha256=0vTkGSl5EZN8yhhWa_5Mrn1n_twRCSW53rNwjzQ9RbI,601
22
23
  codeanalyzer/utils/progress_bar.py,sha256=ZHJzGiCo5q4dyXq4CtsrJeq9Ip7sD84T3yZjNX7TBys,2443
23
- codeanalyzer_python-0.1.9.dist-info/METADATA,sha256=HpZwqxr1Q_hz8sXnXFNMiOWhyHafGZ-pNK01MqpKHjs,14460
24
- codeanalyzer_python-0.1.9.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
25
- codeanalyzer_python-0.1.9.dist-info/entry_points.txt,sha256=eUrB7Jq5Oav6RblMX_RYfVLSw_h15NbzC3fNSnGsPuM,59
26
- codeanalyzer_python-0.1.9.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
27
- codeanalyzer_python-0.1.9.dist-info/licenses/NOTICE,sha256=YU0Z9NDWqKY-2jfFcbxeZ6fbnzz0oZeKmnUcO8a-bcQ,901
28
- codeanalyzer_python-0.1.9.dist-info/RECORD,,
24
+ codeanalyzer_python-0.1.11.dist-info/METADATA,sha256=yqQiVgSsG1k262sigcRo51BScmuVQywLOtL9rEX5ATI,16283
25
+ codeanalyzer_python-0.1.11.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
26
+ codeanalyzer_python-0.1.11.dist-info/entry_points.txt,sha256=eUrB7Jq5Oav6RblMX_RYfVLSw_h15NbzC3fNSnGsPuM,59
27
+ codeanalyzer_python-0.1.11.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
28
+ codeanalyzer_python-0.1.11.dist-info/licenses/NOTICE,sha256=YU0Z9NDWqKY-2jfFcbxeZ6fbnzz0oZeKmnUcO8a-bcQ,901
29
+ codeanalyzer_python-0.1.11.dist-info/RECORD,,