PyPI - codeanalyzer-python - Versions diffs - 0.1.1__py3-none-any.whl → 0.1.2__py3-none-any.whl - Mend

codeanalyzer-python 0.1.1__py3-none-any.whl → 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27) hide show

codeanalyzer/__init__.py +0 -0
codeanalyzer/__main__.py +84 -0
codeanalyzer/core.py +321 -0
codeanalyzer/jedi/__init__.py +0 -0
codeanalyzer/jedi/jedi.py +0 -0
codeanalyzer/py.typed +0 -0
codeanalyzer/schema/__init__.py +23 -0
codeanalyzer/schema/py_schema.py +360 -0
codeanalyzer/semantic_analysis/__init__.py +0 -0
codeanalyzer/semantic_analysis/codeql/__init__.py +26 -0
codeanalyzer/semantic_analysis/codeql/codeql_analysis.py +133 -0
codeanalyzer/semantic_analysis/codeql/codeql_exceptions.py +12 -0
codeanalyzer/semantic_analysis/codeql/codeql_loader.py +74 -0
codeanalyzer/semantic_analysis/codeql/codeql_query_runner.py +164 -0
codeanalyzer/semantic_analysis/wala/__init__.py +15 -0
codeanalyzer/syntactic_analysis/__init__.py +0 -0
codeanalyzer/syntactic_analysis/symbol_table_builder.py +903 -0
codeanalyzer/utils/__init__.py +5 -0
codeanalyzer/utils/logging.py +18 -0
codeanalyzer/utils/progress_bar.py +69 -0
{codeanalyzer_python-0.1.1.dist-info → codeanalyzer_python-0.1.2.dist-info}/METADATA +1 -1
codeanalyzer_python-0.1.2.dist-info/RECORD +26 -0
codeanalyzer_python-0.1.1.dist-info/RECORD +0 -6
{codeanalyzer_python-0.1.1.dist-info → codeanalyzer_python-0.1.2.dist-info}/WHEEL +0 -0
{codeanalyzer_python-0.1.1.dist-info → codeanalyzer_python-0.1.2.dist-info}/entry_points.txt +0 -0
{codeanalyzer_python-0.1.1.dist-info → codeanalyzer_python-0.1.2.dist-info}/licenses/LICENSE +0 -0
{codeanalyzer_python-0.1.1.dist-info → codeanalyzer_python-0.1.2.dist-info}/licenses/NOTICE +0 -0

codeanalyzer/__init__.py ADDED Viewed

File without changes

codeanalyzer/__main__.py ADDED Viewed

@@ -0,0 +1,84 @@
+from contextlib import nullcontext
+import sys
+import typer
+from typing import Optional, Annotated
+from pathlib import Path
+from codeanalyzer.utils import _set_log_level
+from codeanalyzer.utils import logger
+from codeanalyzer.core import AnalyzerCore
+def main(
+    input: Annotated[
+        Path, typer.Option("-i", "--input", help="Path to the project root directory.")
+    ],
+    output: Annotated[
+        Optional[Path],
+        typer.Option("-o", "--output", help="Output directory for artifacts."),
+    ] = None,
+    analysis_level: Annotated[
+        int,
+        typer.Option("-a", "--analysis-level", help="1: symbol table, 2: call graph."),
+    ] = 1,
+    using_codeql: Annotated[
+        bool, typer.Option("--codeql/--no-codeql", help="Enable CodeQL-based analysis.")
+    ] = False,
+    rebuild_analysis: Annotated[
+        bool,
+        typer.Option(
+            "--eager/--lazy",
+            help="Enable eager or lazy analysis. Defaults to lazy.",
+        ),
+    ] = False,
+    cache_dir: Annotated[
+        Optional[Path],
+        typer.Option(
+            "-c",
+            "--cache-dir",
+            help="Directory to store analysis cache.",
+        ),
+    ] = None,
+    clear_cache: Annotated[
+        bool,
+        typer.Option("--clear-cache/--keep-cache", help="Clear cache after analysis."),
+    ] = True,
+    verbosity: Annotated[
+        int, typer.Option("-v", count=True, help="Increase verbosity: -v, -vv, -vvv")
+    ] = 0,
+):
+    """Static Analysis on Python source code using Jedi, Astroid, and Treesitter."""
+    _set_log_level(verbosity)
+    if not input.exists():
+        logger.error(f"Input path '{input}' does not exist.")
+        raise typer.Exit(code=1)
+    with AnalyzerCore(
+        input, analysis_level, using_codeql, rebuild_analysis, cache_dir, clear_cache
+    ) as analyzer:
+        artifacts = analyzer.analyze()
+        print_stream = sys.stdout
+        stream_context = nullcontext(print_stream)
+        if output is not None:
+            output.mkdir(parents=True, exist_ok=True)
+            output_file = output / "analysis.json"
+            stream_context = output_file.open("w")
+        with stream_context as f:
+            print(artifacts.model_dump_json(indent=4), file=f)
+# Typer application object. ``main`` is registered as the application callback and
+# ``invoke_without_command=True`` lets it run when no sub-command is given;
+# ``no_args_is_help=True`` shows the help text when the CLI is called with no arguments.
+app = typer.Typer(
+    callback=main,
+    name="codeanalyzer",
+    help="Static Analysis on Python source code using Jedi, CodeQL and Tree sitter.",
+    invoke_without_command=True,
+    no_args_is_help=True,
+    add_completion=False,
+    rich_markup_mode="rich",
+    pretty_exceptions_show_locals=False,
+)
+# Run the CLI when this module is executed as a script.
+if __name__ == "__main__":
+    app()

codeanalyzer/core.py ADDED Viewed

@@ -0,0 +1,321 @@
+import hashlib
+import os
+from pdb import set_trace
+import shutil
+import subprocess
+from pathlib import Path
+import sys
+from typing import Any, Dict, Union, Optional
+from codeanalyzer.utils import logger
+from codeanalyzer.schema.py_schema import PyApplication, PyModule
+from codeanalyzer.semantic_analysis.codeql import CodeQLLoader
+from codeanalyzer.semantic_analysis.codeql.codeql_exceptions import (
+    CodeQLExceptions,
+)
+from codeanalyzer.syntactic_analysis.symbol_table_builder import SymbolTableBuilder
+class AnalyzerCore:
+    """Core functionality for the static analysis of a Python project.
+    Instances are used as context managers: entering prepares a virtual
+    environment (and, when requested, a CodeQL database) under the cache
+    directory; exiting removes the cache directory when ``clear_cache`` is set.
+    Args:
+        project_dir (Union[str, Path]): The root directory of the project to analyze.
+        analysis_depth (int): Depth of analysis (reserved for future use).
+        using_codeql (bool): Whether to use CodeQL for analysis.
+        rebuild_analysis (bool): Whether to force re-creation of the virtual
+            environment and the CodeQL database.
+        cache_dir (Optional[Path]): Directory in which the ``.codeanalyzer`` cache
+            folder is placed. Defaults to ``project_dir``.
+        clear_cache (bool): Whether to delete the cached directory after analysis.
+    """
+    def __init__(
+        self,
+        project_dir: Union[str, Path],
+        analysis_depth: int = 1,
+        using_codeql: bool = False,
+        rebuild_analysis: bool = False,
+        cache_dir: Optional[Path] = None,
+        clear_cache: bool = True,
+    ) -> None:
+        self.analysis_depth = analysis_depth
+        # Resolved to an absolute path so later path comparisons are stable.
+        self.project_dir = Path(project_dir).resolve()
+        self.using_codeql = using_codeql
+        self.rebuild_analysis = rebuild_analysis
+        # All cached artifacts live in a ".codeanalyzer" folder, placed under the
+        # given cache_dir or, by default, inside the project itself.
+        self.cache_dir = (
+            cache_dir.resolve() if cache_dir is not None else self.project_dir
+        ) / ".codeanalyzer"
+        self.clear_cache = clear_cache
+        # Not known at construction time; initialised to None here.
+        self.db_path: Optional[Path] = None
+        self.codeql_bin: Optional[Path] = None
+        self.virtualenv: Optional[Path] = None
+    @staticmethod
+    def _cmd_exec_helper(
+        cmd: list[str],
+        cwd: Optional[Path] = None,
+        capture_output: bool = True,
+        check: bool = True,
+        suppress_output: bool = False,
+    ) -> subprocess.CompletedProcess:
+        """
+        Runs a subprocess with real-time output streaming to the logger.
+        Args:
+            cmd: Command as a list of arguments.
+            cwd: Working directory to run the command in.
+            capture_output: If True, retains and returns the output.
+            check: If True, raises CalledProcessError on non-zero exit.
+            suppress_output: If True, silences log output.
+        Returns:
+            subprocess.CompletedProcess
+        """
+        logger.info(f"Running: {' '.join(cmd)}")
+        process = subprocess.Popen(
+            cmd,
+            cwd=cwd,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.STDOUT,
+            text=True,
+            bufsize=1,
+            universal_newlines=True,
+        )
+        assert process.stdout is not None  # for type checking
+        output_lines = []
+        for line in process.stdout:
+            line = line.rstrip()
+            if not suppress_output:
+                logger.debug(line)
+            if capture_output:
+                output_lines.append(line)
+        returncode = process.wait()
+        if check and returncode != 0:
+            error_output = "\n".join(output_lines)
+            logger.error(f"Command failed with exit code {returncode}: {' '.join(cmd)}")
+            if error_output:
+                logger.error(f"Command output:\n{error_output}")
+            raise subprocess.CalledProcessError(returncode, cmd, output=error_output)
+        return subprocess.CompletedProcess(
+            args=cmd,
+            returncode=returncode,
+            stdout="\n".join(output_lines) if capture_output else None,
+            stderr=None,
+        )
+    @staticmethod
+    def _get_base_interpreter() -> Path:
+        """Get the base Python interpreter path.
+        This method finds a suitable base Python interpreter that can be used
+        to create virtual environments, even when running from within a virtual environment.
+        It supports various Python version managers like pyenv, conda, asdf, etc.
+        Returns:
+            Path: The base Python interpreter path.
+        Raises:
+            RuntimeError: If no suitable Python interpreter can be found.
+        """
+        # If we're not in a virtual environment, use the current interpreter
+        if sys.prefix == sys.base_prefix:
+            return Path(sys.executable)
+        # We're inside a virtual environment; need to find the base interpreter
+        # First, check if user explicitly set SYSTEM_PYTHON
+        if system_python := os.getenv("SYSTEM_PYTHON"):
+            system_python_path = Path(system_python)
+            if system_python_path.exists() and system_python_path.is_file():
+                return system_python_path
+        # Try to get the base interpreter from sys.base_executable (Python 3.3+)
+        if hasattr(sys, "base_executable") and sys.base_executable:
+            base_exec = Path(sys.base_executable)
+            if base_exec.exists() and base_exec.is_file():
+                return base_exec
+        # Try to find Python interpreters using shlex.which
+        python_candidates = []
+        # Use shutil.which to find python3 and python in PATH
+        for python_name in ["python3", "python"]:
+            if python_path := shutil.which(python_name):
+                candidate = Path(python_path)
+                # Skip if this is the current virtual environment's python
+                if not str(candidate).startswith(sys.prefix):
+                    python_candidates.append(candidate)
+        # Check pyenv installation
+        if pyenv_root := os.getenv("PYENV_ROOT"):
+            pyenv_python = Path(pyenv_root) / "shims" / "python"
+            if pyenv_python.exists():
+                python_candidates.append(pyenv_python)
+        # Check default pyenv location
+        home_pyenv = Path.home() / ".pyenv" / "shims" / "python"
+        if home_pyenv.exists():
+            python_candidates.append(home_pyenv)
+        # Check conda base environment
+        if conda_prefix := os.getenv(
+            "CONDA_PREFIX_1"
+        ):  # Original conda env before activation
+            conda_python = Path(conda_prefix) / "bin" / "python"
+            if conda_python.exists():
+                python_candidates.append(conda_python)
+        # Check asdf
+        if asdf_dir := os.getenv("ASDF_DIR"):
+            asdf_python = Path(asdf_dir) / "shims" / "python"
+            if asdf_python.exists():
+                python_candidates.append(asdf_python)
+        # Test candidates to find a working Python interpreter
+        for candidate in python_candidates:
+            try:
+                # Test if the interpreter works and can create venv
+                result = subprocess.run(
+                    [str(candidate), "-c", "import venv; print('OK')"],
+                    capture_output=True,
+                    text=True,
+                    timeout=5,
+                )
+                if result.returncode == 0 and "OK" in result.stdout:
+                    return candidate
+            except (subprocess.TimeoutExpired, FileNotFoundError, PermissionError):
+                continue
+        # If nothing works, raise an informative error
+        raise RuntimeError(
+            f"Could not find a suitable base Python interpreter. "
+            f"Current environment: {sys.executable} (prefix: {sys.prefix}). "
+            f"Please set the SYSTEM_PYTHON environment variable to point to "
+            f"a working Python interpreter that can create virtual environments."
+        )
+    def __enter__(self) -> "AnalyzerCore":
+        # If no virtualenv is provided, try to create one using requirements.txt or pyproject.toml
+        venv_path = self.cache_dir / self.project_dir.name / "virtualenv"
+        # Ensure the cache directory exists for this project
+        venv_path.parent.mkdir(parents=True, exist_ok=True)
+        # Create the virtual environment if it does not exist
+        if not venv_path.exists() or self.rebuild_analysis:
+            logger.info(f"(Re-)creating virtual environment at {venv_path}")
+            self._cmd_exec_helper(
+                [str(self._get_base_interpreter()), "-m", "venv", str(venv_path)],
+                check=True,
+            )
+            # Find python in the virtual environment
+            venv_python = venv_path / "bin" / "python"
+            # Install the project itself (reads pyproject.toml)
+            self._cmd_exec_helper(
+                [str(venv_python), "-m", "pip", "install", "-U", f"{self.project_dir}"],
+                cwd=self.project_dir,
+                check=True,
+            )
+            # Install the project dependencies
+            self.virtualenv = venv_path
+        if self.using_codeql:
+            logger.info(f"(Re-)initializing CodeQL analysis for {self.project_dir}")
+            cache_root = self.cache_dir / "codeql"
+            cache_root.mkdir(parents=True, exist_ok=True)
+            self.db_path = cache_root / f"{self.project_dir.name}-db"
+            self.db_path.mkdir(exist_ok=True)
+            checksum_file = self.db_path / ".checksum"
+            current_checksum = self._compute_checksum(self.project_dir)
+            def is_cache_valid() -> bool:
+                if not (self.db_path / "db-python").exists():
+                    return False
+                if not checksum_file.exists():
+                    return False
+                return checksum_file.read_text().strip() == current_checksum
+            if self.rebuild_analysis or not is_cache_valid():
+                logger.info("Creating new CodeQL database...")
+                codeql_in_path = shutil.which("codeql")
+                if codeql_in_path:
+                    self.codeql_bin = Path(codeql_in_path)
+                else:
+                    self.codeql_bin = CodeQLLoader.download_and_extract_codeql(
+                        self.cache_dir / "codeql" / "bin"
+                    )
+                if not shutil.which(str(self.codeql_bin)):
+                    raise FileNotFoundError(
+                        f"CodeQL binary not executable: {self.codeql_bin}"
+                    )
+                cmd = [
+                    str(self.codeql_bin),
+                    "database",
+                    "create",
+                    str(self.db_path),
+                    f"--source-root={self.project_dir}",
+                    "--language=python",
+                    "--overwrite",
+                ]
+                proc = subprocess.Popen(
+                    cmd, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE
+                )
+                _, err = proc.communicate()
+                if proc.returncode != 0:
+                    raise CodeQLExceptions.CodeQLDatabaseBuildException(
+                        f"Error building CodeQL database:\n{err.decode()}"
+                    )
+                checksum_file.write_text(current_checksum)
+            else:
+                logger.info(f"Reusing cached CodeQL DB at {self.db_path}")
+        return self
+    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
+        if self.clear_cache and self.cache_dir.exists():
+            logger.info(f"Clearing cache directory: {self.cache_dir}")
+            shutil.rmtree(self.cache_dir)
+    def analyze(self) -> PyApplication:
+        """Return the path to the CodeQL database."""
+        return (
+            PyApplication.builder()
+            .with_symbol_table(self._build_symbol_table())
+            .build()
+        )
+    def _compute_checksum(self, root: Path) -> str:
+        """Compute SHA256 checksum of all Python source files in a project directory. If somethings changes, the
+        checksum will change and thus the analysis will be redone.
+        Args:
+            root (Path): Root directory of the project.
+        Returns:
+            str: SHA256 checksum of all Python files in the project.
+        """
+        sha256 = hashlib.sha256()
+        for py_file in sorted(root.rglob("*.py")):
+            sha256.update(py_file.read_bytes())
+        return sha256.hexdigest()
+    def _build_symbol_table(self) -> Dict[str, PyModule]:
+        """Retrieve a symbol table of the whole project."""
+        return SymbolTableBuilder(self.project_dir, self.virtualenv).build()
+    def _get_call_graph(self) -> Dict[str, Any]:
+        """Retrieve call graph from CodeQL database."""
+        logger.warning("Call graph extraction not yet implemented.")
+        return {}

codeanalyzer/jedi/__init__.py ADDED Viewed

File without changes

codeanalyzer/jedi/jedi.py ADDED Viewed

File without changes

codeanalyzer/py.typed ADDED Viewed

File without changes

codeanalyzer/schema/__init__.py ADDED Viewed

@@ -0,0 +1,23 @@
+from .py_schema import (
+    PyApplication,
+    PyImport,
+    PyComment,
+    PyModule,
+    PyClass,
+    PyVariableDeclaration,
+    PyCallable,
+    PyClassAttribute,
+    PyCallableParameter
+)
+# Public API of the schema package: the names re-exported from ``.py_schema``.
+__all__ = [
+    "PyApplication",
+    "PyImport",
+    "PyComment",
+    "PyModule",
+    "PyClass",
+    "PyVariableDeclaration",
+    "PyCallable",
+    "PyClassAttribute",
+    "PyCallableParameter"
+]

codeanalyzer-python 0.1.1__py3-none-any.whl → 0.1.2__py3-none-any.whl

codeanalyzer-python 0.1.1__py3-none-any.whl → 0.1.2__py3-none-any.whl