PyPI - fossil-code - Versions diffs - 0.2.0__py3-none-any.whl - Mend

fossil-code 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20) hide show

fossil/__init__.py +3 -0
fossil/__main__.py +4 -0
fossil/analyzers.py +221 -0
fossil/cache.py +228 -0
fossil/cli.py +421 -0
fossil/config_manager.py +141 -0
fossil/engine.py +122 -0
fossil/git_miner.py +78 -0
fossil/models.py +109 -0
fossil/patterns.py +79 -0
fossil/py.typed +1 -0
fossil/render.py +436 -0
fossil/repo.py +82 -0
fossil/scoring.py +126 -0
fossil_code-0.2.0.dist-info/METADATA +377 -0
fossil_code-0.2.0.dist-info/RECORD +20 -0
fossil_code-0.2.0.dist-info/WHEEL +5 -0
fossil_code-0.2.0.dist-info/entry_points.txt +2 -0
fossil_code-0.2.0.dist-info/licenses/LICENSE +21 -0
fossil_code-0.2.0.dist-info/top_level.txt +1 -0

fossil/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+"""Dead-code forensics CLI."""
+__version__ = "0.2.0"

fossil/__main__.py ADDED Viewed

@@ -0,0 +1,4 @@
+from fossil.cli import main
+# Run the CLI only when this module is executed as the entry point, not on import.
+if __name__ == "__main__":
+    main()

fossil/analyzers.py ADDED Viewed

@@ -0,0 +1,221 @@
+from __future__ import annotations
+import ast
+import fnmatch
+import re
+from pathlib import Path
+from fossil.models import Reference, StaticAnalysisResult
+from fossil.repo import relpath
+# Maps a lowercase file suffix to the language label reported by language_for().
+SOURCE_EXTENSIONS = {
+    ".py": "python",
+    ".js": "javascript",
+    ".jsx": "javascript",
+    ".ts": "typescript",
+    ".tsx": "typescript",
+    ".java": "java",
+    ".go": "go",
+}
+# Suffixes whose text matches _scan_text() records with kind "doc".
+DOC_EXTENSIONS = {".md", ".rst", ".txt", ".adoc"}
+# Suffixes whose text matches _scan_text() records with kind "config".
+CONFIG_EXTENSIONS = {".toml", ".yaml", ".yml", ".json", ".ini", ".cfg"}
+# A file is skipped by iter_repo_files() when any component of its relative path is listed here.
+SKIP_PARTS = {".git", ".fossil", "__pycache__", "node_modules", "dist", "build", ".venv", "venv"}
+def language_for(path: Path) -> str:
+    return SOURCE_EXTENSIONS.get(path.suffix.lower(), "unknown")
+def iter_repo_files(repo_root: Path, exclude: list[str] | None = None) -> list[Path]:
+    exclude = exclude or []
+    files: list[Path] = []
+    for path in repo_root.rglob("*"):
+        if not path.is_file():
+            continue
+        rel = path.relative_to(repo_root).as_posix()
+        if any(part in SKIP_PARTS for part in path.relative_to(repo_root).parts):
+            continue
+        if any(fnmatch.fnmatch(rel, pattern) for pattern in exclude):
+            continue
+        files.append(path)
+    return files
+def exported_symbols(path: Path) -> set[str]:
+    if path.suffix != ".py":
+        return {path.stem}
+    try:
+        tree = ast.parse(path.read_text(encoding="utf-8"))
+    except (SyntaxError, UnicodeDecodeError):
+        return {path.stem}
+    symbols = {path.stem}
+    for node in tree.body:
+        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)):
+            if not node.name.startswith("_"):
+                symbols.add(node.name)
+        elif isinstance(node, ast.Assign):
+            for target in node.targets:
+                if isinstance(target, ast.Name) and not target.id.startswith("_"):
+                    symbols.add(target.id)
+    return symbols
+def module_names(path: Path, repo_root: Path) -> set[str]:
+    rel = path.relative_to(repo_root).with_suffix("")
+    parts = list(rel.parts)
+    names = {path.stem, ".".join(parts)}
+    if parts[-1] == "__init__" and len(parts) > 1:
+        names.add(".".join(parts[:-1]))
+    return {name for name in names if name}
+def analyze_file(
+    path: Path, repo_root: Path, exclude: list[str] | None = None
+) -> StaticAnalysisResult:
+    language = language_for(path)
+    symbols = exported_symbols(path)
+    modules = module_names(path, repo_root)
+    result = StaticAnalysisResult(language=language, unknown_language=language == "unknown")
+    files = iter_repo_files(repo_root, exclude)
+    target_rel = relpath(path, repo_root)
+    for other in files:
+        if other.resolve() == path.resolve():
+            continue
+        rel = relpath(other, repo_root)
+        try:
+            text = other.read_text(encoding="utf-8")
+        except UnicodeDecodeError:
+            continue
+        if other.suffix == ".py":
+            _scan_python(other, rel, text, modules, symbols, result)
+        else:
+            _scan_text(other, rel, text, modules, symbols, result, target_rel)
+    _scan_dynamic_and_reflection(files, path, repo_root, modules, result)
+    return result
+def _add_ref(result: StaticAnalysisResult, path: str, line: int, kind: str, text: str) -> None:
+    ref = Reference(path=path, line=line, kind=kind, text=text.strip()[:240])
+    result.references.append(ref)
+    if _is_test_path(path):
+        result.test_file_references += 1
+    elif kind == "import":
+        result.import_references += 1
+    elif kind == "call":
+        result.call_sites += 1
+    elif kind == "doc":
+        result.documentation_references += 1
+    elif kind == "config":
+        result.config_file_references += 1
+def _is_test_path(path: str) -> bool:
+    lower = path.lower()
+    return "/test" in lower or lower.startswith("test") or "_test." in lower
+def _scan_python(
+    path: Path,
+    rel: str,
+    text: str,
+    modules: set[str],
+    symbols: set[str],
+    result: StaticAnalysisResult,
+) -> None:
+    """Record references to the target found in one Python file via its AST.
+    ``text`` is the content of the file at ``rel``; ``modules`` and ``symbols``
+    are the target's module names and exported names. Matches are recorded on
+    ``result`` through ``_add_ref``. If ``text`` does not parse, the file is
+    handed to ``_scan_text`` instead (with an empty ``target_rel``).
+    """
+    lines = text.splitlines()
+    try:
+        tree = ast.parse(text)
+    except SyntaxError:
+        _scan_text(path, rel, text, modules, symbols, result, "")
+        return
+    # Local names bound by imports of the target; used to recognise calls such as ``alias.func()``.
+    imported_aliases: set[str] = set()
+    for node in ast.walk(tree):
+        if isinstance(node, ast.Import):
+            for alias in node.names:
+                # Match either the exact module name or a dotted path ending in it.
+                if alias.name in modules or any(alias.name.endswith("." + m) for m in modules):
+                    imported_aliases.add(alias.asname or alias.name.split(".")[0])
+                    _add_ref(result, rel, node.lineno, "import", lines[node.lineno - 1])
+        elif isinstance(node, ast.ImportFrom) and node.module:
+            if node.module in modules or any(node.module.endswith("." + m) for m in modules):
+                for alias in node.names:
+                    imported_aliases.add(alias.asname or alias.name)
+                _add_ref(result, rel, node.lineno, "import", lines[node.lineno - 1])
+            # Importing a name equal to one of the target's symbols from any other module also counts.
+            elif any(alias.name in symbols for alias in node.names):
+                _add_ref(result, rel, node.lineno, "import", lines[node.lineno - 1])
+        elif isinstance(node, ast.Call):
+            name = _call_name(node.func)
+            if name and (name in symbols or name.split(".")[0] in imported_aliases):
+                _add_ref(result, rel, node.lineno, "call", lines[node.lineno - 1])
+        # NOTE(review): ast.walk also yields the Name node that is the func of a Call,
+        # so ``foo()`` with ``foo`` in symbols appears to be recorded twice (once above,
+        # once here) — confirm whether the double count is intended.
+        elif isinstance(node, ast.Name) and node.id in symbols:
+            _add_ref(
+                result,
+                rel,
+                getattr(node, "lineno", 1),
+                "call",
+                lines[getattr(node, "lineno", 1) - 1],
+            )
+def _call_name(node: ast.AST) -> str | None:
+    if isinstance(node, ast.Name):
+        return node.id
+    if isinstance(node, ast.Attribute):
+        base = _call_name(node.value)
+        return f"{base}.{node.attr}" if base else node.attr
+    return None
+def _scan_text(
+    path: Path,
+    rel: str,
+    text: str,
+    modules: set[str],
+    symbols: set[str],
+    result: StaticAnalysisResult,
+    target_rel: str,
+) -> None:
+    needles = sorted(
+        modules | symbols | ({target_rel} if target_rel else set()), key=len, reverse=True
+    )
+    if not needles:
+        return
+    pattern = re.compile(r"\b(" + "|".join(re.escape(n) for n in needles if n) + r")\b")
+    kind = (
+        "doc"
+        if path.suffix.lower() in DOC_EXTENSIONS
+        else "config"
+        if path.suffix.lower() in CONFIG_EXTENSIONS
+        else "call"
+    )
+    for idx, line in enumerate(text.splitlines(), 1):
+        if pattern.search(line):
+            _add_ref(result, rel, idx, kind, line)
+def _scan_dynamic_and_reflection(
+    files: list[Path],
+    target: Path,
+    repo_root: Path,
+    modules: set[str],
+    result: StaticAnalysisResult,
+) -> None:
+    dynamic_re = re.compile(r"(importlib\.import_module|__import__)\(([^)]*)\)")
+    reflection_re = re.compile(r"\b(getattr|hasattr|setattr|vars)\(([^)]*)\)")
+    module_re = re.compile("|".join(re.escape(m) for m in sorted(modules, key=len, reverse=True)))
+    if not modules:
+        return
+    for path in files:
+        if path.resolve() == target.resolve():
+            continue
+        try:
+            text = path.read_text(encoding="utf-8")
+        except UnicodeDecodeError:
+            continue
+        rel = relpath(path, repo_root)
+        for idx, line in enumerate(text.splitlines(), 1):
+            if dynamic_re.search(line) and module_re.search(line):
+                result.dynamic_references.append(Reference(rel, idx, "dynamic", line.strip()))
+            if reflection_re.search(line) and module_re.search(line):
+                result.reflection_patterns.append(Reference(rel, idx, "reflection", line.strip()))

fossil/cache.py ADDED Viewed

@@ -0,0 +1,228 @@
+"""Local SQLite result cache.
+Implements §3.5 of the pre-development docs:
+- analysis_results table for per-file results
+- scan_results table for directory scan results
+- pr_cache table for GitHub/GitLab PR lookups
+- schema_version for future migration support
+- Auto-prune entries older than cache_ttl_hours when cache exceeds 100MB
+- Corruption detection and silent rebuild
+"""
+from __future__ import annotations
+import json
+import sqlite3
+import time
+from pathlib import Path
+from typing import Any
+# Version number written to the schema_version table when a database is first created.
+SCHEMA_VERSION = 2
+# DDL applied on every connection; IF NOT EXISTS makes re-running it harmless.
+# The UNIQUE constraints are what let the INSERT OR REPLACE writes act as upserts.
+SCHEMA = """\
+CREATE TABLE IF NOT EXISTS analysis_results (
+  id INTEGER PRIMARY KEY AUTOINCREMENT,
+  file_path TEXT NOT NULL,
+  git_head_hash TEXT NOT NULL,
+  repo_root TEXT NOT NULL,
+  result_json TEXT NOT NULL,
+  created_at INTEGER NOT NULL,
+  fossil_version TEXT NOT NULL,
+  UNIQUE(file_path, git_head_hash, repo_root)
+);
+CREATE TABLE IF NOT EXISTS scan_results (
+  id INTEGER PRIMARY KEY AUTOINCREMENT,
+  repo_root TEXT NOT NULL,
+  scan_target TEXT NOT NULL,
+  git_head_hash TEXT NOT NULL,
+  result_json TEXT NOT NULL,
+  created_at INTEGER NOT NULL,
+  UNIQUE(repo_root, scan_target, git_head_hash)
+);
+CREATE TABLE IF NOT EXISTS pr_cache (
+  id INTEGER PRIMARY KEY AUTOINCREMENT,
+  remote_url TEXT NOT NULL,
+  pr_number INTEGER NOT NULL,
+  pr_title TEXT,
+  pr_body TEXT,
+  merged_at TEXT,
+  cached_at INTEGER NOT NULL,
+  UNIQUE(remote_url, pr_number)
+);
+CREATE TABLE IF NOT EXISTS schema_version (version INTEGER);
+"""
+# Database file size at or above which _auto_prune() deletes expired rows.
+MAX_CACHE_BYTES = 100 * 1024 * 1024  # 100 MB
+# Serialized results larger than this are not written to the cache.
+MAX_RESULT_BYTES = 5 * 1024 * 1024  # 5 MB
+# Default age, in hours, beyond which rows are eligible for pruning.
+DEFAULT_TTL_HOURS = 24
+class CacheStore:
+    def __init__(self, repo_root: Path):
+        self.path = repo_root / ".fossil" / "cache.db"
+    def _connect(self) -> sqlite3.Connection:
+        self.path.parent.mkdir(exist_ok=True)
+        try:
+            conn = sqlite3.connect(self.path)
+            conn.executescript(SCHEMA)
+            # Set schema version if not yet set
+            row = conn.execute("SELECT version FROM schema_version LIMIT 1").fetchone()
+            if row is None:
+                conn.execute("INSERT INTO schema_version (version) VALUES (?)", (SCHEMA_VERSION,))
+                conn.commit()
+        except sqlite3.DatabaseError:
+            # Corruption detected — rebuild
+            self.clear()
+            conn = sqlite3.connect(self.path)
+            conn.executescript(SCHEMA)
+            conn.execute("INSERT INTO schema_version (version) VALUES (?)", (SCHEMA_VERSION,))
+            conn.commit()
+        return conn
+    # ── Analysis result CRUD ──
+    def get_analysis(self, file_path: Path, head: str, repo_root: Path) -> dict[str, Any] | None:
+        try:
+            with self._connect() as conn:
+                row = conn.execute(
+                    "SELECT result_json FROM analysis_results WHERE file_path=? AND git_head_hash=? AND repo_root=?",
+                    (str(file_path), head, str(repo_root)),
+                ).fetchone()
+        except sqlite3.DatabaseError:
+            self.clear()
+            return None
+        return json.loads(row[0]) if row else None
+    def put_analysis(
+        self, file_path: Path, head: str, repo_root: Path, version: str, result: dict[str, Any]
+    ) -> None:
+        payload = json.dumps(result, sort_keys=True)
+        if len(payload.encode("utf-8")) > MAX_RESULT_BYTES:
+            return
+        try:
+            with self._connect() as conn:
+                conn.execute(
+                    """
+                    INSERT OR REPLACE INTO analysis_results
+                    (file_path, git_head_hash, repo_root, result_json, created_at, fossil_version)
+                    VALUES (?, ?, ?, ?, ?, ?)
+                    """,
+                    (str(file_path), head, str(repo_root), payload, int(time.time()), version),
+                )
+        except sqlite3.DatabaseError:
+            self.clear()
+        self._auto_prune()
+    # ── Scan result CRUD ──
+    def get_scan(self, scan_target: str, head: str, repo_root: Path) -> list[dict[str, Any]] | None:
+        try:
+            with self._connect() as conn:
+                row = conn.execute(
+                    "SELECT result_json FROM scan_results WHERE scan_target=? AND git_head_hash=? AND repo_root=?",
+                    (scan_target, head, str(repo_root)),
+                ).fetchone()
+        except sqlite3.DatabaseError:
+            self.clear()
+            return None
+        return json.loads(row[0]) if row else None
+    def put_scan(
+        self, scan_target: str, head: str, repo_root: Path, result: list[dict[str, Any]]
+    ) -> None:
+        payload = json.dumps(result, sort_keys=True)
+        if len(payload.encode("utf-8")) > MAX_RESULT_BYTES:
+            return
+        try:
+            with self._connect() as conn:
+                conn.execute(
+                    """
+                    INSERT OR REPLACE INTO scan_results
+                    (repo_root, scan_target, git_head_hash, result_json, created_at)
+                    VALUES (?, ?, ?, ?, ?)
+                    """,
+                    (str(repo_root), scan_target, head, payload, int(time.time())),
+                )
+        except sqlite3.DatabaseError:
+            self.clear()
+    # ── PR cache CRUD ──
+    def get_pr(self, remote_url: str, pr_number: int) -> dict[str, Any] | None:
+        try:
+            with self._connect() as conn:
+                row = conn.execute(
+                    "SELECT pr_title, pr_body, merged_at FROM pr_cache WHERE remote_url=? AND pr_number=?",
+                    (remote_url, pr_number),
+                ).fetchone()
+        except sqlite3.DatabaseError:
+            return None
+        if row is None:
+            return None
+        return {"pr_title": row[0], "pr_body": row[1], "merged_at": row[2]}
+    def put_pr(
+        self,
+        remote_url: str,
+        pr_number: int,
+        title: str | None,
+        body: str | None,
+        merged_at: str | None,
+    ) -> None:
+        try:
+            with self._connect() as conn:
+                conn.execute(
+                    """
+                    INSERT OR REPLACE INTO pr_cache
+                    (remote_url, pr_number, pr_title, pr_body, merged_at, cached_at)
+                    VALUES (?, ?, ?, ?, ?, ?)
+                    """,
+                    (remote_url, pr_number, title, body, merged_at, int(time.time())),
+                )
+        except sqlite3.DatabaseError:
+            pass
+    # ── Cache management ──
+    def clear(self) -> None:
+        if self.path.exists():
+            self.path.unlink()
+    def stats(self) -> dict[str, Any]:
+        """Return cache statistics."""
+        if not self.path.exists():
+            return {"size_bytes": 0, "analysis_count": 0, "scan_count": 0, "pr_count": 0}
+        try:
+            size = self.path.stat().st_size
+            with self._connect() as conn:
+                analysis_count = conn.execute("SELECT COUNT(*) FROM analysis_results").fetchone()[0]
+                scan_count = conn.execute("SELECT COUNT(*) FROM scan_results").fetchone()[0]
+                pr_count = conn.execute("SELECT COUNT(*) FROM pr_cache").fetchone()[0]
+            return {
+                "size_bytes": size,
+                "analysis_count": analysis_count,
+                "scan_count": scan_count,
+                "pr_count": pr_count,
+            }
+        except (sqlite3.DatabaseError, OSError):
+            return {"size_bytes": 0, "analysis_count": 0, "scan_count": 0, "pr_count": 0}
+    def _auto_prune(self, ttl_hours: int = DEFAULT_TTL_HOURS) -> None:
+        """Prune old entries if cache exceeds MAX_CACHE_BYTES."""
+        if not self.path.exists():
+            return
+        try:
+            if self.path.stat().st_size < MAX_CACHE_BYTES:
+                return
+        except OSError:
+            return
+        cutoff = int(time.time()) - (ttl_hours * 3600)
+        try:
+            with self._connect() as conn:
+                conn.execute("DELETE FROM analysis_results WHERE created_at < ?", (cutoff,))
+                conn.execute("DELETE FROM scan_results WHERE created_at < ?", (cutoff,))
+                conn.execute("DELETE FROM pr_cache WHERE cached_at < ?", (cutoff,))
+                conn.execute("VACUUM")
+        except sqlite3.DatabaseError:
+            self.clear()