PyPI - fossil-code - Versions diffs - 0.2.0__py3-none-any.whl - Mend

fossil-code 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20) hide show

fossil/__init__.py +3 -0
fossil/__main__.py +4 -0
fossil/analyzers.py +221 -0
fossil/cache.py +228 -0
fossil/cli.py +421 -0
fossil/config_manager.py +141 -0
fossil/engine.py +122 -0
fossil/git_miner.py +78 -0
fossil/models.py +109 -0
fossil/patterns.py +79 -0
fossil/py.typed +1 -0
fossil/render.py +436 -0
fossil/repo.py +82 -0
fossil/scoring.py +126 -0
fossil_code-0.2.0.dist-info/METADATA +377 -0
fossil_code-0.2.0.dist-info/RECORD +20 -0
fossil_code-0.2.0.dist-info/WHEEL +5 -0
fossil_code-0.2.0.dist-info/entry_points.txt +2 -0
fossil_code-0.2.0.dist-info/licenses/LICENSE +21 -0
fossil_code-0.2.0.dist-info/top_level.txt +1 -0

fossil/cli.py ADDED Viewed

@@ -0,0 +1,421 @@
+"""Root CLI — command parser, dispatch, and user-facing error handling.
+Implements §3.4 of the pre-development docs:
+- fossil explain <target> — full forensic report
+- fossil scan [directory] — directory scan with threshold filtering
+- fossil clean [directory] — prioritized deletion backlog
+- fossil cache clear — delete cache
+- fossil cache stats — show cache statistics
+- fossil config set/show — credential management
+- Flags: --version (global); --no-color and --plain (accepted by explain, scan, and clean)
+- Exit codes: 0 (dead), 1 (error), 2 (file not found), 3 (not git repo), 4 (alive/no results), 5 (unsupported)
+"""
+from __future__ import annotations
+import argparse
+import json
+import os
+import sys
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from pathlib import Path
+from fossil import __version__
+from fossil.analyzers import SOURCE_EXTENSIONS, iter_repo_files, language_for
+from fossil.cache import CacheStore
+from fossil.config_manager import masked_config, read_project_config, set_config
+from fossil.engine import explain
+from fossil.render import (
+    render_explain,
+    render_rich_clean,
+    render_rich_scan,
+)
+from fossil.repo import FileMissingError, FossilError, NotGitRepositoryError, find_repo_root
+def build_parser() -> argparse.ArgumentParser:
+    """Assemble the ``fossil`` argument parser with all of its subcommands.
+    Each leaf subcommand registers its handler under ``func`` through
+    ``set_defaults``, so callers dispatch with ``args.func(args)``.
+    """
+    def _add_shared_flags(command: argparse.ArgumentParser) -> None:
+        # Output, cache and history-depth flags accepted by explain, scan and clean.
+        for flag in ("--json", "--plain", "--no-color", "--no-cache"):
+            command.add_argument(flag, action="store_true")
+        command.add_argument("--depth", type=int, default=500)
+    root = argparse.ArgumentParser(prog="fossil", description="Dead-code forensics CLI")
+    root.add_argument("--version", action="version", version=f"fossil {__version__}")
+    commands = root.add_subparsers(dest="command", required=True)
+    # ── fossil explain ──
+    explain_cmd = commands.add_parser("explain", help="Generate a forensic report for one file")
+    explain_cmd.add_argument("target")
+    _add_shared_flags(explain_cmd)
+    explain_cmd.add_argument(
+        "--remote", choices=["github", "gitlab", "none", "auto"], default="auto"
+    )
+    for flag in ("--narrate", "--include-code", "--yolo", "--force-yolo"):
+        explain_cmd.add_argument(flag, action="store_true")
+    explain_cmd.set_defaults(func=cmd_explain)
+    # ── fossil scan ──
+    scan_cmd = commands.add_parser("scan", help="Scan a directory for dead files")
+    scan_cmd.add_argument("directory", nargs="?", default=".")
+    scan_cmd.add_argument("--threshold", type=int, default=70)
+    scan_cmd.add_argument("--language", default="all")
+    scan_cmd.add_argument("--exclude", action="append", default=[])
+    _add_shared_flags(scan_cmd)
+    scan_cmd.set_defaults(func=cmd_scan)
+    # ── fossil clean ──
+    clean_cmd = commands.add_parser("clean", help="Build a prioritized deletion backlog")
+    clean_cmd.add_argument("directory", nargs="?", default=".")
+    clean_cmd.add_argument("--threshold", type=int, default=80)
+    for flag in ("--dry-run", "--yolo"):
+        clean_cmd.add_argument(flag, action="store_true")
+    _add_shared_flags(clean_cmd)
+    clean_cmd.set_defaults(func=cmd_clean)
+    # ── fossil cache ──
+    cache_cmd = commands.add_parser("cache", help="Cache operations")
+    cache_actions = cache_cmd.add_subparsers(dest="cache_command", required=True)
+    cache_actions.add_parser("clear").set_defaults(func=cmd_cache_clear)
+    cache_actions.add_parser("stats").set_defaults(func=cmd_cache_stats)
+    # ── fossil config ──
+    config_cmd = commands.add_parser("config", help="Configuration operations")
+    config_actions = config_cmd.add_subparsers(dest="config_command", required=True)
+    config_actions.add_parser("show").set_defaults(func=cmd_config_show)
+    config_set = config_actions.add_parser("set")
+    for positional in ("key", "value"):
+        config_set.add_argument(positional)
+    config_set.set_defaults(func=cmd_config_set)
+    return root
+# ---------------------------------------------------------------------------
+# Command implementations
+# ---------------------------------------------------------------------------
+def cmd_explain(args: argparse.Namespace) -> int:
+    """Handle ``fossil explain``: analyze one target and print its report.
+    Returns 0 when the target is reported dead and 4 when it is not. Every
+    ``--narrate``, ``--yolo`` and ``--force-yolo`` path ends with exit code 1
+    after printing an explanation to stderr.
+    """
+    if args.narrate:
+        print(
+            "--narrate requires a configured LLM provider. Run: fossil config set llm_provider openai",
+            file=sys.stderr,
+        )
+        return 1
+    result = explain(args.target, depth=args.depth, no_cache=args.no_cache)
+    if not (args.yolo or args.force_yolo):
+        # Normal path: render the report and signal dead/alive through the exit code.
+        print(
+            render_explain(
+                result,
+                json_mode=args.json,
+                plain=args.plain,
+                no_color=args.no_color,
+            )
+        )
+        return 0 if result.dead else 4
+    # --force-yolo lifts the 90% confidence gate that --yolo alone enforces.
+    min_score = 0 if args.force_yolo else 90
+    confidence = result.confidence
+    score = confidence.score if confidence else 0
+    if score < min_score:
+        print(
+            f"Confidence is {score}%. --yolo blocked below 90%. Use --force-yolo to override.",
+            file=sys.stderr,
+        )
+    else:
+        print(
+            "--yolo PR generation requires GitHub/GitLab API integration; no files were changed.",
+            file=sys.stderr,
+        )
+    return 1
+def cmd_scan(args: argparse.Namespace) -> int:
+    """Handle ``fossil scan``: report dead files under a directory.
+    Exclude patterns are the ``--exclude`` values followed by the project
+    config's ``analysis.exclude_patterns``. Returns 0 when at least one file
+    is reported and 4 when there are no candidates or no results.
+    """
+    root = Path(args.directory).expanduser().resolve()
+    repo_root = find_repo_root(root)
+    # Read project config for exclude patterns
+    project_patterns = read_project_config(repo_root).get("analysis", {}).get("exclude_patterns")
+    exclude = [*args.exclude]
+    if project_patterns:
+        exclude.extend(project_patterns)
+    selected = _language_filter(args.language)
+    candidates = []
+    for path in iter_repo_files(root, exclude):
+        if path.suffix.lower() not in SOURCE_EXTENSIONS:
+            continue
+        if selected is None or language_for(path) in selected:
+            candidates.append(path)
+    if not candidates:
+        if args.json:
+            print(json.dumps([]))
+        else:
+            print(
+                f"No supported source files found in {args.directory}. Supported: Python, JavaScript, TypeScript, Java, Go."
+            )
+        return 4
+    # Analyze files with progress
+    results = _analyze_files_parallel(
+        candidates, args.depth, args.no_cache, args.threshold, args.plain
+    )
+    exit_code = 0 if results else 4
+    if args.json:
+        print(json.dumps([r.to_dict() for r in results], indent=2, sort_keys=True))
+        return exit_code
+    if not args.plain and _rich_ok():
+        print(
+            render_rich_scan(
+                results,
+                str(repo_root),
+                len(candidates),
+                args.threshold,
+                args.directory,
+                no_color=args.no_color,
+            )
+        )
+        return exit_code
+    # Plain-text table.
+    if not results:
+        print(f"✓ No dead code found above {args.threshold}% threshold.")
+        return 4
+    print(f"fossil scan {args.directory} ({len(candidates)} files)")
+    print(f"{'File':<50} {'Language':<12} {'Confidence':>10}  Status")
+    print("─" * 85)
+    for result in results:
+        score = result.confidence.score if result.confidence else 0
+        rel = Path(result.abs_path).relative_to(repo_root).as_posix()
+        print(f"{rel:<50} {result.language:<12} {score:>9}%  {result.confidence.label}")
+    print(f"\n{len(results)} dead files found above {args.threshold}% threshold.")
+    return exit_code
+def cmd_clean(args: argparse.Namespace) -> int:
+    """Handle ``fossil clean``: print a prioritized deletion backlog.
+    Nothing is deleted here. ``--yolo`` always ends with exit code 1 after
+    the backlog is printed; otherwise the result is 0 when candidates were
+    found and 4 when none were.
+    """
+    root = Path(args.directory).expanduser().resolve()
+    repo_root = find_repo_root(root)
+    # Read project config
+    project_patterns = read_project_config(repo_root).get("analysis", {}).get("exclude_patterns")
+    exclude = []
+    if project_patterns:
+        exclude.extend(project_patterns)
+    candidates = [
+        entry
+        for entry in iter_repo_files(root, exclude)
+        if entry.suffix.lower() in SOURCE_EXTENSIONS
+    ]
+    results = _analyze_files_parallel(
+        candidates, args.depth, args.no_cache, args.threshold, args.plain
+    )
+    # Anything short of an explicit --yolo is treated as a dry run.
+    dry_run = args.dry_run or not args.yolo
+    if args.json:
+        print(json.dumps([r.to_dict() for r in results], indent=2, sort_keys=True))
+    elif not results:
+        print(f"No deletion candidates found above {args.threshold}% threshold.")
+        return 4
+    elif not args.plain and _rich_ok():
+        print(
+            render_rich_clean(
+                results,
+                str(repo_root),
+                args.threshold,
+                args.directory,
+                dry_run=dry_run,
+                no_color=args.no_color,
+            )
+        )
+    else:
+        mode = "dry run" if dry_run else "planned"
+        print(f"fossil clean {args.directory} — {mode}")
+        for index, result in enumerate(results, 1):
+            score = result.confidence.score if result.confidence else 0
+            rel = Path(result.abs_path).relative_to(repo_root).as_posix()
+            print(f"{index}. {rel} — {score}% — {result.suggested_action}")
+    if args.yolo:
+        print(
+            "--yolo PR generation requires GitHub/GitLab API integration; no files were changed.",
+            file=sys.stderr,
+        )
+        return 1
+    return 0 if results else 4
+def _analyze_files_parallel(
+    candidates: list[Path],
+    depth: int,
+    no_cache: bool,
+    threshold: int,
+    plain: bool,
+) -> list:
+    """Analyze candidate files and return the dead ones at or above ``threshold``.
+    Batches of ten or more files are fanned out over a ThreadPoolExecutor
+    whether or not a progress bar is shown; smaller batches run sequentially.
+    The Rich progress bar is displayed only when ``plain`` is false, Rich can
+    be imported, and there are more than three candidates.
+    A file whose analysis raises is skipped (best effort); the failure is
+    logged at DEBUG level rather than discarded silently.
+    Returns the kept results sorted by confidence score, highest first. Ties
+    keep the order of ``candidates``, so threaded and sequential runs produce
+    the same ordering.
+    """
+    import logging
+    from fossil.engine import explain as explain_file
+    log = logging.getLogger(__name__)
+    def _analyze(path: Path):
+        # Single implementation of "explain, then keep only confident dead files".
+        try:
+            result = explain_file(str(path), depth=depth, no_cache=no_cache)
+        except Exception:
+            log.debug("Skipping %s: analysis failed", path, exc_info=True)
+            return None
+        if result.dead and result.confidence and result.confidence.score >= threshold:
+            return result
+        return None
+    def _run(advance) -> list:
+        # ``advance`` is called once per finished file, always from this thread.
+        outcomes = [None] * len(candidates)
+        if len(candidates) < 10:
+            for index, path in enumerate(candidates):
+                outcomes[index] = _analyze(path)
+                advance()
+            return outcomes
+        worker_count = min(32, (os.cpu_count() or 1) + 4)
+        with ThreadPoolExecutor(max_workers=worker_count) as pool:
+            futures = {pool.submit(_analyze, path): index for index, path in enumerate(candidates)}
+            for future in as_completed(futures):
+                # Store by submission index so completion order cannot leak into the output.
+                outcomes[futures[future]] = future.result()
+                advance()
+        return outcomes
+    progress = None
+    if not plain and _rich_ok() and len(candidates) > 3:
+        try:
+            from rich.progress import (
+                BarColumn,
+                MofNCompleteColumn,
+                Progress,
+                SpinnerColumn,
+                TextColumn,
+            )
+        except ImportError:
+            progress = None  # run without a progress bar
+        else:
+            progress = Progress(
+                SpinnerColumn(),
+                TextColumn("[bold blue]Scanning..."),
+                BarColumn(),
+                MofNCompleteColumn(),
+                TextColumn("[dim]{task.description}"),
+                transient=True,
+            )
+    if progress is None:
+        outcomes = _run(lambda: None)
+    else:
+        with progress:
+            task = progress.add_task("", total=len(candidates))
+            outcomes = _run(lambda: progress.advance(task))
+    results = [outcome for outcome in outcomes if outcome is not None]
+    # list.sort is stable, so equal scores stay in candidate order.
+    results.sort(key=lambda r: r.confidence.score if r.confidence else 0, reverse=True)
+    return results
+def _language_filter(value: str) -> set[str] | None:
+    """Translate a ``--language`` value into a set of language names.
+    ``value`` is a comma-separated list. Entries are trimmed and lower-cased,
+    and the short aliases py/js/ts are expanded to full language names; any
+    other entry is passed through unchanged.
+    Returns ``None`` (meaning "no filtering") when the value is ``all`` in any
+    letter case, or when it contains no entries at all, so an empty or
+    mistyped-case selection does not silently filter out every file.
+    """
+    aliases = {"py": "python", "js": "javascript", "ts": "typescript", "java": "java", "go": "go"}
+    requested = {item.strip().lower() for item in value.split(",")}
+    requested.discard("")
+    if not requested or requested == {"all"}:
+        return None
+    return {aliases.get(name, name) for name in requested}
+def _rich_ok() -> bool:
+    """Report whether ``rich.console.Console`` can be imported."""
+    try:
+        from rich.console import Console  # noqa: F401
+    except ImportError:
+        return False
+    else:
+        return True
+# ---------------------------------------------------------------------------
+# Cache & config commands
+# ---------------------------------------------------------------------------
+def cmd_cache_clear(args: argparse.Namespace) -> int:
+    """Handle ``fossil cache clear`` for the repository containing the current directory."""
+    store = CacheStore(find_repo_root(Path.cwd()))
+    store.clear()
+    print("Cache cleared.")
+    return 0
+def cmd_cache_stats(args: argparse.Namespace) -> int:
+    """Handle ``fossil cache stats``: print cache location, size and entry counts."""
+    repo_root = find_repo_root(Path.cwd())
+    stats = CacheStore(repo_root).stats()
+    cache_file = repo_root / ".fossil" / "cache.db"
+    print(f"Cache location: {cache_file}")
+    size_kb = stats["size_bytes"] / 1024
+    print(f"Size: {size_kb:.1f} KB")
+    counters = (
+        ("Analysis results cached", "analysis_count"),
+        ("Scan results cached", "scan_count"),
+        ("PR lookups cached", "pr_count"),
+    )
+    for label, key in counters:
+        print(f"{label}: {stats[key]}")
+    return 0
+def cmd_config_show(args: argparse.Namespace) -> int:
+    """Handle ``fossil config show``: print the masked configuration, sorted by key."""
+    values = masked_config()
+    if values:
+        for key in sorted(values):
+            print(f"{key} = {values[key]}")
+    else:
+        print("No fossil config values set.")
+    return 0
+def cmd_config_set(args: argparse.Namespace) -> int:
+    """Handle ``fossil config set <key> <value>``: store the pair and confirm."""
+    key, value = args.key, args.value
+    set_config(key, value)
+    print(f"✓ {key} saved.")
+    return 0
+# ---------------------------------------------------------------------------
+# Entry point & error handling
+# ---------------------------------------------------------------------------
+def main(argv: list[str] | None = None) -> None:
+    """Parse ``argv``, run the selected command and exit with its status code.
+    Errors from the FossilError family exit with the exception's own
+    ``exit_code``; any other exception exits with 1. Always raises SystemExit.
+    """
+    args = build_parser().parse_args(argv)
+    try:
+        code = args.func(args)
+    except (FileMissingError, NotGitRepositoryError, FossilError) as exc:
+        # Pick the most specific label, checking in the same order as separate handlers would.
+        if isinstance(exc, FileMissingError):
+            label = "File not found"
+        elif isinstance(exc, NotGitRepositoryError):
+            label = "Not a git repository"
+        else:
+            label = "fossil error"
+        code = exc.exit_code
+        _print_error(args, label, str(exc), code)
+    except Exception as exc:  # pragma: no cover - defensive CLI boundary
+        code = 1
+        _print_error(args, "Unexpected error", str(exc), code)
+    raise SystemExit(code)
+def _print_error(args: argparse.Namespace, error: str, message: str, code: int) -> None:
+    """Report a command failure in the format the user asked for.
+    With ``args.json`` set, a JSON object with ``error``, ``message`` and
+    ``code`` is printed to stdout. Otherwise a single ``Error: ...`` line goes
+    to stderr; when ``message`` is empty (an exception raised without text)
+    the ``error`` label is shown instead, so the line is never blank.
+    """
+    if getattr(args, "json", False):
+        print(json.dumps({"error": error, "message": message, "code": code}, sort_keys=True))
+    else:
+        print(f"Error: {message or error}", file=sys.stderr)
+# Script entry point: run the CLI when this module is executed directly.
+if __name__ == "__main__":
+    main()

fossil/config_manager.py ADDED Viewed

@@ -0,0 +1,141 @@
+"""Configuration management.
+Implements §8 of the pre-development docs:
+- User config at ~/.config/fossil/config.toml with 0600 permissions
+- Project config at .fossil.toml (repo root, committed)
+- Environment variable overrides
+- Masked display of sensitive values
+- TOML parsing via tomllib (Python 3.11+)
+"""
+from __future__ import annotations
+import os
+import stat
+import tomllib
+from pathlib import Path
+from typing import Any
+# Per-user config directory and file, located under the home directory.
+CONFIG_DIR = Path.home() / ".config" / "fossil"
+CONFIG_PATH = CONFIG_DIR / "config.toml"
+# Keys whose values masked_config() masks before they are displayed.
+SENSITIVE = {"github_token", "gitlab_token", "llm_api_key"}
+# Valid config keys (§8.2)
+# NOTE(review): none of the functions visible in this module check keys against
+# this set — confirm whether set_config() is expected to validate.
+VALID_KEYS = {
+    "github_token",
+    "gitlab_token",
+    "llm_api_key",
+    "llm_provider",
+    "llm_model",
+    "llm_base_url",
+    "default_depth",
+    "cache_ttl_hours",
+    "output.color",
+    "output.theme",
+}
+# Env var → config key mapping
+# read_config() applies these: a non-empty environment variable replaces the stored value.
+# NOTE(review): "log_level" is not listed in VALID_KEYS, and "llm_base_url" has no
+# environment override — confirm both are intentional.
+ENV_OVERRIDES = {
+    "GITHUB_TOKEN": "github_token",
+    "GITLAB_TOKEN": "gitlab_token",
+    "FOSSIL_LLM_API_KEY": "llm_api_key",
+    "FOSSIL_LLM_PROVIDER": "llm_provider",
+    "FOSSIL_LLM_MODEL": "llm_model",
+    "FOSSIL_DEFAULT_DEPTH": "default_depth",
+    "FOSSIL_LOG_LEVEL": "log_level",
+}
+def set_config(key: str, value: str) -> None:
+    """Write a key-value pair to the user config file.
+    The whole file is rewritten with keys sorted and every value stored as a
+    TOML basic string. Values are escaped, so quotes, backslashes and control
+    characters no longer produce a file that tomllib cannot parse.
+    The file is created with owner-only (0600) permissions from the start and
+    an existing file is tightened before the new content is written, so
+    credentials are never on disk under looser permissions.
+    Keys are written verbatim; a dotted key such as ``output.color`` is read
+    back by TOML as a nested table.
+    """
+    def _toml_string(text: str) -> str:
+        # Escape per the TOML basic-string rules.
+        named = {
+            "\\": "\\\\",
+            '"': '\\"',
+            "\b": "\\b",
+            "\t": "\\t",
+            "\n": "\\n",
+            "\f": "\\f",
+            "\r": "\\r",
+        }
+        pieces = []
+        for ch in text:
+            if ch in named:
+                pieces.append(named[ch])
+            elif ord(ch) < 0x20 or ord(ch) == 0x7F:
+                # Remaining control characters must be written as \uXXXX.
+                pieces.append(f"\\u{ord(ch):04X}")
+            else:
+                pieces.append(ch)
+        return '"' + "".join(pieces) + '"'
+    CONFIG_DIR.mkdir(parents=True, exist_ok=True)
+    values = _read_raw_config()
+    values[key] = value
+    payload = "".join(f"{k} = {_toml_string(str(v))}\n" for k, v in sorted(values.items()))
+    owner_only = stat.S_IRUSR | stat.S_IWUSR
+    # The mode passed to os.open applies only when the file is created.
+    fd = os.open(CONFIG_PATH, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, owner_only)
+    with os.fdopen(fd, "w", encoding="utf-8") as handle:
+        # A pre-existing file keeps its old mode, so tighten it before writing secrets.
+        os.chmod(CONFIG_PATH, owner_only)
+        handle.write(payload)
+def _read_raw_config() -> dict[str, str]:
+    """Read config file without env var overrides.
+    Returns a flat mapping of string values. Nested TOML tables are flattened
+    to dotted keys at every depth (``[a.b] c = 1`` becomes ``"a.b.c": "1"``),
+    so deeper tables are no longer stored as the text of a Python dict.
+    If the file is missing the result is empty. If it cannot be parsed as
+    TOML, a line-based ``key = value`` parse is attempted for legacy configs.
+    """
+    if not CONFIG_PATH.exists():
+        return {}
+    data: dict[str, str] = {}
+    def _flatten(prefix: str, node: Any) -> None:
+        # Walk tables recursively; every non-table value is stored as a string.
+        if isinstance(node, dict):
+            for name, child in node.items():
+                _flatten(f"{prefix}.{name}" if prefix else name, child)
+        else:
+            data[prefix] = str(node)
+    try:
+        with open(CONFIG_PATH, "rb") as f:
+            parsed = tomllib.load(f)
+    except (tomllib.TOMLDecodeError, OSError):
+        # Fall back to simple key=value parsing for legacy configs
+        try:
+            for line in CONFIG_PATH.read_text(encoding="utf-8").splitlines():
+                if "=" not in line or line.strip().startswith("#"):
+                    continue
+                key, value = line.split("=", 1)
+                data[key.strip()] = value.strip().strip('"')
+        except OSError:
+            pass
+    else:
+        _flatten("", parsed)
+    return data
+def read_config() -> dict[str, str]:
+    """Return the user config with environment variable overrides applied.
+    Only environment variables that are set to a non-empty value override.
+    """
+    data = _read_raw_config()
+    overrides = {
+        key: os.environ[env] for env, key in ENV_OVERRIDES.items() if os.environ.get(env)
+    }
+    data.update(overrides)
+    return data
+def read_project_config(repo_root: Path) -> dict[str, Any]:
+    """Load the project-level ``.fossil.toml`` found directly under ``repo_root``.
+    Returns the parsed TOML document (sections such as analysis, thresholds,
+    pr), or an empty dict when the file is absent, unreadable, or not valid TOML.
+    """
+    config_file = repo_root / ".fossil.toml"
+    if config_file.exists():
+        try:
+            with open(config_file, "rb") as handle:
+                return tomllib.load(handle)
+        except (tomllib.TOMLDecodeError, OSError):
+            pass
+    return {}
+def get_effective_config(repo_root: Path | None = None) -> dict[str, Any]:
+    """Get merged config: user config → project config → env overrides.
+    Project config values override user config for matching keys.
+    Environment variables override everything.
+    The layers are applied in exactly that order. Environment overrides are
+    applied last so that a project value with the same key cannot replace
+    them. Project sections are flattened one level to ``section.key``.
+    """
+    # Start from the file only; the environment layer goes on top at the end.
+    config: dict[str, Any] = dict(_read_raw_config())
+    if repo_root:
+        project = read_project_config(repo_root)
+        # Merge project config (flattened)
+        for section, values in project.items():
+            if isinstance(values, dict):
+                for k, v in values.items():
+                    config[f"{section}.{k}"] = v
+            else:
+                config[section] = values
+    # Same rule as read_config(): only non-empty environment variables override.
+    for env, key in ENV_OVERRIDES.items():
+        if os.environ.get(env):
+            config[key] = os.environ[env]
+    return config
+def masked_config() -> dict[str, str]:
+    """Return the effective user config with every SENSITIVE value masked."""
+    masked: dict[str, str] = {}
+    for key, value in read_config().items():
+        masked[key] = _mask(value) if key in SENSITIVE else value
+    return masked
+def _mask(value: str) -> str:
+    """Return a display-safe form of a secret.
+    Empty values stay empty. Values of 16 characters or more are shown as
+    their first and last four characters joined by ``...``. Anything shorter
+    becomes ``****``: with fewer than 16 characters the eight revealed
+    characters would expose half or more of the secret (all of it at eight
+    characters or fewer).
+    """
+    if not value:
+        return ""
+    shown = 4
+    # Reveal the ends only when at least as many characters stay hidden as are shown.
+    if len(value) < 4 * shown:
+        return "****"
+    return f"{value[:shown]}...{value[-shown:]}"

fossil/engine.py ADDED Viewed

@@ -0,0 +1,122 @@
+from __future__ import annotations
+import time
+from fossil import __version__
+from fossil.analyzers import analyze_file, module_names
+from fossil.cache import CacheStore
+from fossil.git_miner import mine_history
+from fossil.models import ForensicResult
+from fossil.patterns import detect_patterns
+from fossil.repo import git_head, is_gitignored, is_tracked, relpath, resolve_target
+from fossil.scoring import score
+def explain(target: str, *, depth: int = 500, no_cache: bool = False) -> ForensicResult:
+    """Build the forensic report for a single target file.
+    Unless ``no_cache`` is set, a cached analysis for the current HEAD is
+    returned when one exists, and a fresh result is stored after analysis.
+    Untracked files get no confidence score and are never reported dead.
+    """
+    started = time.perf_counter()
+    path, repo_root, symlink = resolve_target(target)
+    head = git_head(repo_root)
+    cache = CacheStore(repo_root)
+    use_cache = not no_cache
+    if use_cache:
+        hit = cache.get_analysis(path, head, repo_root)
+        if hit:
+            return _from_dict(hit, cached=True)
+    static = analyze_file(path, repo_root)
+    tracked = is_tracked(path, repo_root)
+    refs = module_names(path, repo_root) | {path.stem}
+    git = mine_history(path, repo_root, depth, refs)
+    patterns = detect_patterns(path, repo_root)
+    warnings = [*git.warnings]
+    if symlink:
+        warnings.append(f"Target is a symlink; analyzed resolved path: {path}")
+    if is_gitignored(path, repo_root):
+        warnings.append("File is gitignored. Analysis may be incomplete.")
+    if tracked:
+        confidence = score(static, git, patterns)
+        # Dead means no import references and no call sites were found.
+        dead = static.import_references == 0 and static.call_sites == 0
+        status = "DEAD" if dead else "LIVE"
+    else:
+        confidence = None
+        dead = False
+        status = "UNTRACKED — No git history. Cannot determine death date."
+    rel = relpath(path, repo_root)
+    elapsed_ms = int((time.perf_counter() - started) * 1000)
+    # Removal hints are offered only for dead files.
+    if dead:
+        suggested_action = f"rm {rel}"
+        yolo_command = f"fossil explain {rel} --yolo"
+    else:
+        suggested_action = None
+        yolo_command = None
+    result = ForensicResult(
+        fossil_version=__version__,
+        target=target,
+        abs_path=str(path),
+        repo_root=str(repo_root),
+        language=static.language,
+        dead=dead,
+        status=status,
+        static_analysis=static,
+        git_history=git,
+        temporary_hold=patterns,
+        confidence=confidence,
+        suggested_action=suggested_action,
+        yolo_command=yolo_command,
+        analysis_duration_ms=elapsed_ms,
+        warnings=warnings,
+    )
+    if use_cache:
+        cache.put_analysis(path, head, repo_root, __version__, result.to_dict())
+    return result
+def _from_dict(data: dict, cached: bool) -> ForensicResult:
+    """Rebuild a ForensicResult from its dictionary form.
+    Nested sections (static analysis, git history, hold patterns, confidence)
+    are converted back into their model classes, and ``cached`` is stored on
+    the result. ``data`` is not modified: the git-history section is copied
+    before its commit entries are replaced with CommitInfo objects.
+    """
+    from fossil.models import (
+        CommitInfo,
+        ConfidenceResult,
+        ConfidenceSignal,
+        GitHistoryResult,
+        HoldPattern,
+        PatternResult,
+        Reference,
+        StaticAnalysisResult,
+    )
+    static_data = data["static_analysis"]
+    static = StaticAnalysisResult(
+        **{
+            **static_data,
+            "references": [Reference(**r) for r in static_data.get("references", [])],
+            "dynamic_references": [
+                Reference(**r) for r in static_data.get("dynamic_references", [])
+            ],
+            "reflection_patterns": [
+                Reference(**r) for r in static_data.get("reflection_patterns", [])
+            ],
+        }
+    )
+    # Shallow copy: only top-level keys are reassigned below, so the caller's dict stays intact.
+    git_data = dict(data["git_history"])
+    for key in ("death_commit", "original_author", "last_modified"):
+        if git_data.get(key):
+            git_data[key] = CommitInfo(**git_data[key])
+    git = GitHistoryResult(**git_data)
+    pattern_data = data["temporary_hold"]
+    patterns = PatternResult(
+        detected=pattern_data.get("detected", False),
+        patterns=[HoldPattern(**p) for p in pattern_data.get("patterns", [])],
+    )
+    confidence = None
+    if data.get("confidence"):
+        c = data["confidence"]
+        confidence = ConfidenceResult(
+            score=c["score"],
+            label=c["label"],
+            risk=c["risk"],
+            signals=[ConfidenceSignal(**s) for s in c.get("signals", [])],
+        )
+    return ForensicResult(
+        **{
+            **data,
+            "static_analysis": static,
+            "git_history": git,
+            "temporary_hold": patterns,
+            "confidence": confidence,
+            "cached": cached,
+        }
+    )