PyPI - agentcam - Versions diffs - 0.1.0__py3-none-any.whl - Mend

agentcam 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

agentcam/__init__.py +4 -0
agentcam/cli.py +252 -0
agentcam/git_state.py +207 -0
agentcam/models.py +146 -0
agentcam/paths.py +102 -0
agentcam/redaction.py +259 -0
agentcam/report.py +407 -0
agentcam/runner.py +301 -0
agentcam/scanner.py +363 -0
agentcam-0.1.0.dist-info/METADATA +313 -0
agentcam-0.1.0.dist-info/RECORD +14 -0
agentcam-0.1.0.dist-info/WHEEL +4 -0
agentcam-0.1.0.dist-info/entry_points.txt +2 -0
agentcam-0.1.0.dist-info/licenses/LICENSE +21 -0

agentcam/__init__.py ADDED Viewed

@@ -0,0 +1,4 @@
+"""agentcam: local-first CLI wrapper that records agent runs."""
+# Package version string; cli.py imports it for `agentcam version` and for
+# the run manifest's agentcam_version field.
+__version__ = "0.1.0"
+# Only the version is part of the package's star-import surface.
+__all__ = ["__version__"]

agentcam/cli.py ADDED Viewed

@@ -0,0 +1,252 @@
+"""agentcam command-line entry point.
+Subcommands:
+  - ``agentcam version``           — print version and exit
+  - ``agentcam run -- <argv...>``  — wrap a command, record raw + redacted
+                                     logs, generate AGENT_RUN_REPORT.md
+``run`` is intentionally argv-only; for shell features (pipes, redirects,
+variable expansion) wrap your own shell explicitly, e.g.::
+    agentcam run -- bash -lc "echo hi > out.txt"
+    agentcam run -- pwsh -Command "Get-Process | Out-File procs.txt"
+    agentcam run -- cmd /c "dir > files.txt"
+See ``docs/design.md`` (forthcoming) for the rationale.
+"""
+from __future__ import annotations
+import argparse
+import platform
+import sys
+from datetime import datetime, timezone
+from pathlib import Path
+from agentcam import __version__
+def build_parser() -> argparse.ArgumentParser:
+    parser = argparse.ArgumentParser(
+        prog="agentcam",
+        description=(
+            "Local-first CLI wrapper that records what your AI coding agent "
+            "changed in your repo."
+        ),
+    )
+    sub = parser.add_subparsers(dest="cmd", required=True, metavar="COMMAND")
+    sub.add_parser("version", help="Print agentcam version and exit.")
+    run = sub.add_parser(
+        "run",
+        help="Wrap a command and record the agent run.",
+        description=(
+            "Wraps an argv-style command. Use `bash -lc \"...\"`, "
+            "`pwsh -Command \"...\"`, or `cmd /c \"...\"` for shell features "
+            "(pipes, redirects, variable expansion)."
+        ),
+    )
+    run.add_argument(
+        "--name",
+        default=None,
+        help="Slug included in the run id (e.g. 'claude-fix-login').",
+    )
+    run.add_argument(
+        "argv",
+        nargs=argparse.REMAINDER,
+        help="The command to run, after a `--` separator.",
+    )
+    return parser
+def _strip_leading_dashdash(argv: list[str]) -> list[str]:
+    """argparse.REMAINDER keeps a leading `--`; strip it for cleanliness."""
+    if argv and argv[0] == "--":
+        return argv[1:]
+    return argv
+def main(argv: list[str] | None = None) -> int:
+    parser = build_parser()
+    args = parser.parse_args(argv)
+    if args.cmd == "version":
+        print(f"agentcam {__version__}")
+        return 0
+    if args.cmd == "run":
+        return _run_command(args)
+    parser.error(f"unknown subcommand: {args.cmd}")
+    return 2  # unreachable; parser.error exits
+# ---------------------------------------------------------------------------
+# `agentcam run` orchestrator
+# ---------------------------------------------------------------------------
+def _run_command(args) -> int:
+    # Imports are local so `agentcam version` doesn't pay for them at startup.
+    from agentcam.git_state import (
+        NotAGitRepoError,
+        collect_git_state,
+        is_working_tree_dirty,
+        resolve_git_dir,
+        resolve_git_root,
+    )
+    from agentcam.models import RunManifest
+    from agentcam.paths import RunIdCollisionError, create_run_dir
+    from agentcam.redaction import StreamingRedactor, redact_argv
+    from agentcam.report import render_report, write_manifest
+    from agentcam.runner import CommandNotFoundError, run_wrapped
+    from agentcam.scanner import scan_output, scan_paths
+    run_argv = _strip_leading_dashdash(args.argv or [])
+    if not run_argv:
+        print(
+            "agentcam run: no command provided. "
+            "Usage: agentcam run -- <command...>",
+            file=sys.stderr,
+        )
+        return 2
+    cwd = Path.cwd()
+    # 1) Confirm we're in a git repo and resolve git dir.
+    try:
+        git_dir = resolve_git_dir(cwd)
+        git_root = resolve_git_root(cwd)
+    except NotAGitRepoError:
+        print(
+            "agentcam: not in a git repository. "
+            "Initialize one with 'git init' first.",
+            file=sys.stderr,
+        )
+        return 2
+    except Exception as e:  # noqa: BLE001
+        print(f"agentcam: git error: {e}", file=sys.stderr)
+        return 2
+    # 2) Collect pre-run git state.
+    try:
+        state_before = collect_git_state(cwd, is_after=False)
+    except NotAGitRepoError:
+        print(
+            "agentcam: not in a git repository. "
+            "Initialize one with 'git init' first.",
+            file=sys.stderr,
+        )
+        return 2
+    pre_run_dirty = is_working_tree_dirty(state_before)
+    # 3) Create the run directory under <git_dir>/agentcam/runs/<run_id>/.
+    started_at = datetime.now(timezone.utc).astimezone()
+    try:
+        run_id, run_paths = create_run_dir(
+            git_dir, started_at, name=args.name
+        )
+    except RunIdCollisionError as e:
+        print(f"agentcam: {e}", file=sys.stderr)
+        return 2
+    # 4) Run the wrapped subprocess with threads-based tee.
+    try:
+        run_result = run_wrapped(
+            run_argv,
+            cwd=cwd,
+            stdout_raw_path=Path(run_paths.stdout_raw),
+            stderr_raw_path=Path(run_paths.stderr_raw),
+        )
+    except CommandNotFoundError as e:
+        print(str(e), file=sys.stderr)
+        return 2
+    ended_at = datetime.now(timezone.utc).astimezone()
+    duration = (ended_at - started_at).total_seconds()
+    # 5) Produce redacted logs from the raw logs.
+    _redact_log(Path(run_paths.stdout_raw), Path(run_paths.stdout_redacted))
+    _redact_log(Path(run_paths.stderr_raw), Path(run_paths.stderr_redacted))
+    # 6) Collect post-run git state (is_after=True triggers diff --check).
+    state_after = collect_git_state(cwd, is_after=True)
+    # 7) Scan paths + raw output for risk flags.
+    risk_flags = scan_paths(state_after.changed_files)
+    risk_flags.extend(_scan_log(Path(run_paths.stdout_raw), "stdout.log"))
+    risk_flags.extend(_scan_log(Path(run_paths.stderr_raw), "stderr.log"))
+    # 8) Assemble the manifest.
+    manifest = RunManifest(
+        schema_version="0.1",
+        run_id=run_id.text,
+        started_at=started_at,
+        ended_at=ended_at,
+        duration_seconds=duration,
+        cwd=str(cwd),
+        git_root=str(git_root),
+        git_dir=str(git_dir),
+        branch=state_before.branch,
+        is_detached_head=state_before.is_detached_head,
+        head_before=state_before.head,
+        head_after=state_after.head,
+        pre_existing_op=(
+            state_before.pre_existing_op or state_after.pre_existing_op
+        ),
+        pre_run_dirty=pre_run_dirty,
+        command_argv_raw=list(run_argv),
+        command_argv_redacted=redact_argv(list(run_argv)),
+        exit_detail=run_result.exit_detail,
+        shell_used=run_result.shell_used,
+        terminal_forward_degraded=run_result.terminal_forward_degraded,
+        platform=platform.system().lower(),
+        agentcam_version=__version__,
+        paths=run_paths,
+    )
+    # 9) Write report + manifest.
+    Path(run_paths.report_md).write_text(
+        render_report(manifest, state_before, state_after, risk_flags),
+        encoding="utf-8",
+    )
+    write_manifest(manifest, Path(run_paths.manifest_json))
+    # 10) Tell the user where to find the report (stderr so it doesn't pollute
+    # programmatic stdout consumers).
+    print(
+        f"\nagentcam: run report at {run_paths.report_md}",
+        file=sys.stderr,
+    )
+    # 11) Return the wrapper exit code (0 if subprocess succeeded, else 1).
+    return run_result.exit_detail.wrapper_exit
+def _redact_log(raw_path: Path, redacted_path: Path) -> None:
+    """Stream raw_path through StreamingRedactor into redacted_path."""
+    from agentcam.redaction import StreamingRedactor
+    with raw_path.open("rb") as in_fp, redacted_path.open("wb") as out_fp:
+        r = StreamingRedactor(out_fp)
+        while True:
+            chunk = in_fp.read(4096)
+            if not chunk:
+                break
+            r.feed(chunk)
+        r.close()
+def _scan_log(raw_path: Path, label: str):
+    """Scan a raw log for output-pattern risk flags."""
+    from agentcam.scanner import scan_output
+    try:
+        text = raw_path.read_bytes().decode("utf-8", errors="replace")
+    except OSError:
+        text = ""
+    return scan_output(text, stream_label=label)
+# Script entry point: the return value of main() becomes the exit status.
+if __name__ == "__main__":
+    sys.exit(main())

agentcam/git_state.py ADDED Viewed

@@ -0,0 +1,207 @@
+"""Git state collection (before and after the wrapped command).
+Uses ``git status --porcelain=v1 -z`` as the primary source of truth, with
+``git diff [--cached] --stat / --name-status / --check`` for display in the
+report. See plan section 4.
+``git_dir`` is resolved via ``git rev-parse --git-dir`` so worktree and
+submodule gitlink cases (where ``<repo>/.git`` is a file, not a directory)
+work correctly. See plan section 1.
+"""
+from __future__ import annotations
+import subprocess
+from pathlib import Path
+from agentcam.models import ChangedFile, ChangeStatus, GitState
+# Each pair is (entry name looked up inside the git dir, operation label).
+# Order matters: the first matching marker wins. ``rebase-merge`` and
+# ``rebase-apply`` are checked before ``REVERT_HEAD`` etc.
+_PRE_EXISTING_OP_MARKERS: tuple[tuple[str, str], ...] = (
+    ("MERGE_HEAD", "merge"),
+    ("rebase-merge", "rebase"),
+    ("rebase-apply", "rebase"),
+    ("CHERRY_PICK_HEAD", "cherry-pick"),
+    ("REVERT_HEAD", "revert"),
+    ("BISECT_LOG", "bisect"),
+)
+class NotAGitRepoError(RuntimeError):
+    """Raised when the cwd is not inside a git repository.
+    ``collect_git_state`` raises it when ``is_git_repo`` reports False.
+    """
+def is_git_repo(cwd: Path) -> bool:
+    return _git(cwd, "rev-parse", "--git-dir", check=False).returncode == 0
+def resolve_git_dir(cwd: Path) -> Path:
+    """Absolute path of the real git directory.
+    ``git rev-parse --git-dir`` resolves worktree / submodule gitlink files
+    for us, so we never have to read or parse ``<repo>/.git`` ourselves.
+    """
+    text = _git_text(cwd, "rev-parse", "--git-dir")
+    p = Path(text)
+    if not p.is_absolute():
+        p = (cwd / p).resolve()
+    return p
+def resolve_git_root(cwd: Path) -> Path:
+    """Absolute path of the working tree root."""
+    return Path(_git_text(cwd, "rev-parse", "--show-toplevel"))
+def detect_pre_existing_op(git_dir: Path) -> str | None:
+    """Return operation name (merge / rebase / cherry-pick / etc.) or None."""
+    for filename, op in _PRE_EXISTING_OP_MARKERS:
+        if (git_dir / filename).exists():
+            return op
+    return None
+def collect_git_state(cwd: Path, *, is_after: bool = False) -> GitState:
+    """Snapshot git state. ``is_after=True`` also runs ``git diff --check``."""
+    if not is_git_repo(cwd):
+        raise NotAGitRepoError(
+            "Not in a git repository. Initialize one with 'git init' first."
+        )
+    git_dir = resolve_git_dir(cwd)
+    head = _safe_head(cwd)
+    branch_raw = _git_text(cwd, "branch", "--show-current")
+    branch = branch_raw or None
+    is_detached = head is not None and not branch
+    porcelain_raw = _git(cwd, "status", "--porcelain=v1", "-z").stdout
+    diff_stat = _git_text(cwd, "diff", "--stat", check=False)
+    diff_stat_cached = _git_text(cwd, "diff", "--cached", "--stat", check=False)
+    diff_name_status = _git_text(cwd, "diff", "--name-status", check=False)
+    diff_name_status_cached = _git_text(
+        cwd, "diff", "--cached", "--name-status", check=False
+    )
+    diff_check = ""
+    diff_check_cached = ""
+    if is_after:
+        diff_check = _git_text(cwd, "diff", "--check", check=False)
+        diff_check_cached = _git_text(
+            cwd, "diff", "--cached", "--check", check=False
+        )
+    pre_existing_op = detect_pre_existing_op(git_dir)
+    changed_files = parse_porcelain_v1z(porcelain_raw)
+    return GitState(
+        head=head,
+        branch=branch,
+        is_detached_head=is_detached,
+        porcelain_raw=porcelain_raw,
+        diff_stat=diff_stat,
+        diff_stat_cached=diff_stat_cached,
+        diff_name_status=diff_name_status,
+        diff_name_status_cached=diff_name_status_cached,
+        diff_check=diff_check,
+        diff_check_cached=diff_check_cached,
+        pre_existing_op=pre_existing_op,
+        changed_files=changed_files,
+    )
+def is_working_tree_dirty(state: GitState) -> bool:
+    """True if there are any staged, unstaged, or untracked changes."""
+    return bool(state.changed_files)
+# ---------------------------------------------------------------------------
+# Porcelain v1 -z parser
+# ---------------------------------------------------------------------------
+def parse_porcelain_v1z(data: bytes) -> list[ChangedFile]:
+    """Parse ``git status --porcelain=v1 -z`` output.
+    Each entry is ``XY<space><path>\\x00``. R/C (rename / copy) entries take
+    two NUL-separated fields: ``XY<space><new>\\x00<old>\\x00``.
+    """
+    if not data:
+        return []
+    tokens = data.split(b"\x00")
+    results: list[ChangedFile] = []
+    i = 0
+    while i < len(tokens):
+        tok = tokens[i]
+        if not tok:
+            i += 1
+            continue
+        if len(tok) < 3:
+            # Malformed entry; skip defensively rather than crash.
+            i += 1
+            continue
+        x = chr(tok[0])
+        y = chr(tok[1])
+        # tok[2] is the separator (typically a space). Path bytes start at 3.
+        path = tok[3:].decode("utf-8", errors="replace")
+        rename_from: str | None = None
+        if x in ("R", "C") or y in ("R", "C"):
+            i += 1
+            if i < len(tokens):
+                rename_from = tokens[i].decode("utf-8", errors="replace")
+        status = _classify_status(x, y)
+        results.append(
+            ChangedFile(path=path, status=status, rename_from=rename_from)
+        )
+        i += 1
+    return results
+def _classify_status(x: str, y: str) -> ChangeStatus:
+    xy = x + y
+    if xy == "??":
+        return "untracked"
+    if x == "U" or y == "U" or xy in ("AA", "DD"):
+        return "unmerged"
+    if x in ("R", "C") or y in ("R", "C"):
+        return "renamed"
+    if x != " " and x not in ("?", "!"):
+        if x == "D":
+            return "staged_deleted"
+        return "staged"
+    if y == "M":
+        return "unstaged_modified"
+    if y == "D":
+        return "unstaged_deleted"
+    # Defensive fallback.
+    return "unstaged_modified"
+# ---------------------------------------------------------------------------
+# Low-level git helpers
+# ---------------------------------------------------------------------------
+def _git(
+    cwd: Path,
+    *args: str,
+    check: bool = True,
+) -> subprocess.CompletedProcess[bytes]:
+    return subprocess.run(
+        ["git", *args],
+        cwd=cwd,
+        capture_output=True,
+        check=check,
+    )
+def _git_text(cwd: Path, *args: str, check: bool = True) -> str:
+    res = _git(cwd, *args, check=check)
+    return res.stdout.decode("utf-8", errors="replace").rstrip("\n")
+def _safe_head(cwd: Path) -> str | None:
+    """Return HEAD SHA, or None if HEAD does not resolve (empty repo)."""
+    res = _git(cwd, "rev-parse", "HEAD", check=False)
+    if res.returncode != 0:
+        return None
+    return res.stdout.decode("utf-8", errors="replace").strip() or None

agentcam/models.py ADDED Viewed

@@ -0,0 +1,146 @@
+"""Data structures used across agentcam modules.
+Plain dataclasses (no Pydantic) to keep dependencies to the standard library
+only. JSON serialization is done by ``report.py`` and the manifest writer, not
+by these classes.
+See ``docs/design.md`` (forthcoming) for the schema design rationale.
+"""
+from __future__ import annotations
+from dataclasses import dataclass, field
+from datetime import datetime
+from typing import Literal
+# Two-level risk taxonomy (v0.1). LOW was dropped — see design.md decision 8.
+RiskLevel = Literal["HIGH", "MEDIUM"]
+# Classification of one `git status` entry; these are the labels produced by
+# git_state._classify_status.
+ChangeStatus = Literal[
+    "staged",
+    "staged_deleted",
+    "unstaged_modified",
+    "unstaged_deleted",
+    "untracked",
+    "renamed",
+    "unmerged",
+]
+# Source of the exit interpretation in manifest.exit_detail.
+InterpretationSource = Literal[
+    "known_table",
+    "signal",
+    "user_defined",
+    "unknown",
+]
+@dataclass(frozen=True, slots=True)
+class RunId:
+    """Identifier for an agentcam run.
+    Format: ``YYYYMMDD-HHMMSS-<ms>-<slug>[-<hex>]``
+    where ``<hex>`` is a 4-char collision-avoidance suffix added on retry.
+    Immutable; ``str(run_id)`` returns the identifier text.
+    """
+    # The full identifier string; this is what the manifest stores as run_id.
+    text: str
+    def __str__(self) -> str:
+        return self.text
+@dataclass(frozen=True, slots=True)
+class RunPaths:
+    """Filesystem layout for a single agentcam run.
+    All paths live under ``<git_dir>/agentcam/runs/<run_id>/``. ``git_dir`` is
+    the *real* git dir as resolved by ``git rev-parse --git-dir`` (handles
+    worktrees and submodule gitlinks correctly).
+    Paths are stored as plain strings; callers wrap them in ``Path`` as
+    needed.
+    """
+    # Directory that holds every artifact of this run.
+    run_dir: str
+    # Destination of the serialized RunManifest.
+    manifest_json: str
+    # Destination of the rendered Markdown run report.
+    report_md: str
+    # Raw stdout / stderr captured from the wrapped command.
+    stdout_raw: str
+    stderr_raw: str
+    # Redacted copies produced from the raw logs after the run.
+    stdout_redacted: str
+    stderr_redacted: str
+@dataclass
+class ChangedFile:
+    """One changed-file entry parsed from a ``git status`` snapshot.
+    The status parser creates one instance per porcelain record, so this
+    describes a single snapshot rather than a before/after comparison.
+    """
+    # Path as reported by git (the new path for renames / copies).
+    path: str
+    # Classification derived from the two porcelain status columns.
+    status: ChangeStatus
+    # Original path for rename / copy entries; None otherwise.
+    rename_from: str | None = None
+    secret_like_name: bool = False  # True if filename matches secret-like pattern
+@dataclass
+class RiskFlag:
+    """A single risk observation. evidence must not contain raw secrets."""
+    # Severity: "HIGH" or "MEDIUM".
+    level: RiskLevel
+    # Identifier of the rule that produced this flag.
+    rule: str
+    # Human-readable supporting detail; must already be free of raw secrets.
+    evidence: str
+@dataclass
+class GitState:
+    """Snapshot of git state (before or after the wrapped command)."""
+    # HEAD SHA, or None when HEAD does not resolve (e.g. an empty repo).
+    head: str | None
+    # Current branch name, or None when git reports no current branch.
+    branch: str | None
+    # True when HEAD resolves but there is no current branch.
+    is_detached_head: bool
+    # Unparsed bytes of `git status --porcelain=v1 -z`.
+    porcelain_raw: bytes
+    # Text output of `git diff --stat` / `git diff --cached --stat`.
+    diff_stat: str
+    diff_stat_cached: str
+    # Text output of `git diff --name-status` (working tree / --cached).
+    diff_name_status: str
+    diff_name_status_cached: str
+    # `git diff --check` output; only collected for the post-run snapshot.
+    diff_check: str = ""
+    diff_check_cached: str = ""
+    pre_existing_op: str | None = None  # 'merge' | 'rebase' | 'cherry-pick' | ...
+    # Entries parsed from porcelain_raw.
+    changed_files: list[ChangedFile] = field(default_factory=list)
+@dataclass
+class ExitDetail:
+    """Exit status detail, written to manifest and Exit Code Detail section.
+    See plan section 9 (Exit code pass-through).
+    """
+    # Exit code that the agentcam CLI itself returns for the run.
+    wrapper_exit: int  # 0 or 1
+    # Return code of the wrapped command as reported by the runner.
+    raw_returncode: int
+    # NOTE(review): presumably a hex rendering of raw_returncode, None when
+    # not applicable — confirm against runner.py.
+    raw_returncode_hex: str | None
+    platform: str
+    # Human-readable explanation of the exit status.
+    interpretation: str
+    # Where the interpretation came from (see InterpretationSource).
+    interpretation_source: InterpretationSource
+@dataclass
+class RunManifest:
+    """Top-level run manifest, serialized to ``manifest.json``.
+    Built by the ``agentcam run`` orchestrator once the wrapped command has
+    finished and the post-run git state has been collected.
+    """
+    schema_version: str
+    # Text form of the RunId.
+    run_id: str
+    # Timestamps taken around the run; duration is ended_at - started_at.
+    started_at: datetime
+    ended_at: datetime | None
+    duration_seconds: float | None
+    # Working directory the command was run from.
+    cwd: str
+    # Working tree root and real git directory, as strings.
+    git_root: str
+    git_dir: str
+    # Branch and detached-HEAD flag come from the pre-run snapshot.
+    branch: str | None
+    is_detached_head: bool
+    # HEAD SHA before and after the wrapped command.
+    head_before: str | None
+    head_after: str | None
+    # In-progress git operation seen in either snapshot, if any.
+    pre_existing_op: str | None
+    # True when the pre-run snapshot already listed changed files.
+    pre_run_dirty: bool
+    # The wrapped command as given, and the same argv after redaction.
+    command_argv_raw: list[str]
+    command_argv_redacted: list[str]
+    exit_detail: ExitDetail | None
+    shell_used: bool
+    terminal_forward_degraded: bool
+    # Lower-cased platform.system() value.
+    platform: str
+    agentcam_version: str
+    paths: RunPaths