PyPI - sieve-layer - Versions diffs - 0.1.0__py3-none-any.whl - Mend

sieve-layer 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28) hide show

sieve/__init__.py +35 -0
sieve/approval/__init__.py +4 -0
sieve/approval/base.py +32 -0
sieve/approval/cli.py +64 -0
sieve/audit/__init__.py +0 -0
sieve/audit/log.py +274 -0
sieve/audit/models.py +29 -0
sieve/cli.py +129 -0
sieve/config.py +55 -0
sieve/core/__init__.py +0 -0
sieve/core/cost.py +108 -0
sieve/core/decision.py +50 -0
sieve/core/errors.py +46 -0
sieve/core/interceptor.py +271 -0
sieve/core/similarity.py +161 -0
sieve/decorator.py +71 -0
sieve/integrations/__init__.py +0 -0
sieve/integrations/langchain.py +191 -0
sieve/integrations/mcp.py +61 -0
sieve/policy/__init__.py +0 -0
sieve/policy/engine.py +59 -0
sieve/policy/loader.py +227 -0
sieve/policy/models.py +55 -0
sieve_layer-0.1.0.dist-info/METADATA +387 -0
sieve_layer-0.1.0.dist-info/RECORD +28 -0
sieve_layer-0.1.0.dist-info/WHEEL +4 -0
sieve_layer-0.1.0.dist-info/entry_points.txt +2 -0
sieve_layer-0.1.0.dist-info/licenses/LICENSE +21 -0

sieve/__init__.py ADDED Viewed

@@ -0,0 +1,35 @@
+"""Sieve — drop-in AI agent tool call interceptor.
+Public API::
+    import sieve
+    # 1. Configure once at startup
+    sieve.configure(policy_path="policy.yaml", db_path="audit.db")
+    # 2. Guard plain functions
+    @sieve.guard()
+    def postgres_query(query: str) -> str:
+        ...
+    # 3. Guard LangChain tools
+    from sieve.integrations.langchain import wrap_tool
+    safe_tool = wrap_tool(my_langchain_tool)
+    # 4. Guard MCP servers
+    from sieve.integrations.mcp import SieveMiddleware
+    server.add_middleware(SieveMiddleware)
+"""
+from sieve.config import configure, get_interceptor
+from sieve.core.errors import ApprovalDenied, CostLimitExceeded, PolicyViolation
+from sieve.decorator import guard
+# Names re-exported from the submodules imported above; this list is what
+# ``from sieve import *`` exposes.
+__all__ = [
+    "configure",
+    "get_interceptor",
+    "guard",
+    "PolicyViolation",
+    "ApprovalDenied",
+    "CostLimitExceeded",
+]

sieve/approval/__init__.py ADDED Viewed

@@ -0,0 +1,4 @@
+from sieve.approval.base import ApprovalHandler, StaticApprovalHandler
+from sieve.approval.cli import CLIApprovalHandler
+# Public names of the approval package (the protocol plus its two bundled handlers).
+__all__ = ["ApprovalHandler", "CLIApprovalHandler", "StaticApprovalHandler"]

sieve/approval/base.py ADDED Viewed

@@ -0,0 +1,32 @@
+from __future__ import annotations
+from typing import TYPE_CHECKING, Protocol, runtime_checkable
+if TYPE_CHECKING:
+    from sieve.core.decision import ToolCall
+@runtime_checkable
+class ApprovalHandler(Protocol):
+    """Structural interface for approval mechanisms.
+    Implementations must return True if the operator approves the call,
+    False to deny. Should not raise; denial is expressed via the return value.
+    Because the protocol is ``runtime_checkable``, ``isinstance`` checks only
+    confirm that a ``request`` attribute exists, not that its signature matches.
+    """
+    def request(self, call: ToolCall) -> bool:
+        """Synchronously request approval for a tool call.
+        Args:
+            call: The tool call awaiting a decision.
+        Returns:
+            True (approved) or False (denied).
+        """
+        ...
+class StaticApprovalHandler:
+    """Non-interactive approval handler for tests, CI, and headless agents.
+    Every request receives the same fixed answer chosen at construction time.
+    """
+    def __init__(self, approve: bool = False) -> None:
+        # Denies by default so an unconfigured handler never lets a call through.
+        self.approve = approve
+    def request(self, call: ToolCall) -> bool:
+        """Return the fixed decision; ``call`` is not inspected."""
+        return self.approve

sieve/approval/cli.py ADDED Viewed

@@ -0,0 +1,64 @@
+from __future__ import annotations
+import json
+import select
+import sys
+from typing import TYPE_CHECKING
+if TYPE_CHECKING:
+    from sieve.core.decision import ToolCall
+class CLIApprovalHandler:
+    """Interactive terminal approval prompt.
+    Blocks execution and asks the operator y/N. Denies on any non-affirmative
+    input, EOF, keyboard interrupt, or when the terminal cannot be read. When a
+    timeout is configured and expires, the outcome is ``timeout_default``
+    (deny unless explicitly configured otherwise).
+    Args:
+        timeout_seconds: Seconds to wait for an answer; ``None`` waits forever.
+        timeout_default: Decision applied when the wait times out.
+    """
+    def __init__(
+        self,
+        timeout_seconds: float | None = None,
+        timeout_default: bool = False,
+    ) -> None:
+        self.timeout_seconds = timeout_seconds
+        self.timeout_default = timeout_default
+    def request(self, call: ToolCall) -> bool:
+        """Show the pending call on stderr and return the operator's decision.
+        Input problems never propagate: the approval contract expresses denial
+        through the return value, so unreadable input is treated as a denial.
+        Returns:
+            True only when the answer is ``y``/``yes`` (case-insensitive) or the
+            wait timed out with ``timeout_default=True``.
+        """
+        try:
+            args_display = json.dumps(call.args, indent=2, default=str)
+        except (TypeError, ValueError):
+            # Unserialisable dict keys or circular references must not abort
+            # the prompt; fall back to the plain repr so the operator still
+            # sees what is being requested.
+            args_display = repr(call.args)
+        print(
+            f"\n{'='*60}\n"
+            f"[SIEVE] Approval required for tool call:\n"
+            f"  Tool : {call.name}\n"
+            f"  Args : {args_display}\n"
+            f"{'='*60}",
+            file=sys.stderr,
+        )
+        try:
+            answer = self._read_answer().strip().lower()
+        except (EOFError, KeyboardInterrupt):
+            print("\n[SIEVE] Input interrupted — denying by default.", file=sys.stderr)
+            return False
+        except (OSError, ValueError) as exc:
+            # select() rejects stdin when it is not a selectable descriptor
+            # (Windows consoles, replaced or closed streams). Fail closed.
+            print(
+                f"\n[SIEVE] Cannot read approval input ({exc}) — denying by default.",
+                file=sys.stderr,
+            )
+            return False
+        approved = answer in ("y", "yes")
+        status = "APPROVED" if approved else "DENIED"
+        print(f"[SIEVE] {status}.\n", file=sys.stderr)
+        return approved
+    def _read_answer(self) -> str:
+        """Read one raw answer line, honouring ``timeout_seconds`` if set.
+        Returns the typed line, or a synthetic ``"y"``/``"n"`` reflecting
+        ``timeout_default`` when the wait expires.
+        Raises:
+            EOFError: stdin is exhausted (blocking path).
+            OSError, ValueError: stdin cannot be polled with ``select``.
+        """
+        prompt = "Approve this call? [y/N]: "
+        if self.timeout_seconds is None:
+            return input(prompt)
+        print(prompt, end="", flush=True)
+        readable, _, _ = select.select([sys.stdin], [], [], self.timeout_seconds)
+        if readable:
+            return sys.stdin.readline()
+        status = "approving" if self.timeout_default else "denying"
+        print(
+            f"\n[SIEVE] Approval timed out after {self.timeout_seconds:g}s — "
+            f"{status} by default.",
+            file=sys.stderr,
+        )
+        return "y" if self.timeout_default else "n"

sieve/audit/__init__.py ADDED Viewed

File without changes

sieve/audit/log.py ADDED Viewed

@@ -0,0 +1,274 @@
+from __future__ import annotations
+import hashlib
+import json
+import os
+import re
+import sqlite3
+import stat
+import threading
+import time
+from contextlib import closing
+from pathlib import Path
+from typing import Any
+from sieve.audit.models import AuditEntry
+from sieve.core.decision import Decision, ToolCall
+# prev_hash used for the very first entry, when the table is still empty.
+_GENESIS_HASH = "0" * 64
+# Applied both as the sqlite3 connect timeout and (in ms) as PRAGMA busy_timeout.
+_SQLITE_TIMEOUT_SECONDS = 30.0
+# Dict keys whose name matches this pattern have their whole value redacted.
+_SECRET_KEY_RE = re.compile(
+    r"(api[_-]?key|authorization|access[_-]?token|auth[_-]?token|bearer|client[_-]?secret|"
+    r"password|private[_-]?key|refresh[_-]?token|secret)",
+    re.IGNORECASE,
+)
+# Token-shaped substrings (sk-..., xox?-..., gh?_..., "Bearer ...") that are
+# redacted wherever they appear inside a string value.
+_SECRET_VALUE_RE = re.compile(
+    r"(?i)\b(sk-[A-Za-z0-9_-]{12,}|xox[baprs]-[A-Za-z0-9-]{12,}|"
+    r"gh[pousr]_[A-Za-z0-9_]{12,}|Bearer\s+[A-Za-z0-9._~+/=-]{12,})\b"
+)
+# Placeholder written in place of anything redacted.
+_REDACTED = "[REDACTED]"
+# Schema of the single audit table; tool_args and metadata hold JSON text.
+_DDL = """
+CREATE TABLE IF NOT EXISTS audit_log (
+    id          INTEGER PRIMARY KEY AUTOINCREMENT,
+    timestamp   REAL    NOT NULL,
+    tool_name   TEXT    NOT NULL,
+    tool_args   TEXT    NOT NULL,
+    outcome     TEXT    NOT NULL,
+    rule_name   TEXT,
+    approved_by TEXT,
+    error       TEXT,
+    metadata    TEXT,
+    prev_hash   TEXT    NOT NULL,
+    entry_hash  TEXT    NOT NULL
+);
+"""
+# Explicit column list shared by the SELECTs in verify_chain() and tail().
+_AUDIT_COLUMNS = (
+    "id, timestamp, tool_name, tool_args, outcome, rule_name, approved_by, "
+    "error, metadata, prev_hash, entry_hash"
+)
+def _canonical_json(fields: dict[str, Any]) -> str:
+    """Serialize *fields* to compact JSON with keys in sorted order.
+    Values JSON cannot represent natively are converted with ``str``.
+    """
+    encoder = json.JSONEncoder(
+        sort_keys=True,
+        separators=(",", ":"),
+        default=str,
+    )
+    return encoder.encode(fields)
+def _compute_hash(prev_hash: str, fields: dict[str, Any]) -> str:
+    """Return the hex sha256 of *prev_hash* followed by the canonical JSON of *fields*."""
+    digest = hashlib.sha256()
+    # Feeding the two parts separately hashes the same bytes as their concatenation.
+    digest.update(prev_hash.encode())
+    digest.update(_canonical_json(fields).encode())
+    return digest.hexdigest()
+def redact_sensitive(value: Any) -> Any:
+    """Return a JSON-like copy with likely credentials removed before auditing.
+    Dict values whose key matches ``_SECRET_KEY_RE`` are replaced wholesale;
+    token-shaped substrings inside any string are replaced in place. Lists,
+    tuples, sets and frozensets are walked recursively and returned as lists.
+    Other values are returned unchanged.
+    """
+    if isinstance(value, dict):
+        return {
+            key: _REDACTED if _SECRET_KEY_RE.search(str(key)) else redact_sensitive(item)
+            for key, item in value.items()
+        }
+    if isinstance(value, (list, tuple)):
+        return [redact_sensitive(item) for item in value]
+    if isinstance(value, (set, frozenset)):
+        # Sets used to fall through untouched and were later stringified
+        # verbatim by the default=str serializer, bypassing value redaction.
+        # Sorting by repr gives a stable order regardless of hash seed.
+        return sorted((redact_sensitive(item) for item in value), key=repr)
+    if isinstance(value, str):
+        return _SECRET_VALUE_RE.sub(_REDACTED, value)
+    return value
+class AuditLog:
+    """Thread-safe, SQLite-backed, hash-chained audit log.
+    Each appended entry stores the previous entry's ``entry_hash`` as its
+    ``prev_hash`` and derives its own hash from it, making any retrospective
+    modification detectable via verify_chain().
+    Every operation opens a fresh connection. NOTE(review): SQLite gives each
+    connection to ``":memory:"`` its own private database, so an in-memory log
+    presumably loses its table between calls — confirm before relying on it.
+    """
+    def __init__(self, db_path: str | Path = "sieve_audit.db") -> None:
+        """Create the database file (owner-only permissions) and schema if needed."""
+        self._db_path = str(db_path)
+        self._lock = threading.Lock()
+        self._ensure_private_db_file()
+        self._init_db()
+    def _connect(self) -> sqlite3.Connection:
+        """Open a new autocommit connection with Row access and a busy timeout."""
+        conn = sqlite3.connect(
+            self._db_path,
+            timeout=_SQLITE_TIMEOUT_SECONDS,
+            isolation_level=None,
+        )
+        conn.row_factory = sqlite3.Row
+        conn.execute(f"PRAGMA busy_timeout = {int(_SQLITE_TIMEOUT_SECONDS * 1000)}")
+        if self._db_path != ":memory:":
+            conn.execute("PRAGMA journal_mode = WAL")
+        return conn
+    def _ensure_private_db_file(self) -> None:
+        """Create the database file with mode 0o600, or strip group/other bits.
+        Raises:
+            ValueError: The configured path is a directory.
+        """
+        if self._db_path == ":memory:":
+            return
+        path = Path(self._db_path)
+        if path.exists() and path.is_dir():
+            raise ValueError(f"Audit database path points to a directory: {path}")
+        path.parent.mkdir(parents=True, exist_ok=True)
+        try:
+            # O_EXCL makes creation atomic: either we create it private, or it
+            # already exists and its permissions are tightened below.
+            fd = os.open(path, os.O_CREAT | os.O_EXCL | os.O_WRONLY, 0o600)
+        except FileExistsError:
+            current = stat.S_IMODE(path.stat().st_mode)
+            private = current & ~0o077
+            if current != private:
+                os.chmod(path, private)
+        else:
+            os.close(fd)
+    def _init_db(self) -> None:
+        """Create the audit table and add the ``metadata`` column to older schemas."""
+        with closing(self._connect()) as conn:
+            conn.execute("BEGIN IMMEDIATE")
+            conn.execute(_DDL)
+            columns = {
+                row["name"]
+                for row in conn.execute("PRAGMA table_info(audit_log)").fetchall()
+            }
+            if "metadata" not in columns:
+                conn.execute("ALTER TABLE audit_log ADD COLUMN metadata TEXT")
+            conn.commit()
+    def _last_hash(self, conn: sqlite3.Connection) -> str:
+        """Return the newest entry's hash, or the genesis hash for an empty log."""
+        row = conn.execute(
+            "SELECT entry_hash FROM audit_log ORDER BY id DESC LIMIT 1"
+        ).fetchone()
+        return row["entry_hash"] if row else _GENESIS_HASH
+    @staticmethod
+    def _audit_copy(value: Any) -> Any:
+        """Redact *value* and round-trip it through canonical JSON.
+        The round trip yields exactly what verify_chain() will later load from
+        the database: non-string dict keys become strings and non-JSON values
+        become their ``str`` form. Hashing this normalized copy keeps append()
+        and verify_chain() in agreement; hashing the raw value does not,
+        because JSON sorts keys before coercing them to strings (1, 2, 10)
+        while the reloaded string keys sort as "1", "10", "2".
+        """
+        return json.loads(_canonical_json(redact_sensitive(value)))
+    def append(
+        self,
+        call: ToolCall,
+        decision: Decision,
+        *,
+        approved_by: str | None = None,
+        error: str | None = None,
+        metadata: dict[str, Any] | None = None,
+    ) -> AuditEntry:
+        """Append a new entry to the chain and return the written AuditEntry.
+        Args:
+            call: Tool call being recorded; its args are redacted before storage.
+            decision: Policy decision whose outcome and rule name are recorded.
+            approved_by: Identifier of the approver, if any.
+            error: Error text to record, if any.
+            metadata: Optional extra data; redacted like the args.
+        Returns:
+            The stored entry, with args/metadata in their JSON-normalized form.
+        """
+        # Normalize before taking the lock so serialization failures never
+        # leave a write transaction open.
+        audit_args = self._audit_copy(call.args)
+        audit_metadata = self._audit_copy(metadata) if metadata is not None else None
+        with self._lock:
+            with closing(self._connect()) as conn:
+                # IMMEDIATE takes the write lock up front so reading the last
+                # hash and inserting the new row happen atomically.
+                conn.execute("BEGIN IMMEDIATE")
+                prev_hash = self._last_hash(conn)
+                now = time.time()
+                hashable = {
+                    "timestamp": now,
+                    "tool_name": call.name,
+                    "tool_args": audit_args,
+                    "outcome": decision.outcome.value,
+                    "rule_name": decision.rule_name,
+                    "approved_by": approved_by,
+                    "error": error,
+                    "metadata": audit_metadata,
+                }
+                entry_hash = _compute_hash(prev_hash, hashable)
+                cursor = conn.execute(
+                    """
+                    INSERT INTO audit_log
+                        (timestamp, tool_name, tool_args, outcome, rule_name,
+                         approved_by, error, metadata, prev_hash, entry_hash)
+                    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+                    """,
+                    (
+                        now,
+                        call.name,
+                        _canonical_json(audit_args),
+                        decision.outcome.value,
+                        decision.rule_name,
+                        approved_by,
+                        error,
+                        _canonical_json(audit_metadata) if audit_metadata is not None else None,
+                        prev_hash,
+                        entry_hash,
+                    ),
+                )
+                conn.commit()
+                return AuditEntry(
+                    id=cursor.lastrowid,
+                    timestamp=now,
+                    tool_name=call.name,
+                    tool_args=audit_args,
+                    outcome=decision.outcome.value,
+                    rule_name=decision.rule_name,
+                    approved_by=approved_by,
+                    error=error,
+                    metadata=audit_metadata,
+                    prev_hash=prev_hash,
+                    entry_hash=entry_hash,
+                )
+    def verify_chain(self) -> tuple[bool, str]:
+        """Verify the integrity of the entire audit chain.
+        Returns (True, message) if valid, (False, description_of_first_violation) if tampered.
+        A row whose JSON columns cannot be decoded is reported as a violation
+        rather than raised.
+        """
+        with closing(self._connect()) as conn:
+            rows = conn.execute(
+                f"SELECT {_AUDIT_COLUMNS} FROM audit_log ORDER BY id ASC"
+            ).fetchall()
+        if not rows:
+            return True, "Audit log is empty — chain is vacuously valid."
+        prev_hash = _GENESIS_HASH
+        for row in rows:
+            stored_prev = row["prev_hash"]
+            if stored_prev != prev_hash:
+                return False, (
+                    f"Chain broken at id={row['id']}: "
+                    f"stored prev_hash {stored_prev!r} != expected {prev_hash!r}"
+                )
+            try:
+                tool_args = json.loads(row["tool_args"])
+                metadata = json.loads(row["metadata"]) if row["metadata"] else None
+            except (TypeError, ValueError) as exc:
+                # Corrupted JSON means the stored record was altered; report
+                # it instead of crashing the verifier.
+                return False, f"Undecodable JSON at id={row['id']}: {exc}"
+            hashable = {
+                "timestamp": row["timestamp"],
+                "tool_name": row["tool_name"],
+                "tool_args": tool_args,
+                "outcome": row["outcome"],
+                "rule_name": row["rule_name"],
+                "approved_by": row["approved_by"],
+                "error": row["error"],
+                "metadata": metadata,
+            }
+            expected_hash = _compute_hash(prev_hash, hashable)
+            if row["entry_hash"] != expected_hash:
+                # Fall back to the hash layout without the "metadata" key,
+                # which the code treats as the legacy format.
+                legacy_hashable = dict(hashable)
+                legacy_hashable.pop("metadata")
+                legacy_expected_hash = _compute_hash(prev_hash, legacy_hashable)
+                if row["entry_hash"] != legacy_expected_hash:
+                    return False, (
+                        f"Hash mismatch at id={row['id']}: "
+                        f"stored {row['entry_hash']!r} != computed {expected_hash!r}"
+                    )
+            prev_hash = row["entry_hash"]
+        return True, f"Chain intact — {len(rows)} entries verified."
+    def tail(self, n: int = 20) -> list[AuditEntry]:
+        """Return the n most recent audit entries, oldest first.
+        Raises:
+            ValueError: ``n`` is not an integer (bools are rejected) or is < 1.
+        """
+        if isinstance(n, bool) or not isinstance(n, int):
+            raise ValueError("AuditLog.tail limit must be an integer.")
+        if n < 1:
+            raise ValueError("AuditLog.tail limit must be greater than zero.")
+        with closing(self._connect()) as conn:
+            rows = conn.execute(
+                f"SELECT {_AUDIT_COLUMNS} FROM audit_log ORDER BY id DESC LIMIT ?", (n,)
+            ).fetchall()
+        return [
+            AuditEntry(
+                id=row["id"],
+                timestamp=row["timestamp"],
+                tool_name=row["tool_name"],
+                tool_args=json.loads(row["tool_args"]),
+                outcome=row["outcome"],
+                rule_name=row["rule_name"],
+                approved_by=row["approved_by"],
+                error=row["error"],
+                metadata=json.loads(row["metadata"]) if row["metadata"] else None,
+                prev_hash=row["prev_hash"],
+                entry_hash=row["entry_hash"],
+            )
+            for row in reversed(rows)
+        ]

sieve/audit/models.py ADDED Viewed

@@ -0,0 +1,29 @@
+from __future__ import annotations
+from dataclasses import dataclass
+from typing import Any
+@dataclass(frozen=True)
+class AuditEntry:
+    """A single record in the tamper-evident audit chain.
+    Fields included in the hash:
+        timestamp, tool_name, redacted tool_args, outcome, rule_name,
+        approved_by, error, redacted metadata
+    prev_hash:   the previous entry's entry_hash, copied verbatim (64 zero chars for genesis).
+    entry_hash:  sha256(prev_hash + canonical_json(hashable_fields)).
+    """
+    id: int
+    timestamp: float
+    tool_name: str
+    tool_args: dict[str, Any]
+    outcome: str          # Outcome enum value
+    rule_name: str | None
+    approved_by: str | None   # identifier of who approved (e.g. "cli:operator")
+    error: str | None
+    metadata: dict[str, Any] | None
+    prev_hash: str
+    entry_hash: str

sieve/cli.py ADDED Viewed

@@ -0,0 +1,129 @@
+"""Sieve CLI — inspect and verify the tamper-evident audit log.
+Commands:
+    sieve verify <db>   Verify the hash chain integrity of an audit database.
+    sieve tail <db>     Print the most recent audit entries.
+"""
+from __future__ import annotations
+import argparse
+import datetime
+import json
+import sys
+from pathlib import Path
+from sieve.audit.models import AuditEntry
+def _existing_database_path(raw_path: str | Path) -> Path:
+    """Return *raw_path* as a Path once it is known to be an existing regular file.
+    Raises:
+        FileNotFoundError: Nothing exists at the path.
+        ValueError: Something exists there but it is not a file.
+    """
+    candidate = Path(raw_path)
+    if candidate.is_file():
+        return candidate
+    if candidate.exists():
+        raise ValueError(f"database path is not a file: {candidate}")
+    raise FileNotFoundError(f"database not found: {candidate}")
+def _format_entry(entry: AuditEntry) -> str:
+    """Render one audit entry as an indented multi-line block for the terminal.
+    The header and args lines are always present; rule, approver, error and
+    metadata lines appear only when set; a truncated hash line closes the block.
+    """
+    indent = " " * 9
+    stamp = datetime.datetime.fromtimestamp(entry.timestamp).strftime("%Y-%m-%d %H:%M:%S")
+    args_json = json.dumps(entry.tool_args, separators=(",", ":"), default=str)
+    lines = [
+        f"[{entry.id:>6}] {stamp}  {entry.outcome.upper():<20} tool={entry.tool_name}",
+        f"{indent}args={args_json}",
+    ]
+    optional_fields = (
+        ("rule", entry.rule_name),
+        ("approved_by", entry.approved_by),
+        ("error", entry.error),
+    )
+    for label, value in optional_fields:
+        if value:
+            lines.append(f"{indent}{label}={value}")
+    if entry.metadata:
+        metadata_json = json.dumps(
+            entry.metadata, sort_keys=True, separators=(",", ":"), default=str
+        )
+        lines.append(f"{indent}metadata={metadata_json}")
+    lines.append(f"{indent}hash={entry.entry_hash[:16]}...")
+    return "\n".join(lines)
+def cmd_verify(args: argparse.Namespace) -> int:
+    """Verify the hash chain of the database named by ``args.db``.
+    Returns:
+        0 when the chain is intact, 1 when the database is missing or cannot be
+        read, 2 when tampering (including an undecodable record) is detected.
+    """
+    import sqlite3
+    try:
+        db_path = _existing_database_path(args.db)
+    except (FileNotFoundError, ValueError) as exc:
+        print(f"Error: {exc}", file=sys.stderr)
+        return 1
+    from sieve.audit.log import AuditLog
+    try:
+        valid, message = AuditLog(db_path).verify_chain()
+    except sqlite3.Error as exc:
+        # A file that is not a SQLite database, or one that stays locked,
+        # is an operational error — report it instead of a traceback.
+        print(f"Error: cannot read audit database {db_path}: {exc}", file=sys.stderr)
+        return 1
+    except ValueError as exc:
+        # Stored JSON that no longer decodes means a record was altered.
+        print(f"✗ TAMPER DETECTED: unreadable audit record: {exc}", file=sys.stderr)
+        return 2
+    if valid:
+        print(f"✓ {message}")
+        return 0
+    print(f"✗ TAMPER DETECTED: {message}", file=sys.stderr)
+    return 2
+def cmd_tail(args: argparse.Namespace) -> int:
+    """Print the ``args.n`` most recent entries of the database named by ``args.db``.
+    Returns:
+        0 on success (including an empty log), 1 when the database is missing,
+        unreadable, or the requested count is invalid.
+    """
+    import sqlite3
+    try:
+        db_path = _existing_database_path(args.db)
+        entries = _tail_entries(db_path, args.n)
+    except (FileNotFoundError, ValueError) as exc:
+        print(f"Error: {exc}", file=sys.stderr)
+        return 1
+    except sqlite3.Error as exc:
+        # A file that is not a SQLite database, or one that stays locked,
+        # is reported as an error instead of a traceback.
+        print(f"Error: cannot read audit database {args.db}: {exc}", file=sys.stderr)
+        return 1
+    if not entries:
+        print("Audit log is empty.")
+        return 0
+    for entry in entries:
+        print(_format_entry(entry))
+        print()
+    return 0
+def _tail_entries(db_path: Path, limit: int) -> list[AuditEntry]:
+    """Open the audit log at *db_path* and return its last *limit* entries."""
+    # Imported lazily, as in cmd_verify, so the audit module loads only on use.
+    from sieve.audit.log import AuditLog
+    return AuditLog(db_path).tail(n=limit)
+def build_parser() -> argparse.ArgumentParser:
+    """Build the ``sieve`` argument parser with its ``verify`` and ``tail`` subcommands."""
+    root = argparse.ArgumentParser(prog="sieve", description="Sieve audit log inspector")
+    commands = root.add_subparsers(dest="command", required=True)
+    verify_cmd = commands.add_parser("verify", help="Verify the hash chain integrity")
+    tail_cmd = commands.add_parser("tail", help="Print the most recent audit entries")
+    # Both subcommands take the database path as their first positional.
+    for command in (verify_cmd, tail_cmd):
+        command.add_argument("db", help="Path to the SQLite audit database")
+    tail_cmd.add_argument(
+        "-n",
+        type=int,
+        default=20,
+        help="Number of entries to show (default: 20)",
+    )
+    return root
+def main() -> None:
+    """Parse the command line, run the selected subcommand, and exit with its status."""
+    cli = build_parser()
+    parsed = cli.parse_args()
+    handlers = {"verify": cmd_verify, "tail": cmd_tail}
+    handler = handlers.get(parsed.command)
+    if handler is None:
+        # Defensive fallback for a command without a handler.
+        cli.print_help()
+        sys.exit(1)
+    sys.exit(handler(parsed))
+if __name__ == "__main__":
+    main()

sieve/config.py ADDED Viewed

@@ -0,0 +1,55 @@
+from __future__ import annotations
+from dataclasses import dataclass, field
+from pathlib import Path
+from sieve.approval.base import ApprovalHandler
+from sieve.approval.cli import CLIApprovalHandler
+from sieve.audit.log import AuditLog
+from sieve.core.cost import TaskCostTracker
+from sieve.core.interceptor import Interceptor
+from sieve.core.similarity import SimilarityCircuitBreaker
+from sieve.policy.engine import PolicyEngine
+from sieve.policy.loader import load_policy
+@dataclass
+class SieveConfig:
+    """Bundle of Sieve settings: policy file, audit database path, approval handler.
+    NOTE(review): nothing visible in this module constructs or reads this
+    class — confirm whether it is still used elsewhere.
+    """
+    policy_path: str | Path  # required; no default
+    db_path: str | Path = "sieve_audit.db"
+    # default_factory builds a new CLIApprovalHandler for each config instance.
+    approval_handler: ApprovalHandler = field(default_factory=CLIApprovalHandler)
+# Module-wide interceptor: assigned by configure(), read by get_interceptor();
+# stays None until configure() has run.
+_interceptor: Interceptor | None = None
+def configure(
+    policy_path: str | Path,
+    db_path: str | Path = "sieve_audit.db",
+    approval_handler: ApprovalHandler | None = None,
+) -> Interceptor:
+    """Build the global Sieve interceptor and return it.
+    Call once at application startup, before any guarded tool runs. A repeat
+    call swaps in a freshly built interceptor (safe for tests).
+    Args:
+        policy_path: Policy file handed to ``load_policy``.
+        db_path: Location of the audit database.
+        approval_handler: Approval mechanism; a ``CLIApprovalHandler`` is used when omitted.
+    """
+    global _interceptor
+    if approval_handler is None:
+        approval_handler = CLIApprovalHandler()
+    loaded_policy = load_policy(policy_path)
+    _interceptor = Interceptor(
+        PolicyEngine(loaded_policy),
+        AuditLog(db_path),
+        approval_handler,
+        SimilarityCircuitBreaker(loaded_policy.circuit_breakers),
+        TaskCostTracker(loaded_policy.max_cost_per_task),
+    )
+    return _interceptor
+def get_interceptor() -> Interceptor:
+    """Return the global interceptor.
+    Raises:
+        RuntimeError: configure() has not been called yet.
+    """
+    if _interceptor is not None:
+        return _interceptor
+    raise RuntimeError(
+        "Sieve has not been configured. Call sieve.configure(policy_path=...) "
+        "before using any guarded tools."
+    )

sieve/core/__init__.py ADDED Viewed

File without changes