PyPI - controlzero - Versions diffs - 1.6.0__tar.gz → 1.7.0__tar.gz - Mend

controlzero 1.6.0tar.gz → 1.7.0tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (186) hide show

{controlzero-1.6.0 → controlzero-1.7.0}/CHANGELOG.md RENAMED Viewed

@@ -1,5 +1,32 @@
 # Changelog
+## 1.7.0 -- 2026-05-19 (T86, gh#391)
+### Added
+- **Unknown-action validator at policy-load time** (T86, GitHub #391).
+  When `controlzero.policy_loader.load_policy()` parses a policy whose
+  rules target an action name that is not in the canonical-or-alias
+  table (typo, made-up name like `database:queryy`), the loader now
+  emits a `logging.WARNING` per offending action with a did-you-mean
+  suggestion list. The policy still loads -- the validator is
+  warn-not-block at the SDK level (the platform backend blocks publish
+  with 422 on the same condition).
+  This catches the silent "rule lands but never fires" class of bug
+  that T84's alias shim was created to prevent: a customer typing
+  `database:queryy` gets a one-line warning pointing at
+  `database:query (legacy)` instead of the rule silently never matching.
+  The validator's known-action set is the union of canonical SDK
+  extractor tools, host-tool aliases (e.g. `Read` -> `file_read`), the
+  four canonical SQL semantic classes plus every legacy alias from the
+  T84 alias table, and wildcards (`*`, `tool:*`, `*:method`). Adding
+  a new alias to `_internal/action_aliases.py` automatically widens
+  what the validator accepts.
+  See `docs/concepts/policies.md#validation` for the full contract.
 ## v1.6.0 -- 2026-05-17 (HITL-6a, gh#542)
 First minor that turns the Human-in-the-Loop approval workflow on. 1.5.8

{controlzero-1.6.0 → controlzero-1.7.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: controlzero
-Version: 1.6.0
+Version: 1.7.0
 Summary: AI agent governance: policies, audit, and observability for tool calls. Works locally with no signup.
 Project-URL: Homepage, https://controlzero.ai
 Project-URL: Documentation, https://docs.controlzero.ai

{controlzero-1.6.0 → controlzero-1.7.0}/controlzero/__init__.py RENAMED Viewed

@@ -29,7 +29,7 @@ from controlzero.errors import (
 from controlzero.hitl import PendingApproval
 from controlzero.policy_loader import load_policy
-__version__ = "1.6.0"
+__version__ = "1.7.0"
 __all__ = [
     "Client",

controlzero-1.7.0/controlzero/_internal/action_validator.py ADDED Viewed

@@ -0,0 +1,182 @@
+"""T86 / GitHub #391 -- unknown-action validator (warn-only at SDK load).
+Pairs with the backend validator at
+``apps/control-zero-platform/backend/internal/policy/action_aliases.go``.
+The backend BLOCKS publish on unknown actions (422); the SDK
+WARNS at load time so a customer running local-policy mode (no
+backend) still sees the typo before the rule silently never fires.
+The known-action set is the union of:
+- Canonical tools (``database``, ``Bash``, ``http``, ``web_search``,
+  ``browser``, ``file_read``, ``file_write``, ``file_search``,
+  ``task``) plus their host-tool aliases from the SDK extractor
+  spec (``sdks/python/controlzero/controlzero/_internal/tool_extractors.json``).
+- For the ``database`` tool: the four canonical SQL semantic classes
+  (``read``/``write``/``admin``/``exec``), every legacy alias from
+  the T84 alias table, and the ambiguous ``delete`` alias.
+For every other tool the validator accepts ANY method (open
+extractor outputs -- Bash basenames, HTTP verbs, browser action
+strings, etc.). The wildcards ``*`` and ``*:method`` always pass;
+``tool:*`` passes only when ``tool`` is a known tool or alias.
+"""
+from __future__ import annotations
+from typing import Iterable
+from controlzero._internal.action_aliases import TOOL as _ALIAS_TOOL
+from controlzero._internal.action_aliases import _AMBIGUOUS, _CLASSES
+# Host-tool names the extractors accept, grouped by the canonical
+# tool each one maps to. tool_extractors.json is the source of truth;
+# update this mirror whenever that file changes.
+_DATABASE_TOOL_ALIASES = {
+    "database", "sql", "Database", "PostgreSQL", "MySQL", "postgres", "sqlite",
+}
+_CANONICAL_TOOLS: set[str] = {
+    # database: canonical name plus aliases, spelled once above so the
+    # database-only method check and this table cannot drift apart.
+    *_DATABASE_TOOL_ALIASES,
+    # Bash
+    "Bash", "bash", "shell", "ShellTool", "run_shell_command",
+    "PowerShell", "powershell", "Shell",
+    # http
+    "http", "fetch", "web_fetch", "WebFetch", "HTTPRequest", "request",
+    # web_search
+    "web_search", "WebSearch", "google_web_search", "SearchTool",
+    # browser
+    "browser", "playwright", "Puppeteer",
+    # file_read
+    "file_read", "read_file", "Read", "ReadFile", "read_many_files",
+    # file_write
+    "file_write", "write_file", "Write", "WriteFile", "edit_file", "Edit",
+    "replace", "apply_patch",
+    # file_search
+    "file_search", "Grep", "grep_search", "Glob", "glob",
+    # task
+    "task", "Task", "Agent", "subagent", "spawn_agent",
+}
+def _build_known_database_methods() -> set[str]:
+    """Collect every method name accepted after a database tool prefix.
+    The result holds the ``*`` wildcard, each key of ``_CLASSES``, every
+    alias listed under those keys, and every entry of ``_AMBIGUOUS``.
+    """
+    known: set[str] = {"*"}
+    # Iterating the mapping yields its keys (the class names).
+    known.update(_CLASSES)
+    for legacy_names in _CLASSES.values():
+        known.update(legacy_names)
+    known.update(_AMBIGUOUS)
+    return known
+# Computed once at import time; is_known_action only does lookups.
+_KNOWN_DATABASE_METHODS = _build_known_database_methods()
+def is_known_action(action: str) -> bool:
+    """Return True if ``action`` is recognised by the SDK extractors / aliases.
+    Accepted shapes:
+    - ``*`` and ``*:<anything>``;
+    - a bare tool name found in ``_CANONICAL_TOOLS``;
+    - ``tool:method`` where ``tool`` is in ``_CANONICAL_TOOLS`` and, for
+      database tools only, ``method`` is empty, ``*``, or listed in
+      ``_KNOWN_DATABASE_METHODS``. Non-database tools accept any method.
+    An empty ``action`` is never known.
+    """
+    if not action:
+        return False
+    if action == "*":
+        return True
+    if ":" not in action:
+        # Bare tool name, no method part.
+        return action in _CANONICAL_TOOLS
+    tool, _, method = action.partition(":")
+    if tool == "*":
+        return True
+    if tool not in _CANONICAL_TOOLS:
+        return False
+    # Only database tools have a closed method vocabulary; every other
+    # tool has open extractor outputs, so any method is accepted.
+    restricted = tool in _DATABASE_TOOL_ALIASES
+    return (
+        not restricted
+        or method in ("", "*")
+        or method in _KNOWN_DATABASE_METHODS
+    )
+def _levenshtein(a: str, b: str) -> int:
+    """Return the edit distance between ``a`` and ``b``.
+    Insertions, deletions and substitutions each cost 1. Only two rows
+    of the distance table are alive at any time.
+    """
+    if a == b:
+        return 0
+    if not a or not b:
+        # One side is empty (both-empty was handled above), so the
+        # distance is the length of the other side.
+        return max(len(a), len(b))
+    previous = list(range(len(b) + 1))
+    for row, ch_a in enumerate(a, start=1):
+        current = [row]
+        for col, ch_b in enumerate(b, start=1):
+            substitution = previous[col - 1] + (0 if ch_a == ch_b else 1)
+            current.append(min(previous[col] + 1, current[col - 1] + 1, substitution))
+        previous = current
+    return previous[-1]
+def _shares_tool_prefix(a: str, b: str) -> bool:
+    """Tell whether two ``tool:method`` actions look like near-misses.
+    True when both have the same tool part (compared exactly), both
+    method parts are non-empty, and the methods' common leading run
+    (compared case-insensitively) covers at least half of the shorter
+    method.
+    """
+    if ":" not in a or ":" not in b:
+        return False
+    tool_a, _, method_a = a.partition(":")
+    tool_b, _, method_b = b.partition(":")
+    if tool_a != tool_b:
+        return False
+    if not method_a or not method_b:
+        return False
+    shortest = min(len(method_a), len(method_b))
+    common = 0
+    # zip stops at the shorter method, matching the index bound above.
+    for ch_a, ch_b in zip(method_a, method_b):
+        if ch_a.lower() != ch_b.lower():
+            break
+        common += 1
+    return 2 * common >= shortest
+def _candidates() -> list[tuple[str, bool]]:
+    """Enumerate (name, is_legacy) tuples for suggestion ranking.
+    Order of the result: ``_CLASSES`` keys under the alias tool
+    (not legacy), then every alias listed in ``_CLASSES`` (legacy),
+    then every ``_AMBIGUOUS`` entry (legacy), then a ``tool:*``
+    wildcard for each known tool (not legacy).
+    """
+    canonical = [(f"{_ALIAS_TOOL}:{cls}", False) for cls in _CLASSES]
+    legacy = [
+        (f"{_ALIAS_TOOL}:{name}", True)
+        for names in _CLASSES.values()
+        for name in names
+    ]
+    ambiguous = [(f"{_ALIAS_TOOL}:{alias}", True) for alias in _AMBIGUOUS]
+    wildcards = [(f"{tool}:*", False) for tool in _CANONICAL_TOOLS]
+    return canonical + legacy + ambiguous + wildcards
+def suggest_for_action(action: str, max_suggestions: int = 3) -> list[str]:
+    """Return up to ``max_suggestions`` did-you-mean candidates for ``action``.
+    A candidate qualifies when its edit distance to ``action`` is at
+    most 3, or when it shares the tool part and a method prefix with
+    ``action`` (see ``_shares_tool_prefix``). Qualifying candidates are
+    ranked by distance, then canonical before legacy, then name.
+    Args:
+        action: The unrecognised action string from the policy.
+        max_suggestions: Upper bound on the number of suggestions.
+            Zero or a negative value yields an empty list.
+    Returns:
+        Candidate names, best first; legacy names carry a
+        `` (legacy)`` suffix.
+    """
+    if max_suggestions <= 0:
+        # Without this guard a negative bound slices from the end of
+        # the ranked list (``hits[:-1]``) and returns nearly every hit.
+        return []
+    max_distance = 3
+    # Tuples are laid out as (distance, is_legacy, name) so a plain
+    # sort gives the ranking; False sorts before True.
+    hits: list[tuple[int, bool, str]] = []
+    for name, legacy in _candidates():
+        distance = _levenshtein(action, name)
+        if distance <= max_distance or _shares_tool_prefix(action, name):
+            hits.append((distance, legacy, name))
+    hits.sort()
+    return [
+        f"{name} (legacy)" if legacy else name
+        for _distance, legacy, name in hits[:max_suggestions]
+    ]
+def validate_actions(actions: Iterable[str]) -> tuple[list[str], dict[str, list[str]]]:
+    """Return (unknown_actions, suggestions_map) for the given action list.
+    Each unknown action is reported once, in first-seen order, and maps
+    to its ``suggest_for_action`` result. Known actions are skipped.
+    """
+    # A dict keeps insertion order, so it doubles as the "seen" set
+    # and as the ordered list of unknown actions.
+    suggestions: dict[str, list[str]] = {}
+    for candidate in actions:
+        if is_known_action(candidate) or candidate in suggestions:
+            continue
+        suggestions[candidate] = suggest_for_action(candidate)
+    return list(suggestions), suggestions
+__all__ = ["is_known_action", "suggest_for_action", "validate_actions"]

controlzero-1.7.0/controlzero/_internal/credential_hook.py ADDED Viewed

@@ -0,0 +1,339 @@
+"""Credential leak ingest hook (epic #666, PR-2).
+Wraps the pure-Python `scan_for_credentials` and turns a list of
+matches into:
+* a possibly-redacted body of text (when action == "redact"),
+* a list of audit-row dicts ready for the existing batched audit
+  flush (`audit_remote.py`),
+* an optional raised exception (when action == "block").
+Design notes:
+* Redaction never echoes plaintext credentials. The matched bytes
+  are replaced in-place with `cz:credleak:<sha256_hex>` so the same
+  secret yields the same token across calls; the audit row carries
+  the separate HMAC-keyed `value_hash` rather than this digest.
+* `value_hash` is HMAC-SHA256(per-org key, plaintext bytes), first
+  16 hex chars. One-way; rotates with the org HMAC key. The plaintext
+  is held only on the stack during this call -- the function returns
+  redacted text + hashes so no caller needs to manage the raw value.
+* Up to 16 characters of context on each side of a match are kept
+  in the audit row; the literal `<MASKED>` replaces the matched
+  span itself so that span's bytes are never echoed.
+* `CONTROLZERO_CREDLEAK_OFF=1` in the environment downgrades any
+  configured action to `warn`, but still emits the audit row with
+  `enforcement_downgraded=True`. The env override is the operator's
+  break-glass for a noisy false positive in production; the row
+  preserves the original intent.
+"""
+from __future__ import annotations
+import hashlib
+import hmac
+import os
+from dataclasses import dataclass
+from typing import Any, Literal
+from controlzero._internal.credential_scanner import scan_for_credentials
+from controlzero.errors import CredentialLeakBlocked
+# Enforcement postures a handler can be configured with; the
+# constructor rejects any other value.
+Action = Literal["warn", "redact", "block"]
+# Where the scanned text came from; copied into the audit row's
+# `source` field unchanged.
+Source = Literal["tool_output", "tool_stderr", "file_read", "grep_match"]
+# Operator break-glass: when this env var is set to "1" the handler
+# downgrades any non-warn action to warn and stamps the audit row with
+# `enforcement_downgraded=True`. Useful when a false positive is
+# blocking work in production while the catalog is updated.
+_OFF_ENV_VAR = "CONTROLZERO_CREDLEAK_OFF"
+# Sentinel inserted into the context window in place of the matched
+# credential span. Up to 16 characters on either side of the span
+# are kept as ambient context for the audit reviewer; the matched
+# span itself is never echoed.
+_MASK = "<MASKED>"
+@dataclass(frozen=True)
+class _Match:
+    """Internal projection of a single scanner hit. Decouples the
+    handler from the raw dict shape `scan_for_credentials` emits."""
+    # Value of the scanner hit's `pattern_id` key, coerced to str.
+    pattern_id: str
+    # Value of the scanner hit's `severity` key, coerced to str.
+    severity: str
+    # Slice bounds of the hit within the scanned text: the handler
+    # reads the credential as `text[start:end]`, so `end` is exclusive.
+    start: int
+    end: int
+def _hmac_value_hash(hmac_key: bytes, plaintext: bytes) -> str:
+    """Return the first 16 hex characters of HMAC-SHA256(hmac_key, plaintext).
+    The truncation to 64 bits is deliberate: it is enough to
+    de-duplicate credential matches within one org, and the shorter
+    string is cheaper to store in the audit metadata. The key MUST be
+    the per-org HMAC key issued at enrollment; reusing a key across
+    orgs would make equal hashes reveal that two tenants hold the
+    same secret.
+    """
+    mac = hmac.new(hmac_key, msg=plaintext, digestmod=hashlib.sha256)
+    full_hex = mac.hexdigest()
+    return full_hex[:16]
+def _build_context_window(text: str, start: int, end: int) -> str:
+    """Return the text around ``text[start:end]`` with that span masked.
+    Up to sixteen characters are kept on each side (fewer at the edges
+    of ``text``) and the span itself is replaced by `<MASKED>`. The
+    window is measured in str characters, not encoded bytes; the two
+    coincide for ASCII input.
+    NOTE: the kept context is copied from ``text`` verbatim. Nothing
+    here masks a second credential that falls inside the window.
+    """
+    before = text[max(start - 16, 0):start]
+    after = text[end:min(end + 16, len(text))]
+    return "".join((before, _MASK, after))
+class CredentialLeakHandler:
+    """Ingest hook that wires the scanner + redactor + audit emit into
+    one call.
+    The handler is intentionally instantiated per-client (not per-call)
+    so the same configuration -- project id, action posture, HMAC key
+    -- applies across every tool-output scan from a given agent. The
+    object holds no per-call state; it is safe to share across
+    threads.
+    """
+    def __init__(
+        self,
+        *,
+        client: Any,
+        project_id: str,
+        action: Action,
+        hmac_key: bytes,
+    ) -> None:
+        """Validate and store the handler configuration.
+        Args:
+            client: Object whose optional ``audit_sink`` attribute
+                receives the audit rows (see ``_emit``).
+            project_id: Stamped onto every audit row.
+            action: Configured posture: ``warn``, ``redact`` or ``block``.
+            hmac_key: Key for ``value_hash``; at least 16 bytes.
+        Raises:
+            ValueError: ``action`` is not one of the three postures, or
+                ``hmac_key`` is shorter than 16 bytes.
+            TypeError: ``hmac_key`` is not bytes / bytearray.
+        """
+        if action not in ("warn", "redact", "block"):
+            raise ValueError(
+                f"CredentialLeakHandler.action must be warn|redact|block, got {action!r}"
+            )
+        if not isinstance(hmac_key, (bytes, bytearray)):
+            raise TypeError("hmac_key must be bytes")
+        if len(hmac_key) < 16:
+            # 16 bytes is the minimum we accept; HMAC-SHA256 can take
+            # any key length but a short key offers no security
+            # advantage and almost always indicates a config bug.
+            raise ValueError("hmac_key must be at least 16 bytes")
+        self._client = client
+        self._project_id = project_id
+        self._configured_action = action
+        # Copy so a caller-owned bytearray cannot be mutated under us.
+        self._hmac_key = bytes(hmac_key)
+    # ------------------------------------------------------------------
+    # Public entry point.
+    # ------------------------------------------------------------------
+    def handle(
+        self,
+        *,
+        source: Source,
+        text: str,
+        tool_name: str,
+        tool_call_id: str,
+        agent_name: str,
+    ) -> tuple[str, list[dict[str, Any]]]:
+        """Scan `text`, emit audit rows for each hit, return the
+        (possibly-redacted) text plus the rows.
+        When no credentials are detected the original text is returned
+        verbatim and the row list is empty. When credentials are
+        detected the function:
+        1. Resolves the effective action (configured action, possibly
+           downgraded to `warn` by the env override).
+        2. Builds one audit row per match. The row carries no
+           plaintext: the credential is hashed via `_hmac_value_hash`
+           and the context window is cut from a copy of the text in
+           which every matched span has been blanked.
+        3. Either redacts each match in-place, returns the text
+           unchanged (warn), or raises `CredentialLeakBlocked` AFTER
+           emitting the rows (block).
+        Returns:
+            (returned_text, rows) -- when action == "block" this
+            tuple is never observed by the caller because the
+            function raises before returning.
+        Raises:
+            CredentialLeakBlocked: the effective action is `block` and
+                at least one credential was found.
+        """
+        raw = scan_for_credentials(text)
+        if not raw:
+            return text, []
+        matches = [
+            _Match(
+                pattern_id=str(m["pattern_id"]),
+                severity=str(m["severity"]),
+                start=int(m["start"]),
+                end=int(m["end"]),
+            )
+            for m in raw
+        ]
+        effective, downgraded = self._resolve_effective_action()
+        # Blank every matched span (length-preserving, so offsets stay
+        # valid) before the context windows are cut. Otherwise a second
+        # credential within 16 characters of a match, or one that
+        # overlaps it, would be copied into the audit row in plaintext.
+        context_text = self._blank_matches(text, matches)
+        # Build audit rows first so the rows reflect the original
+        # match positions; redaction shifts offsets but the audit row
+        # records the pre-redaction span.
+        rows = [
+            self._build_audit_row(
+                m=m,
+                text=text,
+                source=source,
+                tool_name=tool_name,
+                tool_call_id=tool_call_id,
+                agent_name=agent_name,
+                effective_action=effective,
+                enforcement_downgraded=downgraded,
+                context_text=context_text,
+            )
+            for m in matches
+        ]
+        # Emit each row through the configured audit sink. Delivery is
+        # best-effort: `_emit` swallows sink errors so the hot path is
+        # never interrupted by an audit failure.
+        for row in rows:
+            self._emit(row)
+        if effective == "redact":
+            returned_text = self._redact_text(text, matches)
+        elif effective == "warn":
+            returned_text = text
+        else:
+            # `block`. Audit rows are emitted BEFORE raising so the
+            # operator sees the detection event even when the agent
+            # never observes the output. Any value other than
+            # redact/warn lands here, so an unexpected posture fails
+            # closed regardless of whether asserts are enabled.
+            raise CredentialLeakBlocked(
+                f"credential leak detected in {source} (tool={tool_name}): "
+                f"{len(matches)} match(es); see audit log for details"
+            )
+        return returned_text, rows
+    # ------------------------------------------------------------------
+    # Internal helpers.
+    # ------------------------------------------------------------------
+    def _resolve_effective_action(self) -> tuple[Action, bool]:
+        """Apply the `CONTROLZERO_CREDLEAK_OFF=1` operator override.
+        Returns the effective action plus a `downgraded` flag that
+        feeds into the audit row. The flag is True only when the
+        override actually changed the posture (configured action was
+        not already `warn`).
+        """
+        override_on = os.environ.get(_OFF_ENV_VAR, "") == "1"
+        if override_on and self._configured_action != "warn":
+            return "warn", True
+        return self._configured_action, False
+    @staticmethod
+    def _merge_spans(matches: list[_Match]) -> list[tuple[int, int]]:
+        """Return the match spans sorted by start, with overlaps merged.
+        Two spans are merged only when they truly overlap (the later
+        one starts before the earlier one ends). Spans that merely
+        touch stay separate, so disjoint and adjacent matches are
+        rewritten one by one, as before.
+        """
+        merged: list[tuple[int, int]] = []
+        for start, end in sorted((m.start, m.end) for m in matches):
+            if merged and start < merged[-1][1]:
+                prev_start, prev_end = merged[-1]
+                merged[-1] = (prev_start, max(prev_end, end))
+            else:
+                merged.append((start, end))
+        return merged
+    @staticmethod
+    def _rewrite_spans(text: str, matches: list[_Match], replacement_for: Any) -> str:
+        """Return `text` with each merged match span rewritten.
+        `replacement_for` is a callable that receives the span's
+        original text and returns the string to put in its place. The
+        result is assembled left to right from slices of the ORIGINAL
+        text, so no replacement can shift the offsets of another.
+        """
+        pieces: list[str] = []
+        cursor = 0
+        for start, end in CredentialLeakHandler._merge_spans(matches):
+            pieces.append(text[cursor:start])
+            pieces.append(replacement_for(text[start:end]))
+            cursor = end
+        pieces.append(text[cursor:])
+        return "".join(pieces)
+    @staticmethod
+    def _redaction_token(plaintext: str) -> str:
+        """Return `cz:credleak:<sha256_hex>` for one credential span."""
+        digest = hashlib.sha256(plaintext.encode("utf-8", errors="replace")).hexdigest()
+        return f"cz:credleak:{digest}"
+    def _redact_text(self, text: str, matches: list[_Match]) -> str:
+        """Replace each match with `cz:credleak:<sha256_hex>`.
+        Overlapping, nested or duplicate matches are merged into one
+        span first and redacted as a unit. Splicing them one at a time
+        against shifted offsets garbles the output and, for a nested
+        match, leaves part of the outer credential in plaintext.
+        Two pieces of context held intentionally:
+        * `cz:credleak:` is a fixed prefix the downstream agent log
+          consumer can grep for; supports a "show me everywhere this
+          secret was redacted" rotation workflow.
+        * The hex digest is SHA-256 of the plaintext span, not the
+          HMAC-keyed hash. The redaction lives inside the agent's
+          local output; the HMAC hash lives in the audit row that
+          leaves the host. Keeping them distinct means a leak of the
+          local log file does not let an attacker correlate a
+          previous local redaction with a cross-org audit row.
+        """
+        return self._rewrite_spans(text, matches, self._redaction_token)
+    def _blank_matches(self, text: str, matches: list[_Match]) -> str:
+        """Return `text` with every matched span overwritten by `*`.
+        Each span is replaced by a run of the same length, so offsets
+        into the result line up with offsets into `text`. Used as the
+        source for audit context windows.
+        """
+        return self._rewrite_spans(text, matches, lambda span: "*" * len(span))
+    def _build_audit_row(
+        self,
+        *,
+        m: _Match,
+        text: str,
+        source: Source,
+        tool_name: str,
+        tool_call_id: str,
+        agent_name: str,
+        effective_action: Action,
+        enforcement_downgraded: bool,
+        context_text: str | None = None,
+    ) -> dict[str, Any]:
+        """Construct the wire-shape dict handed to the audit sink.
+        Args:
+            m: The match this row describes.
+            text: The original scanned text; the credential hashed
+                into `value_hash` is `text[m.start:m.end]`.
+            context_text: Text to cut the context window from. Pass a
+                same-length copy of `text` with credential spans
+                blanked (see `_blank_matches`). Defaults to `text`
+                itself when omitted.
+            source, tool_name, tool_call_id, agent_name,
+            effective_action, enforcement_downgraded: copied into the
+                row unchanged.
+        Returns:
+            The audit row. It contains the HMAC `value_hash` and a
+            masked context window, never the credential plaintext.
+        """
+        plaintext = text[m.start : m.end]
+        value_hash = _hmac_value_hash(
+            self._hmac_key, plaintext.encode("utf-8", errors="replace")
+        )
+        window_source = text if context_text is None else context_text
+        return {
+            # Mark the row as a credential leak so the backend ingest
+            # can route it to the rotation tracker view in PR-5.
+            "event_kind": "credential_leak_detected",
+            "pattern_id": m.pattern_id,
+            "severity": m.severity,
+            "value_hash": value_hash,
+            "context_window": _build_context_window(window_source, m.start, m.end),
+            "source": source,
+            "tool_name": tool_name,
+            "tool_call_id": tool_call_id,
+            "agent_name": agent_name,
+            "project_id": self._project_id,
+            "enforcement_action": effective_action,
+            "enforcement_downgraded": enforcement_downgraded,
+        }
+    def _emit(self, row: dict[str, Any]) -> None:
+        """Push one audit row through the client's existing sink.
+        The handler does not own its own batch buffer; it calls
+        `client.audit_sink.log(row)` on whichever sink the client has
+        wired up. Best-effort: if the client has no `audit_sink`, or
+        the sink has no `log`, the row is dropped silently so an SDK
+        in local-only mode keeps functioning without an audit
+        destination.
+        """
+        sink = getattr(self._client, "audit_sink", None)
+        if sink is None:
+            return
+        log_fn = getattr(sink, "log", None)
+        if log_fn is None:
+            return
+        try:
+            log_fn(row)
+        except Exception:  # noqa: BLE001
+            # The audit pipeline is best-effort by design (matches
+            # the existing audit_remote.py contract). The hook must
+            # never crash a user's tool call because an audit
+            # delivery failed.
+            pass
+__all__ = [
+    "Action",
+    "Source",
+    "CredentialLeakHandler",
+]

controlzero 1.6.0__tar.gz → 1.7.0__tar.gz

controlzero 1.6.0tar.gz → 1.7.0tar.gz