npm - @jaguilar87/gaia - Versions diffs - 5.0.2 → 5.0.4 - Mend

@jaguilar87/gaia 5.0.2 → 5.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (63) hide show

package/hooks/modules/agents/handoff_persister.py CHANGED Viewed

@@ -15,6 +15,180 @@ import logging
 logger = logging.getLogger(__name__)
+def _normalize_command_set(raw) -> list:
+    """Coerce a raw ``command_set`` into the canonical ``[{command, rationale}]``.
+    Mirrors the normalization in ``bash_validator._build_sealed_payload`` and
+    ``approval_grants.activate_db_pending_by_prefix`` so the intake writes the
+    exact shape the activation/consume sides expect. Items without a non-empty
+    ``command`` are dropped; ``rationale`` defaults to "".
+    """
+    out: list = []
+    if isinstance(raw, list):
+        for item in raw:
+            if isinstance(item, dict) and item.get("command"):
+                out.append(
+                    {
+                        "command": item["command"],
+                        "rationale": item.get("rationale", ""),
+                    }
+                )
+    return out
+def _filter_mutative_command_set(items: list) -> list:
+    """Keep only the command_set items whose command is mutative/T3.
+    The consume side (``bash_validator._validate_single_command``) gates the
+    whole COMMAND_SET match path on ``detect_mutative_command(command).is_mutative``:
+    a command that the matcher does not see as mutative NEVER reaches
+    ``match_command_set_grant`` and its index is therefore NEVER consumed. If
+    such a command is included in the grant's ``command_set``, ``len(consumed)``
+    can never reach ``len(command_set)`` and the grant is stuck PENDING forever
+    (it never flips to CONSUMED). To stay in lockstep with the consume gate, the
+    intake filters with the EXACT same predicate, dropping non-mutative commands
+    (e.g. ``touch``, ``ls``, ``cat``) before the grant is ever minted.
+    Items that fail to classify (import error, unexpected exception) are kept --
+    failing open here is safer than silently dropping a command from a consent
+    batch the user is about to approve.
+    """
+    try:
+        from modules.security.mutative_verbs import detect_mutative_command
+    except ImportError:
+        import pathlib as _pl
+        import sys as _sys
+        _hooks_root = _pl.Path(__file__).resolve().parent.parent.parent
+        _sys.path.insert(0, str(_hooks_root))
+        from modules.security.mutative_verbs import detect_mutative_command
+    kept: list = []
+    for item in items:
+        command = item.get("command", "")
+        try:
+            if detect_mutative_command(command).is_mutative:
+                kept.append(item)
+        except Exception:
+            # Fail open: if classification raises, keep the item rather than
+            # silently dropping a command from the user's consent batch.
+            kept.append(item)
+    return kept
+def _intake_command_set_pending(
+    approval_req: dict,
+    *,
+    agent_id,
+    session_id: str,
+) -> str | None:
+    """INTAKE bridge: plan-first COMMAND_SET envelope -> ONE pending row.
+    When a subagent emits an ``APPROVAL_REQUEST`` whose ``approval_request``
+    carries a ``command_set`` of >= 2 ``{command, rationale}`` items and NO
+    ``approval_id`` (plan-first: the batch is declared up-front, before any
+    command was attempted/blocked), this persists exactly ONE pending approval
+    whose ``payload_json`` contains the ``command_set`` key. That is the signal
+    ``activate_db_pending_by_prefix`` reads (Step 3b) to branch into
+    ``create_command_set_grant`` on user approval.
+    Mutative filtering (Thread a): the command_set is first reduced to ONLY the
+    commands the consume side will treat as mutative/T3 -- see
+    ``_filter_mutative_command_set``. Non-mutative commands (``touch``, ``ls``,
+    ...) never reach the bash_validator matcher, so leaving them in the grant
+    would strand its ``consumed_indexes_json`` short of completion and pin the
+    grant at PENDING forever. After filtering:
+      * >= 2 mutative items  -> mint the COMMAND_SET over exactly those items.
+      * exactly 1 mutative   -> NOT a batch. Return None; the caller falls
+        through to the singular ``approval_id`` path and the lone command is
+        gated by the normal hook-block / SCOPE_SEMANTIC_SIGNATURE flow when the
+        agent attempts it. We deliberately do NOT degrade-to-singular here: this
+        function's contract is "mint a COMMAND_SET or stand aside", and the
+        singular flow is owned end-to-end by the hook block path -- minting a
+        singular row from here would duplicate that ownership.
+      * 0 mutative           -> nothing to approve. Return None (no pending).
+    A raw ``command_set`` of <= 1 item is likewise not a batch and returns None
+    before filtering, preserving the original contract (never mint for one
+    command, never degrade a batch the other way) and the working plan-first
+    flow for genuine multi-command mutative batches.
+    Returns the minted ``approval_id`` (``P-{uuid4hex}``) on success, or None
+    when this is not a plan-first command_set envelope (no action taken).
+    """
+    if not isinstance(approval_req, dict):
+        return None
+    # Plan-first is defined by command_set present AND no approval_id. A request
+    # that already carries an approval_id was minted by the hook block path; it
+    # is the singular flow and must not be re-intaken here.
+    if approval_req.get("approval_id"):
+        return None
+    raw_items = _normalize_command_set(approval_req.get("command_set"))
+    if len(raw_items) < 2:
+        # 0 or 1 item: not a batch. Singular path owns it.
+        return None
+    # Reduce to the mutative/T3 commands only -- the exact predicate the consume
+    # side uses to decide whether a command reaches the COMMAND_SET matcher.
+    command_set_items = _filter_mutative_command_set(raw_items)
+    if len(command_set_items) < 2:
+        # After filtering there is no batch left: either every command was
+        # non-mutative (0 -> nothing to approve) or just one mutative command
+        # remained (1 -> singular path owns it). Either way, no COMMAND_SET.
+        logger.info(
+            "INTAKE: command_set not minted -- %d/%d items mutative after filter "
+            "(need >= 2 for a batch)",
+            len(command_set_items), len(raw_items),
+        )
+        return None
+    # Build a sealed_payload that mirrors bash_validator._build_sealed_payload's
+    # COMMAND_SET shape: command_set verbatim + commands listing every string.
+    # Carry through the subagent's operation/risk fields when present so the
+    # orchestrator's presentation has real values, falling back to neutral
+    # COMMAND_SET defaults otherwise.
+    first_command = command_set_items[0]["command"]
+    sealed_payload = {
+        "operation": approval_req.get("operation")
+        or f"COMMAND_SET intercepted: {len(command_set_items)} commands under one consent",
+        "exact_content": approval_req.get("exact_content") or first_command,
+        "scope": approval_req.get("scope")
+        or (first_command.split()[0] if first_command.strip() else "unknown"),
+        "risk_level": approval_req.get("risk_level") or "medium",
+        "rollback_hint": approval_req.get("rollback") or approval_req.get("rollback_hint"),
+        "rationale": approval_req.get("rationale")
+        or (
+            f"A batch of {len(command_set_items)} related T3 commands requires user "
+            "approval under one consent per the COMMAND_SET policy."
+        ),
+        "commands": [it["command"] for it in command_set_items],
+        "command_set": command_set_items,
+    }
+    try:
+        from gaia.approvals.store import insert_requested
+    except ImportError:
+        import pathlib as _pl
+        import sys as _sys
+        _repo_root = _pl.Path(__file__).resolve().parent.parent.parent.parent
+        _sys.path.insert(0, str(_repo_root))
+        from gaia.approvals.store import insert_requested
+    approval_id = insert_requested(
+        sealed_payload,
+        agent_id=agent_id,
+        session_id=session_id or None,
+    )
+    logger.info(
+        "INTAKE: plan-first COMMAND_SET pending created approval_id=%s items=%d",
+        (approval_id or "")[:16], len(command_set_items),
+    )
+    return approval_id
 def persist_handoff(
     parsed_contract,
     agent_output: str,
@@ -38,6 +212,38 @@ def persist_handoff(
     import pathlib as _pl
     import sys as _sys
+    agent_id = task_info.get("agent_id") or task_info.get("agent") or "unknown"
+    # ---------------------------------------------------------------------
+    # INTAKE bridge (plan-first COMMAND_SET) -- run FIRST and INDEPENDENTLY.
+    #
+    # Minting the pending COMMAND_SET approval is the security-critical path:
+    # it is the consent the user must act on. It must not be coupled to the
+    # audit handoff-row write below -- if insert_agent_contract_handoff fails
+    # for any reason, the user must still get the approval to review. So the
+    # intake runs in its own isolated try, before the handoff-row write.
+    #
+    # Only plan-first envelopes act here: command_set >= 2 items AND no
+    # approval_id. A <= 1 item set or a request that already carries an
+    # approval_id (hook-block / singular path) is a no-op for the intake.
+    # ---------------------------------------------------------------------
+    minted_command_set_id = None
+    if parsed_contract is not None:
+        _env = parsed_contract if isinstance(parsed_contract, dict) else {}
+        _approval_req = _env.get("approval_request")
+        if isinstance(_approval_req, dict):
+            try:
+                minted_command_set_id = _intake_command_set_pending(
+                    _approval_req,
+                    agent_id=agent_id,
+                    session_id=session_id,
+                )
+            except Exception as _intake_exc:
+                logger.warning(
+                    "M4: COMMAND_SET intake failed (non-blocking): %s",
+                    _intake_exc,
+                )
     try:
         # Prefer a sibling gaia package if installed; fall back to the repo
         # layout where gaia/ lives two levels above hooks/.
@@ -48,7 +254,6 @@ def persist_handoff(
             _sys.path.insert(0, str(_repo_root))
             from gaia.store import writer as _writer
-        agent_id = task_info.get("agent_id") or task_info.get("agent") or "unknown"
         workspace = task_info.get("workspace") or _os.environ.get("GAIA_WORKSPACE") or "global"
         db_path_str = task_info.get("db_path")
         db_path = _pl.Path(db_path_str) if db_path_str else None
@@ -99,7 +304,12 @@ def persist_handoff(
             envelope = parsed_contract if isinstance(parsed_contract, dict) else {}
             approval_req = envelope.get("approval_request")
             if approval_req and isinstance(approval_req, dict):
-                approval_id = approval_req.get("approval_id")
+                # The approval_id is either the one the subagent relayed (hook-block
+                # / singular path) or the one the INTAKE bridge just minted for a
+                # plan-first COMMAND_SET. Either way it points at the pending row
+                # the handoff_approvals audit row should link to.
+                approval_id = approval_req.get("approval_id") or minted_command_set_id
                 if approval_id:
                     # Look up the grant to determine the decision at stop time.
                     try:

package/hooks/modules/agents/response_contract.py CHANGED Viewed

@@ -402,6 +402,31 @@ def parse_memorialize_suggestions(
     return _extract_memorialize_suggestions(contract)
def parse_user_facing_summary(
    agent_output: str,
    parsed_contract: Optional[dict] = None,
) -> Optional[str]:
    """Parse the optional top-level ``user_facing_summary`` field (Option A).

    This is the ONE human-audience field in the contract: a brief prose summary
    the subagent writes once, intended for the user. The orchestrator relays it
    near-verbatim on a single-agent COMPLETE (N=1) instead of re-synthesizing
    ``key_outputs``; for N>1 it is ignored and synthesis proceeds.

    Strictly additive and advisory: the field is never required and never
    affects contract validity. In keeping with that, a contract that is not a
    dict (for example a list produced by a malformed envelope) yields None
    instead of raising.

    Args:
        agent_output: Raw agent output; parsed with ``parse_contract`` only
            when ``parsed_contract`` is None.
        parsed_contract: An already-parsed contract, used as-is when given.

    Returns:
        The trimmed summary string when present and non-empty, otherwise None
        (absent, null, blank, non-string, or no usable contract).
    """
    contract = parsed_contract if parsed_contract is not None else parse_contract(agent_output)
    # Advisory field: anything that is not a mapping simply has no summary.
    if not isinstance(contract, dict):
        return None
    raw = contract.get("user_facing_summary")
    if not isinstance(raw, str):
        return None
    text = raw.strip()
    return text or None
 def _is_resume_agent_id(value: str) -> bool:
     return bool(_AGENT_ID_PATTERN.match(value or ""))
@@ -659,6 +684,7 @@ __all__ = [
     "parse_evidence_report",
     "parse_consolidation_report",
     "parse_memorialize_suggestions",
+    "parse_user_facing_summary",
     "validate_response_contract",
     "save_validation_result",
     "load_last_validation",

package/hooks/modules/agents/transcript_reader.py CHANGED Viewed

@@ -139,10 +139,25 @@ def extract_injected_context_payload_from_transcript(
     """
     import os
+    # Empty/None path guard. Without it, Path("").stem == "" and the substring
+    # match below (``candidate.stem in "" or "" in candidate.stem``) is ALWAYS
+    # True because ``"" in any_string`` is True -- so an empty path would match
+    # (and return) the FIRST payload sitting in gaia-context-payloads/, making
+    # the result depend on whatever happens to be in that directory. Mirror the
+    # guard in read_first_user_content_from_transcript: no path, no match.
+    if not transcript_path:
+        return {}
     try:
         payload_dir = Path(os.environ.get("TMPDIR", "/tmp")) / "gaia-context-payloads"
         if payload_dir.exists():
             agent_file = Path(transcript_path).stem  # e.g. "agent-ae190a4da68d626d4"
+            # A stem that came out empty (e.g. path was "/" or "."): nothing to
+            # match against, so the substring test would again degrade to the
+            # always-true ``"" in candidate.stem``. Bail rather than grab an
+            # arbitrary payload.
+            if not agent_file:
+                return {}
             # Match by agent ID substring
             for candidate in payload_dir.glob("*.json"):
                 if candidate.stem in agent_file or agent_file in candidate.stem:

package/hooks/modules/security/__init__.py CHANGED Viewed

@@ -5,7 +5,6 @@ Provides:
 - tiers: SecurityTier enum and classification
 - blocked_commands: Permanently blocked pattern matching
 - mutative_verbs: Mutative verb detection (user approval workflow)
-- gitops_validator: kubectl/helm/flux validation
 - approval_constants: Approval token patterns (legacy APPROVE: and ElicitationResult)
 - approval_grants: Time-limited T3 command passthrough after user approval
 - shell_unwrapper: Detect and strip wrapper shells for inner command classification
@@ -21,7 +20,6 @@ from .blocked_commands import (
     get_blocked_patterns,
     BlockedCommandResult,
 )
-from .gitops_validator import validate_gitops_workflow, GitOpsValidationResult
 from .mutative_verbs import (
     CLI_FAMILY_LOOKUP,
     CATEGORY_MUTATIVE,
@@ -73,9 +71,6 @@ __all__ = [
     "is_blocked_command",
     "get_blocked_patterns",
     "BlockedCommandResult",
-    # GitOps
-    "validate_gitops_workflow",
-    "GitOpsValidationResult",
     # Mutative verbs
     "CLI_FAMILY_LOOKUP",
     "CATEGORY_MUTATIVE",

package/hooks/modules/security/approval_grants.py CHANGED Viewed

@@ -16,10 +16,12 @@ Two-phase nonce-based approval flow:
     grant and allows it.
 Grants are:
-- Scoped to a session (CLAUDE_SESSION_ID)
-- Time-limited (default 10 minutes)
+- Time-limited (default 10 minutes; DB grants use APPROVAL_GRANT_TTL_MINUTES)
 - Cleaned up after use or expiry
-- Stored in .claude/cache/approvals/
+- Stored AUTHORITATIVELY in the DB (``approval_grants`` in gaia.db) since the
+  Brief 71 cutover. The filesystem plane (.claude/cache/approvals/) is the
+  DEPRECATED fallback retained only for grants minted before the cutover; new
+  grants are created and consumed through the DB plane (gaia.store.writer).
 Security properties:
 - Grants are created ONLY by the hook (not by agents)
@@ -28,8 +30,11 @@ Security properties:
 - The deny list (blocked_commands.py) is NEVER bypassed -- grants only
   override the dangerous verb detector
 - Nonces are 128-bit random hex (cannot be guessed)
-- Pending files are session-scoped (cannot be activated from another session)
-- A nonce can only be activated ONCE (pending file deleted on activation)
+- A nonce can only be activated ONCE (DB row marked CONSUMED on activation;
+  legacy pending files are deleted on activation)
+- DB grants are session-AGNOSTIC by design: the block-approve-retry flow
+  legitimately spans sessions, so replay protection comes from the CONSUMED
+  status + TTL, not from session scoping (see the DB-backed model note below)
 =============================================================================
 Grant lifetime (DB-backed model -- Brief 71 cutover)
@@ -1160,16 +1165,26 @@ def consume_grant(command: str, session_id: str = None) -> bool:
 def consume_session_grants(session_id: str = None) -> int:
-    """Consume all confirmed grants for a session.
+    """Consume confirmed grants on the LEGACY FILESYSTEM plane for a session.
-    Called at SubagentStop to clean up all grants that were used during the
-    subagent's lifetime. Multi-use grants are also consumed (session is over).
+    Called at SubagentStop. Scope is the deprecated FS plane ONLY: it sweeps
+    ``grant-{session_id}-*.json`` files under the approvals cache dir and marks
+    confirmed ones used (multi-use grants too, since the session is over).
+    This is a NO-OP for grants on the authoritative DB plane (post Brief 71):
+    DB semantic grants are consumed on the MATCHING RETRY via
+    ``consume_db_semantic_grant`` (see the module docstring, "DB-backed model"),
+    NOT at SubagentStop. There is therefore no DB cleanup gap here -- DB replay
+    protection is handled at consume-on-retry time, and this function
+    intentionally does not (and must not) touch the DB plane. It remains live
+    only to drain pre-cutover FS grants; new sessions that never write an FS
+    grant simply get a return value of 0.
     Args:
         session_id: Session ID to scope consumption (defaults to env var).
     Returns:
-        Number of grants consumed.
+        Number of legacy FS grants consumed (0 when no FS grants exist).
     """
     if not session_id:
         session_id = _get_session_id()
@@ -1789,7 +1804,31 @@ def activate_db_pending_by_prefix(
                 reason="DB pending approval has invalid payload_json.",
             )
+        # Multi-command (COMMAND_SET) detection. A payload carrying a
+        # ``command_set`` list of more than one {command, rationale} item is a
+        # batch the user approved under ONE consent. It must NOT be degraded to
+        # a single command (the historic bug at this site) -- it activates into
+        # a COMMAND_SET grant via the dedicated branch below. A set of length
+        # <= 1 falls through to the singular SCOPE_SEMANTIC_SIGNATURE path so we
+        # never mint a COMMAND_SET grant for one command.
+        raw_command_set = payload.get("command_set")
+        command_set_items: list = []
+        if isinstance(raw_command_set, list):
+            for _item in raw_command_set:
+                if isinstance(_item, dict) and _item.get("command"):
+                    command_set_items.append(
+                        {
+                            "command": _item["command"],
+                            "rationale": _item.get("rationale", ""),
+                        }
+                    )
+        is_command_set = len(command_set_items) > 1
         command = payload.get("exact_content") or payload.get("commands", [None])[0] or ""
+        if is_command_set and not command:
+            # For a command_set the first item is a safe stand-in for the
+            # singular display/signature path; the set itself is authoritative.
+            command = command_set_items[0]["command"]
         if not command:
             logger.warning(
                 "activate_db_pending_by_prefix: no command found in payload for %s",
@@ -1836,6 +1875,57 @@ def activate_db_pending_by_prefix(
                     reason=f"DB transition failed: {ve}",
                 )
+        # Step 3b: COMMAND_SET branch. When the approved payload carries a set
+        # of more than one command, create ONE COMMAND_SET grant covering the
+        # whole batch instead of a singular SCOPE_SEMANTIC_SIGNATURE grant. The
+        # set is consumed item-by-item (byte-for-byte) by bash_validator's
+        # match_command_set_grant / mark_command_set_item_consumed path -- the
+        # consume side is unchanged; this is the create side that was orphaned.
+        #
+        # Precondition: ``command_set`` in the payload is already pre-filtered to
+        # mutative commands by ``_intake_command_set_pending`` (handoff_persister,
+        # the only producer of these pending records in production). Activation
+        # therefore assumes every item is consumable and does NOT re-filter here;
+        # do not add a filtering step at this site -- it would silently drop items
+        # the user already consented to under one grant.
+        if is_command_set:
+            created = create_command_set_grant(
+                command_set_items,
+                approval_id,
+                session_id=current_session_id,
+                agent_id=agent_id,
+                ttl_minutes=DEFAULT_COMMAND_SET_TTL_MINUTES,
+            )
+            if not created:
+                logger.error(
+                    "activate_db_pending_by_prefix: COMMAND_SET grant creation "
+                    "failed for approval_id=%s (items=%d)",
+                    approval_id[:16], len(command_set_items),
+                )
+                return ApprovalActivationResult(
+                    success=False,
+                    status=ACTIVATION_ERROR,
+                    reason="Failed to create COMMAND_SET grant from approved payload.",
+                )
+            logger.info(
+                "activate_db_pending_by_prefix: COMMAND_SET grant created: "
+                "approval_id=%s, items=%d, ttl=%d min, originating_session=%s, "
+                "current_session=%s",
+                approval_id[:16], len(command_set_items),
+                DEFAULT_COMMAND_SET_TTL_MINUTES,
+                (originating_session or "")[:12],
+                current_session_id[:12],
+            )
+            return ApprovalActivationResult(
+                success=True,
+                status=ACTIVATION_ACTIVATED,
+                reason=(
+                    "DB pending approval activated as a COMMAND_SET grant "
+                    f"({len(command_set_items)} commands under one consent)."
+                ),
+                grant_path=None,
+            )
         # Step 4: Rebuild approval signature from the command so the
         # filesystem grant has a valid scope_signature for check_approval_grant().
         from .approval_scopes import build_approval_signature, SCOPE_SEMANTIC_SIGNATURE
@@ -2026,7 +2116,13 @@ def activate_grants_for_session(
 # approved command (adding cd, redirect, pipe, flag) produces a different
 # string and requires fresh approval. Each item in the set is single-use.
-DEFAULT_COMMAND_SET_TTL_MINUTES = 10
+# COMMAND_SET grant TTL in minutes. Aligned to the singular active-grant TTL
+# (DEFAULT_GRANT_TTL_MINUTES / APPROVAL_GRANT_TTL_MINUTES = 60) so a batch of
+# commands approved under one consent gets the same cross-session retry window
+# as a single approved command -- the block-approve-retry flow legitimately
+# spans sessions, and a shorter window would expire the batch before the
+# subagent could consume every item.
+DEFAULT_COMMAND_SET_TTL_MINUTES = 60
 def create_command_set_grant(
@@ -2107,7 +2203,6 @@ def create_command_set_grant(
 def match_command_set_grant(
     retried_command: str,
     *,
-    session_id: str | None = None,
     db_path=None,
 ) -> tuple | None:
     """Find an active COMMAND_SET grant containing ``retried_command``.
@@ -2117,14 +2212,26 @@ def match_command_set_grant(
     ``retried_command``.  No normalization of any kind is applied.
     The grant must:
+    - Have scope COMMAND_SET
     - Have status PENDING (not CONSUMED, REVOKED, or EXPIRED)
     - Not be past its expires_at timestamp
     - Contain ``retried_command`` at an index that has NOT been consumed
-    - Belong to the current session_id
+    The lookup is SESSION-AGNOSTIC (Brief 71), exactly like the singular path
+    (``check_db_semantic_grant``). The block-approve-retry flow legitimately
+    spans sessions, and CLAUDE_SESSION_ID is not guaranteed to be exported into
+    the bash subprocess -- where ``get_session_id()`` falls back to the literal
+    ``"default"``. A session_id filter therefore silently dropped every grant
+    created under the real session, letting approved COMMAND_SET commands run
+    WITHOUT being consumed (the consumption-bypass bug). Replay protection is
+    preserved by the conjunction of the byte-for-byte match, status='PENDING'
+    plus per-index ``consumed_indexes_json``, and the expires_at TTL -- none of
+    which depend on which session is asking. See
+    ``gaia.store.writer.list_command_set_grants_agnostic`` for the full
+    security-boundary rationale.
     Args:
         retried_command: The exact command string the agent wants to run.
-        session_id: CLAUDE_SESSION_ID (defaults to current session).
         db_path: Optional explicit DB path override (used by tests).
     Returns:
@@ -2132,15 +2239,11 @@ def match_command_set_grant(
         The caller should call mark_command_set_item_consumed(approval_id, index)
         after successful execution.
     """
-    if session_id is None:
-        session_id = _get_session_id()
     try:
-        from gaia.store.writer import list_approval_grants
+        from gaia.store.writer import list_command_set_grants_agnostic
         from datetime import datetime, timezone
-        grants = list_approval_grants(
-            session_id=session_id,
+        grants = list_command_set_grants_agnostic(
             status="PENDING",
             db_path=db_path,
         )