@jaguilar87/gaia 5.0.7 → 5.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (99) hide show
  1. package/.claude-plugin/marketplace.json +2 -2
  2. package/.claude-plugin/plugin.json +1 -1
  3. package/CHANGELOG.md +13 -0
  4. package/bin/README.md +6 -1
  5. package/bin/cli/approvals.py +486 -474
  6. package/bin/cli/brief.py +13 -0
  7. package/bin/cli/doctor.py +1 -1
  8. package/dist/gaia-ops/.claude-plugin/plugin.json +1 -1
  9. package/dist/gaia-ops/hooks/adapters/claude_code.py +92 -86
  10. package/dist/gaia-ops/hooks/modules/agents/handoff_persister.py +13 -2
  11. package/dist/gaia-ops/hooks/modules/context/context_injector.py +23 -7
  12. package/dist/gaia-ops/hooks/modules/events/event_writer.py +63 -96
  13. package/dist/gaia-ops/hooks/modules/security/__init__.py +0 -2
  14. package/dist/gaia-ops/hooks/modules/security/approval_cleanup.py +238 -69
  15. package/dist/gaia-ops/hooks/modules/security/approval_grants.py +506 -1103
  16. package/dist/gaia-ops/hooks/modules/security/mutative_verbs.py +24 -1
  17. package/dist/gaia-ops/hooks/modules/session/pending_scanner.py +150 -90
  18. package/dist/gaia-ops/hooks/modules/session/session_manifest.py +257 -28
  19. package/dist/gaia-ops/hooks/modules/tools/bash_validator.py +19 -0
  20. package/dist/gaia-ops/hooks/post_compact.py +1 -0
  21. package/dist/gaia-ops/hooks/pre_compact.py +1 -0
  22. package/dist/gaia-ops/hooks/user_prompt_submit.py +20 -0
  23. package/dist/gaia-ops/skills/agent-approval-protocol/SKILL.md +50 -14
  24. package/dist/gaia-ops/skills/agent-approval-protocol/reference.md +16 -9
  25. package/dist/gaia-ops/skills/agent-protocol/examples.md +12 -1
  26. package/dist/gaia-ops/skills/gaia-patterns/reference.md +2 -2
  27. package/dist/gaia-ops/skills/orchestrator-present-approval/SKILL.md +69 -22
  28. package/dist/gaia-ops/skills/orchestrator-present-approval/reference.md +16 -3
  29. package/dist/gaia-ops/skills/orchestrator-present-approval/template.md +20 -14
  30. package/dist/gaia-ops/skills/pending-approvals/SKILL.md +16 -11
  31. package/dist/gaia-ops/skills/subagent-request-approval/SKILL.md +28 -3
  32. package/dist/gaia-ops/skills/subagent-request-approval/reference.md +34 -8
  33. package/dist/gaia-ops/tools/migration/README.md +10 -12
  34. package/dist/gaia-ops/tools/scan/orchestrator.py +194 -10
  35. package/dist/gaia-ops/tools/scan/tests/test_integration.py +1 -2
  36. package/dist/gaia-security/.claude-plugin/plugin.json +1 -1
  37. package/dist/gaia-security/hooks/adapters/claude_code.py +92 -86
  38. package/dist/gaia-security/hooks/modules/agents/handoff_persister.py +13 -2
  39. package/dist/gaia-security/hooks/modules/context/context_injector.py +23 -7
  40. package/dist/gaia-security/hooks/modules/events/event_writer.py +63 -96
  41. package/dist/gaia-security/hooks/modules/security/__init__.py +0 -2
  42. package/dist/gaia-security/hooks/modules/security/approval_cleanup.py +238 -69
  43. package/dist/gaia-security/hooks/modules/security/approval_grants.py +506 -1103
  44. package/dist/gaia-security/hooks/modules/security/mutative_verbs.py +24 -1
  45. package/dist/gaia-security/hooks/modules/session/pending_scanner.py +150 -90
  46. package/dist/gaia-security/hooks/modules/session/session_manifest.py +257 -28
  47. package/dist/gaia-security/hooks/modules/tools/bash_validator.py +19 -0
  48. package/dist/gaia-security/hooks/user_prompt_submit.py +20 -0
  49. package/gaia/approvals/__init__.py +2 -1
  50. package/gaia/approvals/store.py +165 -15
  51. package/gaia/store/schema.sql +38 -1
  52. package/gaia/store/writer.py +400 -0
  53. package/hooks/adapters/claude_code.py +92 -86
  54. package/hooks/elicitation_result.py +20 -75
  55. package/hooks/modules/agents/handoff_persister.py +13 -2
  56. package/hooks/modules/context/context_injector.py +23 -7
  57. package/hooks/modules/events/event_writer.py +63 -96
  58. package/hooks/modules/security/__init__.py +0 -2
  59. package/hooks/modules/security/approval_cleanup.py +238 -69
  60. package/hooks/modules/security/approval_grants.py +506 -1103
  61. package/hooks/modules/security/mutative_verbs.py +24 -1
  62. package/hooks/modules/session/pending_scanner.py +150 -90
  63. package/hooks/modules/session/session_manifest.py +257 -28
  64. package/hooks/modules/tools/bash_validator.py +19 -0
  65. package/hooks/post_compact.py +1 -0
  66. package/hooks/pre_compact.py +1 -0
  67. package/hooks/user_prompt_submit.py +20 -0
  68. package/package.json +1 -1
  69. package/pyproject.toml +1 -1
  70. package/scripts/bootstrap_database.sh +66 -17
  71. package/scripts/migrations/README.md +26 -14
  72. package/scripts/migrations/schema.checksum +2 -2
  73. package/scripts/migrations/v18_to_v19.sql +36 -0
  74. package/scripts/migrations/v19_to_v20.sql +20 -0
  75. package/skills/agent-approval-protocol/SKILL.md +50 -14
  76. package/skills/agent-approval-protocol/reference.md +16 -9
  77. package/skills/agent-protocol/examples.md +12 -1
  78. package/skills/gaia-patterns/reference.md +2 -2
  79. package/skills/orchestrator-present-approval/SKILL.md +69 -22
  80. package/skills/orchestrator-present-approval/reference.md +16 -3
  81. package/skills/orchestrator-present-approval/template.md +20 -14
  82. package/skills/pending-approvals/SKILL.md +16 -11
  83. package/skills/subagent-request-approval/SKILL.md +28 -3
  84. package/skills/subagent-request-approval/reference.md +34 -8
  85. package/tools/migration/README.md +10 -12
  86. package/tools/scan/orchestrator.py +194 -10
  87. package/tools/scan/tests/test_integration.py +1 -2
  88. package/bin/cli/plans.py +0 -517
  89. package/dist/gaia-ops/tools/context/deep_merge.py +0 -159
  90. package/dist/gaia-ops/tools/migration/migrate_04_harness_events.py +0 -132
  91. package/dist/gaia-ops/tools/migration/migrate_04_harness_events.sh +0 -23
  92. package/dist/gaia-ops/tools/scan/merge.py +0 -213
  93. package/dist/gaia-ops/tools/scan/tests/test_merge.py +0 -269
  94. package/gaia/approvals/revert.py +0 -282
  95. package/tools/context/deep_merge.py +0 -159
  96. package/tools/migration/migrate_04_harness_events.py +0 -132
  97. package/tools/migration/migrate_04_harness_events.sh +0 -23
  98. package/tools/scan/merge.py +0 -213
  99. package/tools/scan/tests/test_merge.py +0 -269
@@ -90,6 +90,11 @@ class BashValidationResult:
90
90
  # plain error string (exit 2). Used for structured block responses that
91
91
  # should correct the agent rather than terminate execution.
92
92
  block_response: Optional[Dict[str, Any]] = None
93
+ # When a T3 command is allowed because it matched (and consumed) an active
94
+ # grant, this carries the approval_id of that grant. The adapter stashes it
95
+ # in HookState so PostToolUse can append an EXECUTED/FAILED event to the
96
+ # approval_events chain for this approval. None for non-T3 / no-grant paths.
97
+ consumed_approval_id: Optional[str] = None
93
98
 
94
99
  def __post_init__(self):
95
100
  if self.suggestions is None:
@@ -667,6 +672,7 @@ class BashValidator:
667
672
  allowed=True,
668
673
  tier=SecurityTier.T3_BLOCKED,
669
674
  reason="Command-set grant matched",
675
+ consumed_approval_id=cs_approval_id,
670
676
  )
671
677
 
672
678
  # DB-primary + filesystem-fallback grant check.
@@ -720,6 +726,7 @@ class BashValidator:
720
726
  allowed=True,
721
727
  tier=SecurityTier.T3_BLOCKED,
722
728
  reason="Grant confirmed",
729
+ consumed_approval_id=db_approval_id,
723
730
  )
724
731
  else:
725
732
  # Filesystem grant exists, not yet confirmed -- GAIA approved,
@@ -733,6 +740,7 @@ class BashValidator:
733
740
  allowed=True,
734
741
  tier=SecurityTier.T3_BLOCKED,
735
742
  reason="Grant active, pending confirmation",
743
+ consumed_approval_id=db_approval_id,
736
744
  )
737
745
  else:
738
746
  # Converge on the single T3 decision point. When there is an
@@ -808,6 +816,7 @@ class BashValidator:
808
816
  allowed=True,
809
817
  tier=SecurityTier.T3_BLOCKED,
810
818
  reason="Command-set grant matched",
819
+ consumed_approval_id=cs_approval_id,
811
820
  )
812
821
 
813
822
  grant = check_approval_grant(command, session_id=session_id)
@@ -859,6 +868,7 @@ class BashValidator:
859
868
  allowed=True,
860
869
  tier=SecurityTier.T3_BLOCKED,
861
870
  reason="Grant confirmed",
871
+ consumed_approval_id=db_approval_id,
862
872
  )
863
873
  else:
864
874
  logger.info(
@@ -870,6 +880,7 @@ class BashValidator:
870
880
  allowed=True,
871
881
  tier=SecurityTier.T3_BLOCKED,
872
882
  reason="Grant active, pending confirmation",
883
+ consumed_approval_id=db_approval_id,
873
884
  )
874
885
 
875
886
  # No grant matched -- converge on the single T3 decision
@@ -939,10 +950,18 @@ class BashValidator:
939
950
  key=lambda t: tier_order.index(t.value),
940
951
  )
941
952
 
953
+ # Propagate the consumed approval_id from whichever component matched a
954
+ # grant, so PostToolUse can append EXECUTED/FAILED for that approval.
955
+ consumed_approval_id = next(
956
+ (r.consumed_approval_id for r in component_results if r.consumed_approval_id),
957
+ None,
958
+ )
959
+
942
960
  return BashValidationResult(
943
961
  allowed=True,
944
962
  tier=highest_tier,
945
963
  reason=f"All {len(components)} components validated",
964
+ consumed_approval_id=consumed_approval_id,
946
965
  )
947
966
 
948
967
  def _phase4_check_composition(
@@ -35,6 +35,7 @@ def _handle_post_compact(event) -> None:
35
35
 
36
36
  response = {
37
37
  "hookSpecificOutput": {
38
+ "hookEventName": "PostCompact",
38
39
  "additionalContext": context,
39
40
  }
40
41
  }
@@ -52,6 +52,7 @@ def _handle_pre_compact(event) -> None:
52
52
 
53
53
  response = {
54
54
  "hookSpecificOutput": {
55
+ "hookEventName": "PreCompact",
55
56
  "additionalContext": context,
56
57
  }
57
58
  }
@@ -194,6 +194,26 @@ if __name__ == "__main__":
194
194
  else:
195
195
  logger.info("Could not extract user prompt from stdin, skipping routing")
196
196
 
197
+ # Per-turn VERIFIED pending approvals. Lets the orchestrator present
198
+ # a pending approval for consent directly from injected context,
199
+ # WITHOUT dispatching a subagent to derive/verify it (that dispatch's
200
+ # SubagentStop caused a pending-revocation bug). Emits "" when there
201
+ # are no verified pendings, so a turn with nothing pending injects
202
+ # nothing -- this is what keeps the per-turn injection quiet, unlike
203
+ # the one-shot SessionStart summary it deliberately does not re-emit.
204
+ try:
205
+ from modules.session.session_manifest import (
206
+ build_per_turn_pending_approvals_block,
207
+ )
208
+ pending_block = build_per_turn_pending_approvals_block()
209
+ if pending_block:
210
+ context_parts.append(pending_block)
211
+ except Exception as _pa_exc:
212
+ logger.debug(
213
+ "per-turn pending approvals injection failed (non-fatal): %s",
214
+ _pa_exc,
215
+ )
216
+
197
217
  additional_context = "\n\n".join(context_parts)
198
218
  logger.info("Context injected: %s mode (%d chars)", mode, len(additional_context))
199
219
 
@@ -14,6 +14,20 @@ through the hook layer, to the orchestrator when a T3 command is blocked: the
14
14
  the status and event vocabularies, and how to confirm a grant is active. The
15
15
  tables below are the canonical schema -- relay them verbatim, do not author them.
16
16
 
17
+ The orchestrator presents this contract to the user from a **trusted source**,
18
+ never by dispatching a subagent to verify or derive it (the orchestrator has no shell). The
19
+ primary source is the per-turn `[PENDING-APPROVALS-VERIFIED]` block injected at
20
+ `UserPromptSubmit` (`build_verified_pending_approvals` in
21
+ `hooks/modules/session/session_manifest.py`), which carries every pending that
22
+ has survived >= 1 turn, each already DB-read and fingerprint-verified
23
+ (`verified: true`). For a pending emitted in the current turn -- not yet in the
24
+ block -- the fallback is the subagent's relayed `approval_request`. The
25
+ **integrity boundary is grant activation**, not presentation:
26
+ `verify_fingerprint` (`gaia/approvals/chain.py`) runs when the user selects the
27
+ Approve label, so a tampered payload fails to form a grant regardless of how it
28
+ was presented. See `Skill('orchestrator-present-approval')` for the presentation
29
+ discipline.
30
+
17
31
  For the universal response envelope (`plan_status` states, `evidence_report`),
18
32
  see `agent-protocol`. For the deep mechanics -- fingerprint canonicalization,
19
33
  the hash chain, grant activation, reading a granted approval from Python -- see
@@ -21,10 +35,20 @@ the hash chain, grant activation, reading a granted approval from Python -- see
21
35
 
22
36
  ## approval_id format
23
37
 
38
+ For a **singular** T3 approval (the hook-block path),
24
39
  `store._generate_approval_id()` returns `P-{uuid4().hex}` (e.g.
25
- `P-b1bdfbb0b9474bf5b3f86b1f6a213f7a`). The `P-` prefix is mandatory: without it
26
- the PostToolUse hook cannot do targeted grant activation. The first 8 hex chars
27
- after `P-` are the nonce prefix shown in option labels: `[P-b1bdfbb0]`.
40
+ `P-b1bdfbb0b9474bf5b3f86b1f6a213f7a`) -- a random, unique id the subagent relays
41
+ verbatim. For a **plan-first `COMMAND_SET`** the id is instead **content-derived**
42
+ by `store.derive_command_set_id()`: `P-<first 32 hex of
43
+ sha256(canonical(command strings))>`. The two share the `P-` prefix and 32-hex
44
+ length but differ in origin -- the command_set id is deterministic (minted at
45
+ SubagentStop intake), and once the pending has survived a turn the orchestrator
46
+ reads that id directly from the injected `[PENDING-APPROVALS-VERIFIED]` block
47
+ (no derive-dispatch, no DB search); the singular id is random and the subagent
48
+ relays it directly for the same-turn case. The `P-` prefix is mandatory in both
49
+ cases: without it the PostToolUse
50
+ hook cannot do targeted grant activation. The first 8 hex chars after `P-` are
51
+ the nonce prefix shown in option labels: `[P-b1bdfbb0]`.
28
52
 
29
53
  ## APPROVAL_REQUEST contract shape
30
54
 
@@ -55,8 +79,11 @@ becomes `rollback` in the contract; `commands` (`[exact_content]`) and
55
79
  }
56
80
  ```
57
81
 
58
- There is no `batch_scope` field: the `verb_family` grant was removed, so each
59
- blocked command gets its own single-use grant. See
82
+ There is no `batch_scope` field: the `verb_family` grant was removed. For a
83
+ single blocked command, each gets its own single-use `SCOPE_SEMANTIC_SIGNATURE`
84
+ grant. For a batch of >= 2 T3 commands known up-front, emit a `command_set`
85
+ list and **no** `approval_id` -- the SubagentStop intake mints a single
86
+ `COMMAND_SET` grant (one consent covers all). See
60
87
  `Skill('orchestrator-present-approval')` for the orchestrator side.
61
88
 
62
89
  ## Status vocabularies -- distinct columns, opposite casing, never collapse
@@ -69,8 +96,8 @@ blocked command gets its own single-use grant. See
69
96
  ## Event chain
70
97
 
71
98
  The `approval_events.event_type` CHECK admits nine values: `REQUESTED` `SHOWN`
72
- `APPROVED` `REJECTED` `EXECUTED` `FAILED` `NOOP` `REVOKED` `REVERTED`. Only these
73
- are written by production code today:
99
+ `APPROVED` `REJECTED` `EXECUTED` `FAILED` `NOOP` `REVOKED` `REVERTED`. Those in the table below are
100
+ written by production code today:
74
101
 
75
102
  | Event | Who writes it | When |
76
103
  |-------|--------------|------|
@@ -78,11 +105,16 @@ are written by production code today:
78
105
  | `SHOWN` | ElicitationResult hook via `activate_db_pending_by_prefix()` | User selects an Approve `[P-xxx]` label |
79
106
  | `APPROVED` | ElicitationResult hook (same call as `SHOWN`) | Immediately after `SHOWN` |
80
107
  | `REJECTED` / `REVOKED` | `gaia approvals` CLI via `store.reject()` / `store.revoke()` | User rejects or admin cancels |
108
+ | `EXECUTED` / `FAILED` | PostToolUse adapter (`_record_t3_outcome_event`) via `store.record_event()` | An approved T3 command runs under a consumed grant -- `EXECUTED` on clean exit, `FAILED` otherwise |
81
109
 
82
- `EXECUTED` `FAILED` `NOOP` `REVERTED` are valid in the CHECK and are *read* by
83
- `store.get_executed_payload()` and `revert.py`, but no production hook *writes*
84
- them today -- treat them as a designed extension point, not a live invariant. Do
85
- not assume an `EXECUTED` event exists after a command runs.
110
+ The PostToolUse path closes the audit cycle: PreToolUse stashes the consumed
111
+ grant's `approval_id` in `HookState`, and PostToolUse appends `EXECUTED` or
112
+ `FAILED` for that approval, continuing the hash chain through `record_event()`.
113
+ `store.get_executed_payload()` and `gaia approvals replay` read the `EXECUTED`
114
+ payload to re-present the commands that ran. `NOOP` and `REVERTED` remain valid
115
+ in the CHECK but are **inert** -- no production code writes them (the revert
116
+ feature was removed). Do not assume an `EXECUTED` event exists for an approval
117
+ whose command never ran, or whose command ran through the redirect-sanitized path.
86
118
 
87
119
  ## Key invariants
88
120
 
@@ -90,9 +122,13 @@ not assume an `EXECUTED` event exists after a command runs.
90
122
  - `SHOWN` precedes `APPROVED`; the activation path writes them together.
91
123
  - `approval_events` is append-only -- the `bu_approval_events_immutable` and
92
124
  `bd_approval_events_immutable` triggers `RAISE(ABORT)` on UPDATE/DELETE.
93
- - The orchestrator MUST re-verify a relayed payload via
94
- `chain.verify_fingerprint(approval_id, payload_json, con)` before presenting;
95
- a mismatch raises `ChainTamperError` and the approval aborts.
125
+ - The payload's integrity is enforced at grant **activation**, not at
126
+ presentation: `chain.verify_fingerprint(approval_id, payload_json, con)` runs
127
+ when the user selects the Approve label, and a mismatch raises
128
+ `ChainTamperError` so the grant never forms. The orchestrator presents from a
129
+ trusted source (the injected `[PENDING-APPROVALS-VERIFIED]` block, already
130
+ fingerprint-verified by the hook; or a same-turn relayed `approval_request`)
131
+ and never dispatches a subagent to verify or derive the approval.
96
132
 
97
133
  For the grant activation walk-through, fingerprint internals, reading a granted
98
134
  approval from Python, and the retry-blocked-again diagnosis, see `reference.md`.
@@ -12,12 +12,17 @@ canonical string. `store.insert_requested()` stores both the canonical JSON
12
12
  (`payload_json`) and the hex fingerprint on the `approvals` row and on the
13
13
  `REQUESTED` event.
14
14
 
15
- The orchestrator MUST re-verify via
16
- `chain.verify_fingerprint(approval_id, payload_json, con)` before presenting.
17
- That function re-parses and re-canonicalizes the relayed `payload_json`,
18
- recomputes the fingerprint, and compares it against the fingerprint stored on
19
- the `REQUESTED` event. A mismatch raises `ChainTamperError` and the approval
20
- aborts -- this is a security boundary, not a recoverable UX issue.
15
+ The fingerprint is verified at grant **activation**, not at presentation.
16
+ `chain.verify_fingerprint(approval_id, payload_json, con)` re-parses and
17
+ re-canonicalizes the payload, recomputes the fingerprint, and compares it
18
+ against the fingerprint stored on the `REQUESTED` event; a mismatch raises
19
+ `ChainTamperError` and the grant never forms -- a security boundary, not a
20
+ recoverable UX issue. The per-turn `[PENDING-APPROVALS-VERIFIED]` builder
21
+ (`build_verified_pending_approvals`) applies the same check when assembling the
22
+ injected block, so only fingerprint-clean pendings reach the orchestrator marked
23
+ `verified: true`. The orchestrator therefore presents from that already-verified
24
+ block (or a same-turn relayed `approval_request`) and never dispatches to verify
25
+ the payload itself.
21
26
 
22
27
  ## Hash chain
23
28
 
@@ -27,9 +32,11 @@ Each event links to the previous via `prev_hash` -> `this_hash`
27
32
  Because `approval_events` is append-only (UPDATE/DELETE blocked by the
28
33
  `bu_approval_events_immutable` and `bd_approval_events_immutable` triggers),
29
34
  `this_hash` is computed in the application layer before INSERT, inside
30
- `chain.insert_event()` -- not by a DB trigger. `REVERTED` events, when written,
31
- carry the original `event_id` in `metadata_json` per the revert design (D14);
32
- see `gaia/approvals/revert.py`.
35
+ `chain.insert_event()` -- not by a DB trigger. `EXECUTED` / `FAILED` events,
36
+ appended by the PostToolUse adapter through `store.record_event()` after an
37
+ approved T3 command runs, extend the same chain. `REVERTED` remains a valid
38
+ CHECK value but is **inert** -- the revert feature was removed, so no code
39
+ writes it.
33
40
 
34
41
  ## Grant activation walk-through
35
42
 
@@ -330,4 +330,15 @@ The agent discovered a project fact a section it owns did not yet hold, and writ
330
330
 
331
331
  ## Notes on multi-command APPROVAL_REQUEST sweeps
332
332
 
333
- There is no batch/multi-use grant in the current code: the legacy `verb_family` grant was removed (`hooks/modules/security/approval_grants.py`) and its `COMMAND_SET` replacement has no production activation path yet. Do **not** emit a `batch_scope` field -- it is ignored. When one intent expands into many T3 commands, each blocked command produces its own single-use approval; emit one `APPROVAL_REQUEST` per blocked command (shape identical to example 4 above) and let the user approve each.
333
+ **Just-in-time (unknown batch):** when T3 commands appear one at a time as the
334
+ agent works, each blocked command produces its own `APPROVAL_REQUEST` with an
335
+ `approval_id` (shape identical to example 4 above). Do not emit `batch_scope`
336
+ -- it is ignored.
337
+
338
+ **Plan-first (known batch):** when the agent knows >= 2 T3 commands up-front,
339
+ emit ONE `APPROVAL_REQUEST` carrying a `command_set` list of `{command,
340
+ rationale}` items and **no** `approval_id`. The SubagentStop intake
341
+ (`handoff_persister._intake_command_set_pending`) mints a single `COMMAND_SET`
342
+ approval; the orchestrator presents it as one consent covering all N commands.
343
+ Each command then runs on its own retry, byte-for-byte matched and consumed
344
+ individually.
@@ -109,7 +109,7 @@ The package ships a single `gaia` binary (`bin/gaia.js`) that dispatches to Pyth
109
109
  | `gaia memory` | `bin/cli/memory.py` | Episodic memory: FTS5 search, show episode, health checks |
110
110
  | `gaia metrics` | `bin/cli/metrics.py` | Usage analytics: tier classification, agent invocations, anomaly counters |
111
111
  | `gaia paths` | `bin/cli/paths.py` | Inspect canonical Gaia storage paths (DB, plugin root, workspace) |
112
- | `gaia plans` | `bin/cli/plans.py` | List and display briefs/plans with status info |
112
+ | `gaia plan` | `bin/cli/plan.py` | Manage plans (one per brief, DB-canonical): save, show, list, status |
113
113
  | `gaia workspace` | `bin/cli/workspace.py` | Workspace identity and consolidate operations |
114
114
  | `gaia scan` | `bin/cli/scan.py` | In-process project scan: detect stack, sync results to ~/.gaia/gaia.db (DB-canonical; no project-context.json written) |
115
115
  | `gaia status` | `bin/cli/status.py` | Quick installation snapshot: version, mode, DB path, registered workspace, last scan |
@@ -289,7 +289,7 @@ After `npm install -g @jaguilar87/gaia` (or via the local symlink) the dispatche
289
289
  | `gaia history` | Session history viewer | Debugging past sessions |
290
290
  | `gaia memory` | Episodic memory inspect/search | Recall past episodes, memory health |
291
291
  | `gaia approvals` | List/accept/reject pending T3 approvals | Approval workflow |
292
- | `gaia brief` / `gaia plans` | Brief and plan management against the DB substrate | Planning, brief lifecycle |
292
+ | `gaia brief` / `gaia plan` | Brief and plan management against the DB substrate | Planning, brief lifecycle |
293
293
  | `gaia context` | Display and refresh project context | Audit context state |
294
294
  | `gaia paths` | Print resolved storage paths | Path debugging |
295
295
  | `gaia workspace` | Workspace identity and consolidate operations | Multi-workspace setups |
@@ -15,11 +15,13 @@ names the specific action. No exceptions. No brevity shortcuts.
15
15
  ```
16
16
 
17
17
  `orchestrator-present-approval` is the discipline the orchestrator follows when
18
- a subagent emits `APPROVAL_REQUEST` with an `approval_id`: relay the
19
- `sealed_payload` into AskUserQuestion -- fingerprint check, mandatory fields in
20
- the question, mandatory nonce in the option label. For the subagent side that
21
- produced the payload see `subagent-request-approval`; for the data contract
22
- itself see `agent-approval-protocol`.
18
+ an approval needs the user's consent: relay the sealed fields into
19
+ AskUserQuestion -- mandatory fields in the question, mandatory nonce in the
20
+ option label. The orchestrator has no shell, so it never dispatches a subagent
21
+ to derive or verify an approval; it presents from a trusted source it already
22
+ holds. For the subagent side that produced the payload see
23
+ `subagent-request-approval`; for the data contract itself see
24
+ `agent-approval-protocol`.
23
25
 
24
26
  ## Mental Model
25
27
 
@@ -27,19 +29,53 @@ The orchestrator sits between the subagent and the user. The user cannot make
27
29
  an informed decision on data they have not seen -- a summary, a reference to
28
30
  "the plan above", or an offer to show details on request all push the decision
29
31
  without the data needed to decide. The job is **verbatim relay, not
30
- re-authoring**: rewriting any of the 7 sealed fields breaks the fingerprint and
31
- `verify_fingerprint` (`gaia/approvals/chain.py`) raises `ChainTamperError`.
32
-
33
- ## Step 0 -- Verify the approval against the DB (mandatory before SHOWN)
34
-
35
- A subagent's reported `approval_id` is an unverified claim, not a fact. The agent runs in its own context and can relay an id that is stale, from another session, or simply wrong -- and a stale id presented as a fresh block walks the user into consenting to nothing real (or to a grant that no longer exists). The DB is the source of truth; the agent's report is a pointer into it that you must resolve, never the authority itself.
36
-
37
- So before AskUserQuestion, two checks against the DB, in order:
38
-
39
- 1. **The approval exists, is fresh, and is from the current session.** Query `gaia approvals pending --session "$CLAUDE_SESSION_ID"` (or `--json` for parsing). The reported `approval_id` MUST appear in that result. If it appears only under `--all-sessions` but not the current session, it is leakage from another session (a test session such as `e2e-sim`, a prior run) -- **do not present**. If it does not appear at all, it does not exist or was already consumed/rejected -- **do not present**. Freshness is the `created_at` of the pending row plus its presence as still-`pending`; an id the agent reports that is not currently pending in *this* session is not a fresh block, whatever the agent says.
40
- 2. **The payload is untampered.** Call `verify_fingerprint(approval_id, payload_json, con) -> bool` from `gaia/approvals/chain.py`. It raises `ChainTamperError` if the payload was modified between subagent emission and your relay (security boundary, do not present), and `ValueError` if no REQUESTED event exists for this `approval_id`. Either case: **do not present**, report the failure, stop.
41
-
42
- **For a `command_set` (plan-first batch) the agent does not know the id at all.** The hook mints the `approval_id` at SubagentStop (`_intake_command_set_pending` -- see Rule 3); the subagent emits the `command_set` with **no** `approval_id`. So you do not have an agent-reported id to trust even if you wanted to -- you ALWAYS recover the freshly minted id from `gaia approvals pending` for the current session. This is the general shape made unavoidable: the DB mints, the orchestrator recovers, the agent never owns the id.
32
+ re-authoring**: rewriting any of the sealed fields would change the consent
33
+ surface from what was recorded. Integrity of the payload is enforced at grant
34
+ **activation** (`verify_fingerprint` in `gaia/approvals/chain.py`, called when
35
+ the user selects the Approve label), not at presentation -- so presentation
36
+ itself never needs a verify-dispatch.
37
+
38
+ ## Step 0 -- Present from a trusted source; never dispatch to verify or derive
39
+
40
+ The orchestrator has no shell. It MUST NOT dispatch a subagent solely to derive
41
+ or verify an approval before presenting -- that dispatch is both unnecessary
42
+ (the integrity check runs at activation, below) and harmful (its SubagentStop
43
+ can sweep the very pending being verified). Instead, present from one of two
44
+ **trusted** sources:
45
+
46
+ 1. **Primary -- the injected `[PENDING-APPROVALS-VERIFIED]` block.** A per-turn
47
+ hook (`hooks/modules/session/session_manifest.py`) injects, on every
48
+ `UserPromptSubmit`, every pending that has survived >= 1 turn. Each row in
49
+ that block has already been DB-read and fingerprint-verified by the hook
50
+ (`build_verified_pending_approvals` -- only rows whose payload re-canonicalizes
51
+ to the fingerprint stored on their `REQUESTED` event appear, each marked
52
+ `verified: true`). **Present directly from this block** -- the fields, the
53
+ full `approval_id`, and (for batches) the whole `command_set` with its minted
54
+ id are all there. No DB query, no `derive-id`, no dispatch.
55
+ 2. **Fallback -- same-turn relay.** A pending a subagent emits during the
56
+ CURRENT turn will not be in this turn's block yet: the block is built at
57
+ `UserPromptSubmit`, before the subagent ran. For that case present from the
58
+ subagent's relayed `approval_request`. This is justified because the pending
59
+ was freshly minted in THIS session by a trusted dispatch, AND integrity is
60
+ enforced at grant **activation** (`verify_fingerprint` fires when the user
61
+ selects the Approve label), not at presentation. The old pre-presentation
62
+ verify was redundant belt-and-suspenders; it is removed.
63
+
64
+ Once the pending survives a turn it appears in the injected block, so the relay
65
+ is only ever needed for the same-turn case.
66
+
67
+ **For a `command_set` (plan-first batch) you do not derive the id -- you read it
68
+ from the block.** The hook mints the `approval_id` at SubagentStop
69
+ (`_intake_command_set_pending` -- see Rule 3) from the **content** of the
70
+ command_set (`derive_command_set_id` in `gaia/approvals/store.py`,
71
+ `P-<first 32 hex of sha256(canonical(command list))>`). Once that pending has
72
+ survived a turn, the `[PENDING-APPROVALS-VERIFIED]` block carries it with its
73
+ minted `approval_id` and all N commands already attached -- so you read the id
74
+ and the commands straight from the block. **No `gaia approvals derive-id`
75
+ dispatch is needed.** For a command_set emitted in the CURRENT turn (not yet in
76
+ the block), present from the subagent's relayed `approval_request`, which carries
77
+ the same `command_set`; the content-derived id reaches you when the pending
78
+ appears in the next turn's block.
43
79
 
44
80
  ## Mandatory presentation -- 5 labeled fields + nonce-suffixed label
45
81
 
@@ -66,7 +102,13 @@ whose `id` starts with `P-{prefix}`. Without the suffix no grant is created.
66
102
  See `template.md` for the canonical layout and `reference.md` -> "GOOD vs BAD
67
103
  Examples" for full presentations.
68
104
 
69
- Fields above are extracted from the DB-stored canonical payload (`payload_json` on the REQUESTED row), not from the subagent's relayed `approval_request` — that's why `rollback_hint` is the field name here while the subagent contract uses `rollback`.
105
+ Fields above are extracted from your trusted source. From the injected
106
+ `[PENDING-APPROVALS-VERIFIED]` block (the primary path) they appear under the
107
+ canonical names shown here (`operation`, `exact_content`, `scope`, `risk_level`,
108
+ `rationale`, `rollback_hint`). From a same-turn relayed `approval_request` (the
109
+ fallback) the rollback field arrives under the key `rollback` -- map it to
110
+ ROLLBACK the same way. Either way you copy values verbatim; you do not re-author
111
+ them.
70
112
 
71
113
  ## Rules
72
114
 
@@ -84,7 +126,12 @@ Fields above are extracted from the DB-stored canonical payload (`payload_json`
84
126
  `APPROVAL_REQUEST` carrying a `command_set` of >= 2 `{command, rationale}`
85
127
  items and **no** `approval_id`, the SubagentStop processor
86
128
  (`handoff_persister._intake_command_set_pending`) mints ONE pending
87
- `COMMAND_SET` with one `approval_id`. You present that single approval: list
129
+ `COMMAND_SET` with one content-derived `approval_id`. Once that pending has
130
+ survived a turn it appears in the injected `[PENDING-APPROVALS-VERIFIED]`
131
+ block with its minted `approval_id` and all N commands -- **read the id and
132
+ commands from the block; do not dispatch `gaia approvals derive-id`.** (A
133
+ command_set emitted in the current turn is presented from the subagent's
134
+ relayed `approval_request`.) You present that single approval: list
88
135
  **all N commands** in the question body, but use **one** Approve label with
89
136
  **one** `[P-{nonce8}]` suffix -- one consent covers the whole batch. On
90
137
  approval, `activate_db_pending_by_prefix` Step 3b creates a single
@@ -120,5 +167,5 @@ wording, see `reference.md` -> "GOOD vs BAD Examples", "Option Label Patterns",
120
167
  | "Similar command, slightly different path -- I'll reuse / wrap it" | Grants match the statement signature byte-for-byte. Any wrapper, redirect, flag, or path drift is a different signature and a fresh re-block. |
121
168
  | "The same command emitted a new approval_id" | Grants are single-use and consumed on the first retry. A second run is a new APPROVAL_REQUEST -- approve again. |
122
169
  | "I'll set batch_scope to approve many at once" | `batch_scope` is ignored -- but a real batch path exists: a plan-first `command_set` (>= 2 items, no `approval_id`) is intaken into ONE pending `COMMAND_SET`. Present that single approval (N commands shown, one `[P-...]` nonce, one consent), not N separate approvals. |
123
- | "I can paraphrase a field before relaying" | The fingerprint covers all 7 sealed fields; any modification raises `ChainTamperError` in Step 0 and the presentation is refused. |
124
- | **"The agent reported an `approval_id`, so it's a real fresh block"** -- trusting a nonce relayed by the subagent | The agent's reported id is an unverified pointer, not a fact. It can be stale or belong to another session -- subagents have presented a STALE nonce from a test session (`e2e-sim`) as if it were a fresh block. Resolve every reported id against `gaia approvals pending --session "$CLAUDE_SESSION_ID"` (Step 0): it must be currently pending in *this* session. Visible only under `--all-sessions`, or absent entirely, means do not present. For `command_set` the hook mints the id and the agent never has one -- you always recover it from the DB. |
170
+ | "I can paraphrase a field before relaying" | The fingerprint covers all sealed fields and is checked at grant **activation** (`verify_fingerprint`, when the user selects the Approve label); a paraphrased field fails that check, raising `ChainTamperError`, so the grant never forms. Relay verbatim so activation succeeds. |
171
+ | **"I'll dispatch a subagent to verify or derive the approval before presenting"** | The orchestrator has no shell and must NEVER dispatch to verify or derive an approval. The pending arrives **already verified** in the injected `[PENDING-APPROVALS-VERIFIED]` block (DB-read + fingerprint-checked by the per-turn hook, `verified: true`) -- present from it. For a same-turn pending not yet in the block, present from the subagent's relayed `approval_request`. A verify/derive dispatch is unnecessary (integrity is enforced at activation) and harmful (its SubagentStop can sweep the very pending you are about to present). For `command_set`, read the minted `approval_id` and all commands from the block -- do not run `gaia approvals derive-id`. |
@@ -151,6 +151,16 @@ commands** in the question body, with **one** Approve label carrying **one**
151
151
  `[P-{nonce8}]` suffix. The user gives one consent; each command then runs on its
152
152
  own retry within the 60-minute window. You do NOT issue N separate approvals.
153
153
 
154
+ **Reading the batch id and commands -- from the block, not by dispatch.** Once
155
+ the minted `COMMAND_SET` pending has survived a turn, it appears in the injected
156
+ `[PENDING-APPROVALS-VERIFIED]` block with its content-derived `approval_id` and
157
+ all N commands attached (`build_verified_pending_approvals` in
158
+ `hooks/modules/session/session_manifest.py`). Read the id and the commands
159
+ straight from that block -- the orchestrator has no shell and must NOT dispatch
160
+ `gaia approvals derive-id` or any verify command. For a command_set emitted in
161
+ the CURRENT turn (not yet in the block), present from the subagent's relayed
162
+ `approval_request`, which carries the same `command_set`.
163
+
154
164
  ## Grant Activation Mechanics
155
165
 
156
166
  When the hook blocks a T3 Bash command in subagent context,
@@ -161,9 +171,12 @@ generates a `P-{uuid4_hex}` `approval_id`, fingerprints the payload, inserts an
161
171
  message ends with `approval_id: P-{...}` (`build_t3_blocked_denial_message` in
162
172
  `hooks/modules/security/approval_messages.py`).
163
173
 
164
- The subagent relays that `approval_id` in its `approval_request`. The
165
- orchestrator presents via AskUserQuestion with the `[P-xxxxxxxx]` label. When
166
- the user selects the Approve label, the **ElicitationResult hook**
174
+ The orchestrator presents via AskUserQuestion with the `[P-xxxxxxxx]` label,
175
+ reading the `approval_id` and fields from the injected
176
+ `[PENDING-APPROVALS-VERIFIED]` block (primary) or, for a same-turn pending not
177
+ yet in the block, from the subagent's relayed `approval_request` (fallback). It
178
+ does not dispatch to verify or derive. When the user selects the Approve label,
179
+ the **ElicitationResult hook**
167
180
  (`hooks/elicitation_result.py`) fires and calls
168
181
  `activate_db_pending_by_prefix()`, which:
169
182
 
@@ -1,8 +1,12 @@
1
1
  # AskUserQuestion Template
2
2
 
3
3
  Use this layout verbatim when presenting an approval to the user. Replace
4
- `{...}` placeholders with values extracted from the subagent's `sealed_payload`
5
- and `approval_request`. Do not paraphrase, summarize, or omit any field.
4
+ `{...}` placeholders with values read from your trusted source -- the injected
5
+ `[PENDING-APPROVALS-VERIFIED]` block (primary; already DB-read and
6
+ fingerprint-verified by the per-turn hook) or, for a same-turn pending not yet
7
+ in the block, the subagent's relayed `approval_request` (fallback). Never
8
+ dispatch a subagent to derive or verify the approval. Do not paraphrase,
9
+ summarize, or omit any field.
6
10
 
7
11
  ## Standard Approval (single command)
8
12
 
@@ -23,19 +27,21 @@ AskUserQuestion(
23
27
  )
24
28
  ```
25
29
 
26
- Where `approval_id_prefix8` is the first 8 characters of the `approval_id`
27
- field from the subagent's `approval_request` (after the `P-` prefix).
30
+ Where `approval_id_prefix8` is the first 8 characters (after the `P-` prefix) of
31
+ the `approval_id` read from the `[PENDING-APPROVALS-VERIFIED]` block, or from the
32
+ subagent's `approval_request` for a same-turn pending.
28
33
 
29
- ## No batch template
34
+ ## Batch template (COMMAND_SET)
30
35
 
31
- There is no batch/multi-use approval in the current code. The `verb_family` grant
32
- was removed (see the module docstring of
33
- `hooks/modules/security/approval_grants.py`) and the `COMMAND_SET` replacement
34
- has no production activation path (`create_command_set_grant` has no production
35
- caller). The word "batch" in a label and a `batch_scope` field are both ignored.
36
- For a sweep of N commands, present each command with its own single-command
37
- approval (the template above), once per `approval_id`. See `reference.md` ->
38
- "On batch intents".
36
+ When the subagent emits a plan-first `APPROVAL_REQUEST` with a `command_set`
37
+ of >= 2 `{command, rationale}` items and **no** `approval_id`, the
38
+ SubagentStop intake mints ONE pending `COMMAND_SET` approval. Present it as
39
+ a single approval: list all N commands in the question body, one Approve
40
+ label with one `[P-{nonce8}]` suffix. See `reference.md` -> "On batch
41
+ intents" for the full layout.
42
+
43
+ A `batch_scope` field and the word "batch" in an option label are both
44
+ ignored -- the signal is the presence of `command_set` in the contract.
39
45
 
40
46
  ## Field Extraction Reference
41
47
 
@@ -46,4 +52,4 @@ approval (the template above), once per `approval_id`. See `reference.md` ->
46
52
  | SCOPE | `sealed_payload.scope` |
47
53
  | RIESGO | `sealed_payload.risk_level` + `sealed_payload.rationale` |
48
54
  | ROLLBACK | `sealed_payload.rollback_hint` (null -> "NOT REVERSIBLE") |
49
- | Option nonce suffix | `approval_request.approval_id` first 8 chars after `P-` |
55
+ | Option nonce suffix | `approval_id` first 8 chars after `P-` (from the `[PENDING-APPROVALS-VERIFIED]` block, or `approval_request.approval_id` for a same-turn pending) |
@@ -37,7 +37,7 @@ report "rejected" when nothing actually changed.
37
37
  | `gaia approvals list` | DB grants + filesystem pendings | `cmd_list` (mixed) |
38
38
  | `gaia approvals reject NONCE` | filesystem only | `reject_pending` in `hooks/modules/security/approval_grants.py` |
39
39
  | `gaia approvals reject-all` | filesystem only | loops `reject_pending` |
40
- | `gaia approvals clean` | filesystem only | `cleanup_expired_grants` |
40
+ | `gaia approvals clean` | DB (cross-session stale pendings) + filesystem | `cmd_clean` in `bin/cli/approvals.py`: calls `store.list_pending(all_sessions=True)`, transitions every pending older than `DEFAULT_PENDING_TTL_MINUTES` (24 h) to `revoked` via `store.revoke()`, then calls `cleanup_expired_grants` for filesystem files |
41
41
 
42
42
  The practical consequence: `revoke` is the DB-aware single-id verb; `reject` and
43
43
  `reject-all` only touch the legacy filesystem queue. If you need to mark a DB
@@ -105,15 +105,19 @@ Offer bulk cleanup when the user says "limpia todos los pendings", "borra los
105
105
  pendientes", or when SessionStart surfaces 5+ orphaned pendings the user has
106
106
  not engaged with.
107
107
 
108
- - `gaia approvals reject-all` -- bulk reject across the **filesystem** queue.
109
- Returns "0 rejected" when the queue is empty.
110
- - `gaia approvals clean` -- removes expired/stale **filesystem** files.
108
+ - `gaia approvals reject-all` -- bulk soft-reject across the **filesystem** queue.
109
+ Returns "0 rejected" when the queue is empty. Does not touch DB rows.
110
+ - `gaia approvals clean` -- the first-class cross-session bulk drain for stale
111
+ DB pendings: `cmd_clean` calls `store.list_pending(all_sessions=True)` and
112
+ transitions every pending older than 24 h (`DEFAULT_PENDING_TTL_MINUTES`) to
113
+ `revoked` via `store.revoke()`, then runs `cleanup_expired_grants` to clean
114
+ expired filesystem grant files. Runs without a T3 prompt (consent-reducing,
115
+ listed in `CONSENT_REDUCING_SUBCOMMAND_EXCEPTIONS`). Use this when
116
+ `gaia approvals pending --all-sessions` shows a backlog of stale rows.
111
117
 
112
- There is no first-class bulk-revoke for the DB queue. If `gaia approvals
113
- pending --all-sessions` shows rows that need clearing, either revoke each by id
114
- or call `store.revoke()` in a short Python loop. Do not report "bulk cleanup
115
- done" after `reject-all` if the DB queue still has pending rows -- check
116
- `gaia approvals pending --all-sessions` to confirm.
118
+ Do not report "bulk cleanup done" after `reject-all` alone -- it only clears
119
+ the filesystem queue. Run `gaia approvals clean` to drain the DB backlog, then
120
+ confirm with `gaia approvals pending --all-sessions`.
117
121
 
118
122
  Do not offer `reject-all` when there are active same-session pendings the user
119
123
  may still want to approve.
@@ -123,8 +127,9 @@ may still want to approve.
123
127
  - Approving without showing the exact COMANDO -- the user consents on the
124
128
  verbatim string, not a summary. The full presentation discipline lives in
125
129
  `orchestrator-present-approval`; this skill does not restate it.
126
- - Treating `gaia approvals reject-all` as a DB cleanup -- it operates on the
127
- filesystem queue only. DB rows survive the call.
130
+ - Treating `gaia approvals reject-all` as a full cleanup -- it operates on the
131
+ filesystem queue only; DB rows survive the call. Use `gaia approvals clean`
132
+ to drain the DB backlog.
128
133
  - Reporting "rechazado" without verifying the store -- `revoke` returns
129
134
  `not_found` for filesystem-only pendings; the inverse happens for `reject` on
130
135
  DB rows. Pick the verb by store, or be ready to fall back.