@jaguilar87/gaia 5.0.2 → 5.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (154)
  1. package/.claude-plugin/marketplace.json +2 -2
  2. package/.claude-plugin/plugin.json +1 -1
  3. package/ARCHITECTURE.md +0 -1
  4. package/CHANGELOG.md +110 -0
  5. package/INSTALL.md +0 -2
  6. package/README.md +1 -6
  7. package/bin/README.md +0 -1
  8. package/bin/cli/_install_helpers.py +1 -1
  9. package/bin/cli/approvals.py +23 -21
  10. package/bin/cli/cleanup.py +0 -1
  11. package/bin/cli/doctor.py +1 -1
  12. package/bin/cli/memory.py +2 -0
  13. package/bin/cli/update.py +1 -1
  14. package/bin/pre-publish-validate.js +48 -5
  15. package/config/README.md +22 -44
  16. package/config/surface-routing.json +0 -2
  17. package/dist/gaia-ops/.claude-plugin/plugin.json +1 -1
  18. package/dist/gaia-ops/config/README.md +22 -44
  19. package/dist/gaia-ops/config/surface-routing.json +0 -2
  20. package/dist/gaia-ops/hooks/modules/agents/contract_validator.py +18 -0
  21. package/dist/gaia-ops/hooks/modules/agents/handoff_persister.py +214 -2
  22. package/dist/gaia-ops/hooks/modules/agents/response_contract.py +26 -0
  23. package/dist/gaia-ops/hooks/modules/agents/transcript_reader.py +15 -0
  24. package/dist/gaia-ops/hooks/modules/security/__init__.py +0 -5
  25. package/dist/gaia-ops/hooks/modules/security/approval_grants.py +124 -19
  26. package/dist/gaia-ops/hooks/modules/security/mutative_verbs.py +99 -7
  27. package/dist/gaia-ops/hooks/modules/tools/bash_validator.py +127 -24
  28. package/dist/gaia-ops/hooks/modules/validation/commit_validator.py +90 -55
  29. package/dist/gaia-ops/skills/README.md +1 -1
  30. package/dist/gaia-ops/skills/agent-contract-handoff/SKILL.md +3 -0
  31. package/dist/gaia-ops/skills/agent-response/SKILL.md +4 -2
  32. package/dist/gaia-ops/skills/gaia-patterns/SKILL.md +1 -1
  33. package/dist/gaia-ops/skills/gaia-patterns/reference.md +2 -3
  34. package/dist/gaia-ops/skills/gaia-release/SKILL.md +60 -24
  35. package/dist/gaia-ops/skills/gaia-release/reference.md +35 -11
  36. package/dist/gaia-ops/skills/git-conventions/SKILL.md +6 -2
  37. package/dist/gaia-ops/skills/orchestrator-present-approval/SKILL.md +30 -7
  38. package/dist/gaia-ops/skills/orchestrator-present-approval/reference.md +32 -15
  39. package/dist/gaia-ops/skills/readme-writing/SKILL.md +1 -1
  40. package/dist/gaia-ops/skills/readme-writing/reference.md +0 -1
  41. package/dist/gaia-ops/skills/security-tiers/SKILL.md +5 -1
  42. package/dist/gaia-ops/skills/security-tiers/reference.md +3 -1
  43. package/dist/gaia-ops/skills/subagent-request-approval/SKILL.md +43 -6
  44. package/dist/gaia-ops/skills/subagent-request-approval/reference.md +66 -16
  45. package/dist/gaia-ops/tools/context/README.md +1 -1
  46. package/dist/gaia-ops/tools/gaia_simulator/extractor.py +0 -1
  47. package/dist/gaia-ops/tools/scan/ui.py +20 -4
  48. package/dist/gaia-ops/tools/scan/verify.py +3 -3
  49. package/dist/gaia-ops/tools/validation/README.md +15 -24
  50. package/dist/gaia-security/.claude-plugin/plugin.json +1 -1
  51. package/dist/gaia-security/hooks/modules/agents/contract_validator.py +18 -0
  52. package/dist/gaia-security/hooks/modules/agents/handoff_persister.py +214 -2
  53. package/dist/gaia-security/hooks/modules/agents/response_contract.py +26 -0
  54. package/dist/gaia-security/hooks/modules/agents/transcript_reader.py +15 -0
  55. package/dist/gaia-security/hooks/modules/security/__init__.py +0 -5
  56. package/dist/gaia-security/hooks/modules/security/approval_grants.py +124 -19
  57. package/dist/gaia-security/hooks/modules/security/mutative_verbs.py +99 -7
  58. package/dist/gaia-security/hooks/modules/tools/bash_validator.py +127 -24
  59. package/dist/gaia-security/hooks/modules/validation/commit_validator.py +90 -55
  60. package/gaia/state/transitions.py +4 -4
  61. package/gaia/store/writer.py +56 -0
  62. package/hooks/modules/README.md +2 -4
  63. package/hooks/modules/agents/contract_validator.py +18 -0
  64. package/hooks/modules/agents/handoff_persister.py +214 -2
  65. package/hooks/modules/agents/response_contract.py +26 -0
  66. package/hooks/modules/agents/transcript_reader.py +15 -0
  67. package/hooks/modules/security/__init__.py +0 -5
  68. package/hooks/modules/security/approval_grants.py +124 -19
  69. package/hooks/modules/security/mutative_verbs.py +99 -7
  70. package/hooks/modules/tools/bash_validator.py +127 -24
  71. package/hooks/modules/validation/commit_validator.py +90 -55
  72. package/index.js +2 -12
  73. package/package.json +4 -6
  74. package/pyproject.toml +3 -3
  75. package/scripts/bootstrap_database.sh +88 -439
  76. package/scripts/check_schema_drift.py +208 -0
  77. package/scripts/migrations/README.md +78 -28
  78. package/scripts/migrations/schema.checksum +8 -0
  79. package/scripts/release-prepare.mjs +199 -0
  80. package/skills/README.md +1 -1
  81. package/skills/agent-contract-handoff/SKILL.md +3 -0
  82. package/skills/agent-response/SKILL.md +4 -2
  83. package/skills/gaia-patterns/SKILL.md +1 -1
  84. package/skills/gaia-patterns/reference.md +2 -3
  85. package/skills/gaia-release/SKILL.md +60 -24
  86. package/skills/gaia-release/reference.md +35 -11
  87. package/skills/git-conventions/SKILL.md +6 -2
  88. package/skills/orchestrator-present-approval/SKILL.md +30 -7
  89. package/skills/orchestrator-present-approval/reference.md +32 -15
  90. package/skills/readme-writing/SKILL.md +1 -1
  91. package/skills/readme-writing/reference.md +0 -1
  92. package/skills/security-tiers/SKILL.md +5 -1
  93. package/skills/security-tiers/reference.md +3 -1
  94. package/skills/subagent-request-approval/SKILL.md +43 -6
  95. package/skills/subagent-request-approval/reference.md +66 -16
  96. package/tools/context/README.md +1 -1
  97. package/tools/gaia_simulator/extractor.py +0 -1
  98. package/tools/scan/ui.py +20 -4
  99. package/tools/scan/verify.py +3 -3
  100. package/tools/validation/README.md +15 -24
  101. package/commands/README.md +0 -64
  102. package/commands/gaia.md +0 -37
  103. package/commands/scan-project.md +0 -74
  104. package/config/crons-schema.md +0 -81
  105. package/config/git_standards.json +0 -72
  106. package/dist/gaia-ops/commands/gaia.md +0 -37
  107. package/dist/gaia-ops/config/crons-schema.md +0 -81
  108. package/dist/gaia-ops/config/git_standards.json +0 -72
  109. package/dist/gaia-ops/hooks/modules/security/gitops_validator.py +0 -179
  110. package/dist/gaia-ops/tools/agentic-loop/decide-status.py +0 -210
  111. package/dist/gaia-ops/tools/agentic-loop/parse-metric.py +0 -106
  112. package/dist/gaia-ops/tools/agentic-loop/record-iteration.py +0 -223
  113. package/dist/gaia-security/hooks/modules/security/gitops_validator.py +0 -179
  114. package/git-hooks/commit-msg +0 -41
  115. package/hooks/modules/security/gitops_validator.py +0 -179
  116. package/scripts/migrations/v10_to_v11.sql +0 -170
  117. package/scripts/migrations/v10_to_v11_fresh.sql +0 -18
  118. package/scripts/migrations/v11_to_v12.sql +0 -195
  119. package/scripts/migrations/v11_to_v12_fresh.sql +0 -19
  120. package/scripts/migrations/v12_to_v13.sql +0 -48
  121. package/scripts/migrations/v12_to_v13_fresh.sql +0 -17
  122. package/scripts/migrations/v13_to_v14.sql +0 -44
  123. package/scripts/migrations/v13_to_v14_fresh.sql +0 -17
  124. package/scripts/migrations/v14_to_v15.sql +0 -71
  125. package/scripts/migrations/v14_to_v15_fresh.sql +0 -19
  126. package/scripts/migrations/v15_to_v16.sql +0 -57
  127. package/scripts/migrations/v15_to_v16_fresh.sql +0 -18
  128. package/scripts/migrations/v16_to_v17.sql +0 -51
  129. package/scripts/migrations/v16_to_v17_fresh.sql +0 -18
  130. package/scripts/migrations/v17_to_v18.sql +0 -66
  131. package/scripts/migrations/v17_to_v18_fresh.sql +0 -24
  132. package/scripts/migrations/v1_to_v2.sql +0 -97
  133. package/scripts/migrations/v2_to_v3.sql +0 -68
  134. package/scripts/migrations/v2_to_v3_merge.sql +0 -69
  135. package/scripts/migrations/v3_to_v4.sql +0 -67
  136. package/scripts/migrations/v3_to_v4_fresh.sql +0 -20
  137. package/scripts/migrations/v4_to_v5.sql +0 -55
  138. package/scripts/migrations/v4_to_v5_fresh.sql +0 -20
  139. package/scripts/migrations/v5_to_v6.sql +0 -48
  140. package/scripts/migrations/v5_to_v6_fresh.sql +0 -17
  141. package/scripts/migrations/v6_to_v7.sql +0 -26
  142. package/scripts/migrations/v6_to_v7_fresh.sql +0 -13
  143. package/scripts/migrations/v7_to_v8.sql +0 -44
  144. package/scripts/migrations/v7_to_v8_fresh.sql +0 -14
  145. package/scripts/migrations/v8_to_v9.sql +0 -87
  146. package/scripts/migrations/v8_to_v9_fresh.sql +0 -15
  147. package/scripts/migrations/v9_to_v10.sql +0 -109
  148. package/scripts/migrations/v9_to_v10_episodes_workspace.sql +0 -109
  149. package/scripts/migrations/v9_to_v10_fresh.sql +0 -18
  150. package/templates/README.md +0 -70
  151. package/templates/managed-settings.template.json +0 -43
  152. package/tools/agentic-loop/decide-status.py +0 -210
  153. package/tools/agentic-loop/parse-metric.py +0 -106
  154. package/tools/agentic-loop/record-iteration.py +0 -223
@@ -10,11 +10,187 @@ arise if the adapter imported _persist_handoff directly from subagent_stop
10
10
  (which itself imports from the adapter's dependency tree).
11
11
  """
12
12
 
13
+ from __future__ import annotations
14
+
13
15
  import logging
14
16
 
15
17
  logger = logging.getLogger(__name__)
16
18
 
17
19
 
20
+ def _normalize_command_set(raw) -> list:
21
+ """Coerce a raw ``command_set`` into the canonical ``[{command, rationale}]``.
22
+
23
+ Mirrors the normalization in ``bash_validator._build_sealed_payload`` and
24
+ ``approval_grants.activate_db_pending_by_prefix`` so the intake writes the
25
+ exact shape the activation/consume sides expect. Items without a non-empty
26
+ ``command`` are dropped; ``rationale`` defaults to "".
27
+ """
28
+ out: list = []
29
+ if isinstance(raw, list):
30
+ for item in raw:
31
+ if isinstance(item, dict) and item.get("command"):
32
+ out.append(
33
+ {
34
+ "command": item["command"],
35
+ "rationale": item.get("rationale", ""),
36
+ }
37
+ )
38
+ return out
39
+
40
+
41
+ def _filter_mutative_command_set(items: list) -> list:
42
+ """Keep only the command_set items whose command is mutative/T3.
43
+
44
+ The consume side (``bash_validator._validate_single_command``) gates the
45
+ whole COMMAND_SET match path on ``detect_mutative_command(command).is_mutative``:
46
+ a command that the matcher does not see as mutative NEVER reaches
47
+ ``match_command_set_grant`` and its index is therefore NEVER consumed. If
48
+ such a command is included in the grant's ``command_set``, ``len(consumed)``
49
+ can never reach ``len(command_set)`` and the grant is stuck PENDING forever
50
+ (it never flips to CONSUMED). To stay in lockstep with the consume gate, the
51
+ intake filters with the EXACT same predicate, dropping non-mutative commands
52
+ (e.g. ``touch``, ``ls``, ``cat``) before the grant is ever minted.
53
+
54
+ Items that fail to classify (import error, unexpected exception) are kept --
55
+ failing open here is safer than silently dropping a command from a consent
56
+ batch the user is about to approve.
57
+ """
58
+ try:
59
+ from modules.security.mutative_verbs import detect_mutative_command
60
+ except ImportError:
61
+ import pathlib as _pl
62
+ import sys as _sys
63
+
64
+ _hooks_root = _pl.Path(__file__).resolve().parent.parent.parent
65
+ _sys.path.insert(0, str(_hooks_root))
66
+ from modules.security.mutative_verbs import detect_mutative_command
67
+
68
+ kept: list = []
69
+ for item in items:
70
+ command = item.get("command", "")
71
+ try:
72
+ if detect_mutative_command(command).is_mutative:
73
+ kept.append(item)
74
+ except Exception:
75
+ # Fail open: if classification raises, keep the item rather than
76
+ # silently dropping a command from the user's consent batch.
77
+ kept.append(item)
78
+ return kept
79
+
80
+
81
+ def _intake_command_set_pending(
82
+ approval_req: dict,
83
+ *,
84
+ agent_id,
85
+ session_id: str,
86
+ ) -> str | None:
87
+ """INTAKE bridge: plan-first COMMAND_SET envelope -> ONE pending row.
88
+
89
+ When a subagent emits an ``APPROVAL_REQUEST`` whose ``approval_request``
90
+ carries a ``command_set`` of >= 2 ``{command, rationale}`` items and NO
91
+ ``approval_id`` (plan-first: the batch is declared up-front, before any
92
+ command was attempted/blocked), this persists exactly ONE pending approval
93
+ whose ``payload_json`` contains the ``command_set`` key. That is the signal
94
+ ``activate_db_pending_by_prefix`` reads (Step 3b) to branch into
95
+ ``create_command_set_grant`` on user approval.
96
+
97
+ Mutative filtering (Thread a): the command_set is first reduced to ONLY the
98
+ commands the consume side will treat as mutative/T3 -- see
99
+ ``_filter_mutative_command_set``. Non-mutative commands (``touch``, ``ls``,
100
+ ...) never reach the bash_validator matcher, so leaving them in the grant
101
+ would strand its ``consumed_indexes_json`` short of completion and pin the
102
+ grant at PENDING forever. After filtering:
103
+
104
+ * >= 2 mutative items -> mint the COMMAND_SET over exactly those items.
105
+ * exactly 1 mutative -> NOT a batch. Return None; the caller falls
106
+ through to the singular ``approval_id`` path and the lone command is
107
+ gated by the normal hook-block / SCOPE_SEMANTIC_SIGNATURE flow when the
108
+ agent attempts it. We deliberately do NOT degrade-to-singular here: this
109
+ function's contract is "mint a COMMAND_SET or stand aside", and the
110
+ singular flow is owned end-to-end by the hook block path -- minting a
111
+ singular row from here would duplicate that ownership.
112
+ * 0 mutative -> nothing to approve. Return None (no pending).
113
+
114
+ A raw ``command_set`` of <= 1 item is likewise not a batch and returns None
115
+ before filtering, preserving the original contract (never mint for one
116
+ command, never degrade a batch the other way) and the working plan-first
117
+ flow for genuine multi-command mutative batches.
118
+
119
+ Returns the minted ``approval_id`` (``P-{uuid4hex}``) on success, or None
120
+ when this is not a plan-first command_set envelope (no action taken).
121
+ """
122
+ if not isinstance(approval_req, dict):
123
+ return None
124
+ # Plan-first is defined by command_set present AND no approval_id. A request
125
+ # that already carries an approval_id was minted by the hook block path; it
126
+ # is the singular flow and must not be re-intaken here.
127
+ if approval_req.get("approval_id"):
128
+ return None
129
+
130
+ raw_items = _normalize_command_set(approval_req.get("command_set"))
131
+ if len(raw_items) < 2:
132
+ # 0 or 1 item: not a batch. Singular path owns it.
133
+ return None
134
+
135
+ # Reduce to the mutative/T3 commands only -- the exact predicate the consume
136
+ # side uses to decide whether a command reaches the COMMAND_SET matcher.
137
+ command_set_items = _filter_mutative_command_set(raw_items)
138
+ if len(command_set_items) < 2:
139
+ # After filtering there is no batch left: either every command was
140
+ # non-mutative (0 -> nothing to approve) or just one mutative command
141
+ # remained (1 -> singular path owns it). Either way, no COMMAND_SET.
142
+ logger.info(
143
+ "INTAKE: command_set not minted -- %d/%d items mutative after filter "
144
+ "(need >= 2 for a batch)",
145
+ len(command_set_items), len(raw_items),
146
+ )
147
+ return None
148
+
149
+ # Build a sealed_payload that mirrors bash_validator._build_sealed_payload's
150
+ # COMMAND_SET shape: command_set verbatim + commands listing every string.
151
+ # Carry through the subagent's operation/risk fields when present so the
152
+ # orchestrator's presentation has real values, falling back to neutral
153
+ # COMMAND_SET defaults otherwise.
154
+ first_command = command_set_items[0]["command"]
155
+ sealed_payload = {
156
+ "operation": approval_req.get("operation")
157
+ or f"COMMAND_SET intercepted: {len(command_set_items)} commands under one consent",
158
+ "exact_content": approval_req.get("exact_content") or first_command,
159
+ "scope": approval_req.get("scope")
160
+ or (first_command.split()[0] if first_command.strip() else "unknown"),
161
+ "risk_level": approval_req.get("risk_level") or "medium",
162
+ "rollback_hint": approval_req.get("rollback") or approval_req.get("rollback_hint"),
163
+ "rationale": approval_req.get("rationale")
164
+ or (
165
+ f"A batch of {len(command_set_items)} related T3 commands requires user "
166
+ "approval under one consent per the COMMAND_SET policy."
167
+ ),
168
+ "commands": [it["command"] for it in command_set_items],
169
+ "command_set": command_set_items,
170
+ }
171
+
172
+ try:
173
+ from gaia.approvals.store import insert_requested
174
+ except ImportError:
175
+ import pathlib as _pl
176
+ import sys as _sys
177
+
178
+ _repo_root = _pl.Path(__file__).resolve().parent.parent.parent.parent
179
+ _sys.path.insert(0, str(_repo_root))
180
+ from gaia.approvals.store import insert_requested
181
+
182
+ approval_id = insert_requested(
183
+ sealed_payload,
184
+ agent_id=agent_id,
185
+ session_id=session_id or None,
186
+ )
187
+ logger.info(
188
+ "INTAKE: plan-first COMMAND_SET pending created approval_id=%s items=%d",
189
+ (approval_id or "")[:16], len(command_set_items),
190
+ )
191
+ return approval_id
192
+
193
+
18
194
  def persist_handoff(
19
195
  parsed_contract,
20
196
  agent_output: str,
@@ -38,6 +214,38 @@ def persist_handoff(
38
214
  import pathlib as _pl
39
215
  import sys as _sys
40
216
 
217
+ agent_id = task_info.get("agent_id") or task_info.get("agent") or "unknown"
218
+
219
+ # ---------------------------------------------------------------------
220
+ # INTAKE bridge (plan-first COMMAND_SET) -- run FIRST and INDEPENDENTLY.
221
+ #
222
+ # Minting the pending COMMAND_SET approval is the security-critical path:
223
+ # it is the consent the user must act on. It must not be coupled to the
224
+ # audit handoff-row write below -- if insert_agent_contract_handoff fails
225
+ # for any reason, the user must still get the approval to review. So the
226
+ # intake runs in its own isolated try, before the handoff-row write.
227
+ #
228
+ # Only plan-first envelopes act here: command_set >= 2 items AND no
229
+ # approval_id. A <= 1 item set or a request that already carries an
230
+ # approval_id (hook-block / singular path) is a no-op for the intake.
231
+ # ---------------------------------------------------------------------
232
+ minted_command_set_id = None
233
+ if parsed_contract is not None:
234
+ _env = parsed_contract if isinstance(parsed_contract, dict) else {}
235
+ _approval_req = _env.get("approval_request")
236
+ if isinstance(_approval_req, dict):
237
+ try:
238
+ minted_command_set_id = _intake_command_set_pending(
239
+ _approval_req,
240
+ agent_id=agent_id,
241
+ session_id=session_id,
242
+ )
243
+ except Exception as _intake_exc:
244
+ logger.warning(
245
+ "M4: COMMAND_SET intake failed (non-blocking): %s",
246
+ _intake_exc,
247
+ )
248
+
41
249
  try:
42
250
  # Prefer a sibling gaia package if installed; fall back to the repo
43
251
  # layout where gaia/ lives two levels above hooks/.
@@ -48,7 +256,6 @@ def persist_handoff(
48
256
  _sys.path.insert(0, str(_repo_root))
49
257
  from gaia.store import writer as _writer
50
258
 
51
- agent_id = task_info.get("agent_id") or task_info.get("agent") or "unknown"
52
259
  workspace = task_info.get("workspace") or _os.environ.get("GAIA_WORKSPACE") or "global"
53
260
  db_path_str = task_info.get("db_path")
54
261
  db_path = _pl.Path(db_path_str) if db_path_str else None
@@ -99,7 +306,12 @@ def persist_handoff(
99
306
  envelope = parsed_contract if isinstance(parsed_contract, dict) else {}
100
307
  approval_req = envelope.get("approval_request")
101
308
  if approval_req and isinstance(approval_req, dict):
102
- approval_id = approval_req.get("approval_id")
309
+ # The approval_id is either the one the subagent relayed (hook-block
310
+ # / singular path) or the one the INTAKE bridge just minted for a
311
+ # plan-first COMMAND_SET. Either way it points at the pending row
312
+ # the handoff_approvals audit row should link to.
313
+ approval_id = approval_req.get("approval_id") or minted_command_set_id
314
+
103
315
  if approval_id:
104
316
  # Look up the grant to determine the decision at stop time.
105
317
  try:
@@ -402,6 +402,31 @@ def parse_memorialize_suggestions(
402
402
  return _extract_memorialize_suggestions(contract)
403
403
 
404
404
 
405
def parse_user_facing_summary(
    agent_output: str,
    parsed_contract: Optional[dict] = None,
) -> Optional[str]:
    """Parse the optional top-level ``user_facing_summary`` field (Option A).

    This is the one human-audience field in the contract: a brief prose
    summary the subagent writes for the user. It is intended to be relayed
    near-verbatim by the orchestrator on a single-agent COMPLETE instead of
    re-synthesizing ``key_outputs``.

    The field is strictly additive and advisory: it is never required, never
    affects contract validity, and this parser never raises on a malformed
    contract -- anything unusable simply yields ``None``.

    Args:
        agent_output: Raw agent output; parsed with ``parse_contract`` only
            when ``parsed_contract`` is not supplied.
        parsed_contract: An already-parsed contract, if the caller has one.

    Returns:
        The summary with surrounding whitespace removed, or ``None`` when the
        contract is missing or not a dict, or the field is absent, null,
        blank, or not a string.
    """
    contract = parsed_contract if parsed_contract is not None else parse_contract(agent_output)
    # Guard on the type, not just on None: a caller-supplied non-dict contract
    # must not turn an advisory lookup into an AttributeError.
    if not isinstance(contract, dict):
        return None
    raw = contract.get("user_facing_summary")
    if not isinstance(raw, str):
        return None
    return raw.strip() or None
428
+
429
+
405
430
  def _is_resume_agent_id(value: str) -> bool:
406
431
  return bool(_AGENT_ID_PATTERN.match(value or ""))
407
432
 
@@ -659,6 +684,7 @@ __all__ = [
659
684
  "parse_evidence_report",
660
685
  "parse_consolidation_report",
661
686
  "parse_memorialize_suggestions",
687
+ "parse_user_facing_summary",
662
688
  "validate_response_contract",
663
689
  "save_validation_result",
664
690
  "load_last_validation",
@@ -139,10 +139,25 @@ def extract_injected_context_payload_from_transcript(
139
139
  """
140
140
  import os
141
141
 
142
+ # Empty/None path guard. Without it, Path("").stem == "" and the substring
143
+ # match below (``candidate.stem in "" or "" in candidate.stem``) is ALWAYS
144
+ # True because ``"" in any_string`` is True -- so an empty path would match
145
+ # (and return) the FIRST payload sitting in gaia-context-payloads/, making
146
+ # the result depend on whatever happens to be in that directory. Mirror the
147
+ # guard in read_first_user_content_from_transcript: no path, no match.
148
+ if not transcript_path:
149
+ return {}
150
+
142
151
  try:
143
152
  payload_dir = Path(os.environ.get("TMPDIR", "/tmp")) / "gaia-context-payloads"
144
153
  if payload_dir.exists():
145
154
  agent_file = Path(transcript_path).stem # e.g. "agent-ae190a4da68d626d4"
155
+ # A stem that came out empty (e.g. path was "/" or "."): nothing to
156
+ # match against, so the substring test would again degrade to the
157
+ # always-true ``"" in candidate.stem``. Bail rather than grab an
158
+ # arbitrary payload.
159
+ if not agent_file:
160
+ return {}
146
161
  # Match by agent ID substring
147
162
  for candidate in payload_dir.glob("*.json"):
148
163
  if candidate.stem in agent_file or agent_file in candidate.stem:
@@ -5,7 +5,6 @@ Provides:
5
5
  - tiers: SecurityTier enum and classification
6
6
  - blocked_commands: Permanently blocked pattern matching
7
7
  - mutative_verbs: Mutative verb detection (user approval workflow)
8
- - gitops_validator: kubectl/helm/flux validation
9
8
  - approval_constants: Approval token patterns (legacy APPROVE: and ElicitationResult)
10
9
  - approval_grants: Time-limited T3 command passthrough after user approval
11
10
  - shell_unwrapper: Detect and strip wrapper shells for inner command classification
@@ -21,7 +20,6 @@ from .blocked_commands import (
21
20
  get_blocked_patterns,
22
21
  BlockedCommandResult,
23
22
  )
24
- from .gitops_validator import validate_gitops_workflow, GitOpsValidationResult
25
23
  from .mutative_verbs import (
26
24
  CLI_FAMILY_LOOKUP,
27
25
  CATEGORY_MUTATIVE,
@@ -73,9 +71,6 @@ __all__ = [
73
71
  "is_blocked_command",
74
72
  "get_blocked_patterns",
75
73
  "BlockedCommandResult",
76
- # GitOps
77
- "validate_gitops_workflow",
78
- "GitOpsValidationResult",
79
74
  # Mutative verbs
80
75
  "CLI_FAMILY_LOOKUP",
81
76
  "CATEGORY_MUTATIVE",
@@ -16,10 +16,12 @@ Two-phase nonce-based approval flow:
16
16
  grant and allows it.
17
17
 
18
18
  Grants are:
19
- - Scoped to a session (CLAUDE_SESSION_ID)
20
- - Time-limited (default 10 minutes)
19
+ - Time-limited (default 10 minutes; DB grants use APPROVAL_GRANT_TTL_MINUTES)
21
20
  - Cleaned up after use or expiry
22
- - Stored in .claude/cache/approvals/
21
+ - Stored AUTHORITATIVELY in the DB (``approval_grants`` in gaia.db) since the
22
+ Brief 71 cutover. The filesystem plane (.claude/cache/approvals/) is the
23
+ DEPRECATED fallback retained only for grants minted before the cutover; new
24
+ grants are created and consumed through the DB plane (gaia.store.writer).
23
25
 
24
26
  Security properties:
25
27
  - Grants are created ONLY by the hook (not by agents)
@@ -28,8 +30,11 @@ Security properties:
28
30
  - The deny list (blocked_commands.py) is NEVER bypassed -- grants only
29
31
  override the dangerous verb detector
30
32
  - Nonces are 128-bit random hex (cannot be guessed)
31
- - Pending files are session-scoped (cannot be activated from another session)
32
- - A nonce can only be activated ONCE (pending file deleted on activation)
33
+ - A nonce can only be activated ONCE (DB row marked CONSUMED on activation;
34
+ legacy pending files are deleted on activation)
35
+ - DB grants are session-AGNOSTIC by design: the block-approve-retry flow
36
+ legitimately spans sessions, so replay protection comes from the CONSUMED
37
+ status + TTL, not from session scoping (see the DB-backed model note below)
33
38
 
34
39
  =============================================================================
35
40
  Grant lifetime (DB-backed model -- Brief 71 cutover)
@@ -71,6 +76,8 @@ fallback plane retained for grants created before the DB cutover. The active
71
76
  flow runs through the DB plane in gaia.store.writer.
72
77
  """
73
78
 
79
+ from __future__ import annotations
80
+
74
81
  import json
75
82
  import logging
76
83
  import os
@@ -1160,16 +1167,26 @@ def consume_grant(command: str, session_id: str = None) -> bool:
1160
1167
 
1161
1168
 
1162
1169
  def consume_session_grants(session_id: str = None) -> int:
1163
- """Consume all confirmed grants for a session.
1170
+ """Consume confirmed grants on the LEGACY FILESYSTEM plane for a session.
1171
+
1172
+ Called at SubagentStop. Scope is the deprecated FS plane ONLY: it sweeps
1173
+ ``grant-{session_id}-*.json`` files under the approvals cache dir and marks
1174
+ confirmed ones used (multi-use grants too, since the session is over).
1164
1175
 
1165
- Called at SubagentStop to clean up all grants that were used during the
1166
- subagent's lifetime. Multi-use grants are also consumed (session is over).
1176
+ This is a NO-OP for grants on the authoritative DB plane (post Brief 71):
1177
+ DB semantic grants are consumed on the MATCHING RETRY via
1178
+ ``consume_db_semantic_grant`` (see the module docstring, "DB-backed model"),
1179
+ NOT at SubagentStop. There is therefore no DB cleanup gap here -- DB replay
1180
+ protection is handled at consume-on-retry time, and this function
1181
+ intentionally does not (and must not) touch the DB plane. It remains live
1182
+ only to drain pre-cutover FS grants; new sessions that never write an FS
1183
+ grant simply get a return value of 0.
1167
1184
 
1168
1185
  Args:
1169
1186
  session_id: Session ID to scope consumption (defaults to env var).
1170
1187
 
1171
1188
  Returns:
1172
- Number of grants consumed.
1189
+ Number of legacy FS grants consumed (0 when no FS grants exist).
1173
1190
  """
1174
1191
  if not session_id:
1175
1192
  session_id = _get_session_id()
@@ -1789,7 +1806,31 @@ def activate_db_pending_by_prefix(
1789
1806
  reason="DB pending approval has invalid payload_json.",
1790
1807
  )
1791
1808
 
1809
+ # Multi-command (COMMAND_SET) detection. A payload carrying a
1810
+ # ``command_set`` list of more than one {command, rationale} item is a
1811
+ # batch the user approved under ONE consent. It must NOT be degraded to
1812
+ # a single command (the historic bug at this site) -- it activates into
1813
+ # a COMMAND_SET grant via the dedicated branch below. A set of length
1814
+ # <= 1 falls through to the singular SCOPE_SEMANTIC_SIGNATURE path so we
1815
+ # never mint a COMMAND_SET grant for one command.
1816
+ raw_command_set = payload.get("command_set")
1817
+ command_set_items: list = []
1818
+ if isinstance(raw_command_set, list):
1819
+ for _item in raw_command_set:
1820
+ if isinstance(_item, dict) and _item.get("command"):
1821
+ command_set_items.append(
1822
+ {
1823
+ "command": _item["command"],
1824
+ "rationale": _item.get("rationale", ""),
1825
+ }
1826
+ )
1827
+ is_command_set = len(command_set_items) > 1
1828
+
1792
1829
  command = payload.get("exact_content") or payload.get("commands", [None])[0] or ""
1830
+ if is_command_set and not command:
1831
+ # For a command_set the first item is a safe stand-in for the
1832
+ # singular display/signature path; the set itself is authoritative.
1833
+ command = command_set_items[0]["command"]
1793
1834
  if not command:
1794
1835
  logger.warning(
1795
1836
  "activate_db_pending_by_prefix: no command found in payload for %s",
@@ -1836,6 +1877,57 @@ def activate_db_pending_by_prefix(
1836
1877
  reason=f"DB transition failed: {ve}",
1837
1878
  )
1838
1879
 
1880
+ # Step 3b: COMMAND_SET branch. When the approved payload carries a set
1881
+ # of more than one command, create ONE COMMAND_SET grant covering the
1882
+ # whole batch instead of a singular SCOPE_SEMANTIC_SIGNATURE grant. The
1883
+ # set is consumed item-by-item (byte-for-byte) by bash_validator's
1884
+ # match_command_set_grant / mark_command_set_item_consumed path -- the
1885
+ # consume side is unchanged; this is the create side that was orphaned.
1886
+ #
1887
+ # Precondition: ``command_set`` in the payload is already pre-filtered to
1888
+ # mutative commands by ``_intake_command_set_pending`` (handoff_persister,
1889
+ # the only producer of these pending records in production). Activation
1890
+ # therefore assumes every item is consumable and does NOT re-filter here;
1891
+ # do not add a filtering step at this site -- it would silently drop items
1892
+ # the user already consented to under one grant.
1893
+ if is_command_set:
1894
+ created = create_command_set_grant(
1895
+ command_set_items,
1896
+ approval_id,
1897
+ session_id=current_session_id,
1898
+ agent_id=agent_id,
1899
+ ttl_minutes=DEFAULT_COMMAND_SET_TTL_MINUTES,
1900
+ )
1901
+ if not created:
1902
+ logger.error(
1903
+ "activate_db_pending_by_prefix: COMMAND_SET grant creation "
1904
+ "failed for approval_id=%s (items=%d)",
1905
+ approval_id[:16], len(command_set_items),
1906
+ )
1907
+ return ApprovalActivationResult(
1908
+ success=False,
1909
+ status=ACTIVATION_ERROR,
1910
+ reason="Failed to create COMMAND_SET grant from approved payload.",
1911
+ )
1912
+ logger.info(
1913
+ "activate_db_pending_by_prefix: COMMAND_SET grant created: "
1914
+ "approval_id=%s, items=%d, ttl=%d min, originating_session=%s, "
1915
+ "current_session=%s",
1916
+ approval_id[:16], len(command_set_items),
1917
+ DEFAULT_COMMAND_SET_TTL_MINUTES,
1918
+ (originating_session or "")[:12],
1919
+ current_session_id[:12],
1920
+ )
1921
+ return ApprovalActivationResult(
1922
+ success=True,
1923
+ status=ACTIVATION_ACTIVATED,
1924
+ reason=(
1925
+ "DB pending approval activated as a COMMAND_SET grant "
1926
+ f"({len(command_set_items)} commands under one consent)."
1927
+ ),
1928
+ grant_path=None,
1929
+ )
1930
+
1839
1931
  # Step 4: Rebuild approval signature from the command so the
1840
1932
  # filesystem grant has a valid scope_signature for check_approval_grant().
1841
1933
  from .approval_scopes import build_approval_signature, SCOPE_SEMANTIC_SIGNATURE
@@ -2026,7 +2118,13 @@ def activate_grants_for_session(
2026
2118
  # approved command (adding cd, redirect, pipe, flag) produces a different
2027
2119
  # string and requires fresh approval. Each item in the set is single-use.
2028
2120
 
2029
- DEFAULT_COMMAND_SET_TTL_MINUTES = 10
2121
+ # COMMAND_SET grant TTL in minutes. Aligned to the singular active-grant TTL
2122
+ # (DEFAULT_GRANT_TTL_MINUTES / APPROVAL_GRANT_TTL_MINUTES = 60) so a batch of
2123
+ # commands approved under one consent gets the same cross-session retry window
2124
+ # as a single approved command -- the block-approve-retry flow legitimately
2125
+ # spans sessions, and a shorter window would expire the batch before the
2126
+ # subagent could consume every item.
2127
+ DEFAULT_COMMAND_SET_TTL_MINUTES = 60
2030
2128
 
2031
2129
 
2032
2130
  def create_command_set_grant(
@@ -2107,7 +2205,6 @@ def create_command_set_grant(
2107
2205
  def match_command_set_grant(
2108
2206
  retried_command: str,
2109
2207
  *,
2110
- session_id: str | None = None,
2111
2208
  db_path=None,
2112
2209
  ) -> tuple | None:
2113
2210
  """Find an active COMMAND_SET grant containing ``retried_command``.
@@ -2117,14 +2214,26 @@ def match_command_set_grant(
2117
2214
  ``retried_command``. No normalization of any kind is applied.
2118
2215
 
2119
2216
  The grant must:
2217
+ - Have scope COMMAND_SET
2120
2218
  - Have status PENDING (not CONSUMED, REVOKED, or EXPIRED)
2121
2219
  - Not be past its expires_at timestamp
2122
2220
  - Contain ``retried_command`` at an index that has NOT been consumed
2123
- - Belong to the current session_id
2221
+
2222
+ The lookup is SESSION-AGNOSTIC (Brief 71), exactly like the singular path
2223
+ (``check_db_semantic_grant``). The block-approve-retry flow legitimately
2224
+ spans sessions, and CLAUDE_SESSION_ID is not guaranteed to be exported into
2225
+ the bash subprocess -- where ``get_session_id()`` falls back to the literal
2226
+ ``"default"``. The former session_id filter therefore silently dropped every grant
2227
+ created under the real session, letting approved COMMAND_SET commands run
2228
+ WITHOUT being consumed (the consumption-bypass bug). Replay protection is
2229
+ preserved by the conjunction of the byte-for-byte match, status='PENDING'
2230
+ plus per-index ``consumed_indexes_json``, and the expires_at TTL -- none of
2231
+ which depend on which session is asking. See
2232
+ ``gaia.store.writer.list_command_set_grants_agnostic`` for the full
2233
+ security-boundary rationale.
2124
2234
 
2125
2235
  Args:
2126
2236
  retried_command: The exact command string the agent wants to run.
2127
- session_id: CLAUDE_SESSION_ID (defaults to current session).
2128
2237
  db_path: Optional explicit DB path override (used by tests).
2129
2238
 
2130
2239
  Returns:
@@ -2132,15 +2241,11 @@ def match_command_set_grant(
2132
2241
  The caller should call mark_command_set_item_consumed(approval_id, index)
2133
2242
  after successful execution.
2134
2243
  """
2135
- if session_id is None:
2136
- session_id = _get_session_id()
2137
-
2138
2244
  try:
2139
- from gaia.store.writer import list_approval_grants
2245
+ from gaia.store.writer import list_command_set_grants_agnostic
2140
2246
  from datetime import datetime, timezone
2141
2247
 
2142
- grants = list_approval_grants(
2143
- session_id=session_id,
2248
+ grants = list_command_set_grants_agnostic(
2144
2249
  status="PENDING",
2145
2250
  db_path=db_path,
2146
2251
  )