@jaguilar87/gaia 5.0.7 → 5.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (99) hide show
  1. package/.claude-plugin/marketplace.json +2 -2
  2. package/.claude-plugin/plugin.json +1 -1
  3. package/CHANGELOG.md +13 -0
  4. package/bin/README.md +6 -1
  5. package/bin/cli/approvals.py +486 -474
  6. package/bin/cli/brief.py +13 -0
  7. package/bin/cli/doctor.py +1 -1
  8. package/dist/gaia-ops/.claude-plugin/plugin.json +1 -1
  9. package/dist/gaia-ops/hooks/adapters/claude_code.py +92 -86
  10. package/dist/gaia-ops/hooks/modules/agents/handoff_persister.py +13 -2
  11. package/dist/gaia-ops/hooks/modules/context/context_injector.py +23 -7
  12. package/dist/gaia-ops/hooks/modules/events/event_writer.py +63 -96
  13. package/dist/gaia-ops/hooks/modules/security/__init__.py +0 -2
  14. package/dist/gaia-ops/hooks/modules/security/approval_cleanup.py +238 -69
  15. package/dist/gaia-ops/hooks/modules/security/approval_grants.py +506 -1103
  16. package/dist/gaia-ops/hooks/modules/security/mutative_verbs.py +24 -1
  17. package/dist/gaia-ops/hooks/modules/session/pending_scanner.py +150 -90
  18. package/dist/gaia-ops/hooks/modules/session/session_manifest.py +257 -28
  19. package/dist/gaia-ops/hooks/modules/tools/bash_validator.py +19 -0
  20. package/dist/gaia-ops/hooks/post_compact.py +1 -0
  21. package/dist/gaia-ops/hooks/pre_compact.py +1 -0
  22. package/dist/gaia-ops/hooks/user_prompt_submit.py +20 -0
  23. package/dist/gaia-ops/skills/agent-approval-protocol/SKILL.md +50 -14
  24. package/dist/gaia-ops/skills/agent-approval-protocol/reference.md +16 -9
  25. package/dist/gaia-ops/skills/agent-protocol/examples.md +12 -1
  26. package/dist/gaia-ops/skills/gaia-patterns/reference.md +2 -2
  27. package/dist/gaia-ops/skills/orchestrator-present-approval/SKILL.md +69 -22
  28. package/dist/gaia-ops/skills/orchestrator-present-approval/reference.md +16 -3
  29. package/dist/gaia-ops/skills/orchestrator-present-approval/template.md +20 -14
  30. package/dist/gaia-ops/skills/pending-approvals/SKILL.md +16 -11
  31. package/dist/gaia-ops/skills/subagent-request-approval/SKILL.md +28 -3
  32. package/dist/gaia-ops/skills/subagent-request-approval/reference.md +34 -8
  33. package/dist/gaia-ops/tools/migration/README.md +10 -12
  34. package/dist/gaia-ops/tools/scan/orchestrator.py +194 -10
  35. package/dist/gaia-ops/tools/scan/tests/test_integration.py +1 -2
  36. package/dist/gaia-security/.claude-plugin/plugin.json +1 -1
  37. package/dist/gaia-security/hooks/adapters/claude_code.py +92 -86
  38. package/dist/gaia-security/hooks/modules/agents/handoff_persister.py +13 -2
  39. package/dist/gaia-security/hooks/modules/context/context_injector.py +23 -7
  40. package/dist/gaia-security/hooks/modules/events/event_writer.py +63 -96
  41. package/dist/gaia-security/hooks/modules/security/__init__.py +0 -2
  42. package/dist/gaia-security/hooks/modules/security/approval_cleanup.py +238 -69
  43. package/dist/gaia-security/hooks/modules/security/approval_grants.py +506 -1103
  44. package/dist/gaia-security/hooks/modules/security/mutative_verbs.py +24 -1
  45. package/dist/gaia-security/hooks/modules/session/pending_scanner.py +150 -90
  46. package/dist/gaia-security/hooks/modules/session/session_manifest.py +257 -28
  47. package/dist/gaia-security/hooks/modules/tools/bash_validator.py +19 -0
  48. package/dist/gaia-security/hooks/user_prompt_submit.py +20 -0
  49. package/gaia/approvals/__init__.py +2 -1
  50. package/gaia/approvals/store.py +165 -15
  51. package/gaia/store/schema.sql +38 -1
  52. package/gaia/store/writer.py +400 -0
  53. package/hooks/adapters/claude_code.py +92 -86
  54. package/hooks/elicitation_result.py +20 -75
  55. package/hooks/modules/agents/handoff_persister.py +13 -2
  56. package/hooks/modules/context/context_injector.py +23 -7
  57. package/hooks/modules/events/event_writer.py +63 -96
  58. package/hooks/modules/security/__init__.py +0 -2
  59. package/hooks/modules/security/approval_cleanup.py +238 -69
  60. package/hooks/modules/security/approval_grants.py +506 -1103
  61. package/hooks/modules/security/mutative_verbs.py +24 -1
  62. package/hooks/modules/session/pending_scanner.py +150 -90
  63. package/hooks/modules/session/session_manifest.py +257 -28
  64. package/hooks/modules/tools/bash_validator.py +19 -0
  65. package/hooks/post_compact.py +1 -0
  66. package/hooks/pre_compact.py +1 -0
  67. package/hooks/user_prompt_submit.py +20 -0
  68. package/package.json +1 -1
  69. package/pyproject.toml +1 -1
  70. package/scripts/bootstrap_database.sh +66 -17
  71. package/scripts/migrations/README.md +26 -14
  72. package/scripts/migrations/schema.checksum +2 -2
  73. package/scripts/migrations/v18_to_v19.sql +36 -0
  74. package/scripts/migrations/v19_to_v20.sql +20 -0
  75. package/skills/agent-approval-protocol/SKILL.md +50 -14
  76. package/skills/agent-approval-protocol/reference.md +16 -9
  77. package/skills/agent-protocol/examples.md +12 -1
  78. package/skills/gaia-patterns/reference.md +2 -2
  79. package/skills/orchestrator-present-approval/SKILL.md +69 -22
  80. package/skills/orchestrator-present-approval/reference.md +16 -3
  81. package/skills/orchestrator-present-approval/template.md +20 -14
  82. package/skills/pending-approvals/SKILL.md +16 -11
  83. package/skills/subagent-request-approval/SKILL.md +28 -3
  84. package/skills/subagent-request-approval/reference.md +34 -8
  85. package/tools/migration/README.md +10 -12
  86. package/tools/scan/orchestrator.py +194 -10
  87. package/tools/scan/tests/test_integration.py +1 -2
  88. package/bin/cli/plans.py +0 -517
  89. package/dist/gaia-ops/tools/context/deep_merge.py +0 -159
  90. package/dist/gaia-ops/tools/migration/migrate_04_harness_events.py +0 -132
  91. package/dist/gaia-ops/tools/migration/migrate_04_harness_events.sh +0 -23
  92. package/dist/gaia-ops/tools/scan/merge.py +0 -213
  93. package/dist/gaia-ops/tools/scan/tests/test_merge.py +0 -269
  94. package/gaia/approvals/revert.py +0 -282
  95. package/tools/context/deep_merge.py +0 -159
  96. package/tools/migration/migrate_04_harness_events.py +0 -132
  97. package/tools/migration/migrate_04_harness_events.sh +0 -23
  98. package/tools/scan/merge.py +0 -213
  99. package/tools/scan/tests/test_merge.py +0 -269
@@ -170,19 +170,30 @@ def _intake_command_set_pending(
170
170
  }
171
171
 
172
172
  try:
173
- from gaia.approvals.store import insert_requested
173
+ from gaia.approvals.store import derive_command_set_id, insert_requested
174
174
  except ImportError:
175
175
  import pathlib as _pl
176
176
  import sys as _sys
177
177
 
178
178
  _repo_root = _pl.Path(__file__).resolve().parent.parent.parent.parent
179
179
  _sys.path.insert(0, str(_repo_root))
180
- from gaia.approvals.store import insert_requested
180
+ from gaia.approvals.store import derive_command_set_id, insert_requested
181
+
182
+ # Derive the PUBLIC approval_id deterministically from the post-filter
183
+ # mutative command strings. Because the id is content-derived (not uuid4),
184
+ # the orchestrator reproduces the SAME id from the command_set it reads in
185
+ # the contract via `gaia approvals derive-id` -- no DB search, no
186
+ # cross-session miss. The list passed here is the SAME list the CLI helper
187
+ # derives over (post-mutative-filter), so both sides agree.
188
+ derived_id = derive_command_set_id(
189
+ [it["command"] for it in command_set_items]
190
+ )
181
191
 
182
192
  approval_id = insert_requested(
183
193
  sealed_payload,
184
194
  agent_id=agent_id,
185
195
  session_id=session_id or None,
196
+ approval_id=derived_id,
186
197
  )
187
198
  logger.info(
188
199
  "INTAKE: plan-first COMMAND_SET pending created approval_id=%s items=%d",
@@ -450,17 +450,33 @@ def build_project_context(
450
450
  if critical_summary:
451
451
  context_string += critical_summary
452
452
 
453
- # Inject recent operational events (non-blocking)
453
+ # Inject recent operational events (non-blocking).
454
+ # Brief 54 / Task 2.2: read from the harness_events DB table via
455
+ # gaia.store.reader.cross_surface_query instead of the legacy
456
+ # events.jsonl reader. The reader returns rows shaped as
457
+ # {surface, timestamp, type, agent, summary, raw} -- NOT the old
458
+ # {ts, type, agent, result} JSONL shape -- so the formatting loop
459
+ # below is remapped to those keys (audit Risk 4: without the remap
460
+ # the "Recent Events" block silently goes blank).
454
461
  try:
455
- from ..events.event_writer import read_events
456
- recent = read_events(hours=24, limit=20)
462
+ import sys as _sys
463
+ from pathlib import Path as _Path
464
+ try:
465
+ from gaia.store import reader as _reader
466
+ except ImportError:
467
+ _repo_root = _Path(__file__).resolve().parents[3]
468
+ _sys.path.insert(0, str(_repo_root))
469
+ from gaia.store import reader as _reader
470
+ recent = _reader.cross_surface_query(
471
+ surface="harness_events", since="24h", last=20,
472
+ )
457
473
  if recent:
458
474
  lines = ["\n# Recent Events (last 24h)"]
459
475
  for evt in recent:
460
- ts_short = evt.get("ts", "")[:16]
461
- etype = evt.get("type", "")
462
- agent_name = evt.get("agent", "")
463
- result_str = evt.get("result", "")
476
+ ts_short = (evt.get("timestamp") or "")[:16]
477
+ etype = evt.get("type") or ""
478
+ agent_name = evt.get("agent") or ""
479
+ result_str = evt.get("summary") or ""
464
480
  label = f"{agent_name}: " if agent_name else ""
465
481
  lines.append(f"- [{ts_short}] {etype}: {label}{result_str}")
466
482
  context_string += "\n".join(lines) + "\n"
@@ -1,16 +1,27 @@
1
- """Event writer and reader for the GAIA Event Context system.
1
+ """Event writer for the GAIA Event Context system.
2
+
3
+ As of Brief 54 / Task 2.2 the event pipeline writes to the ``harness_events``
4
+ table in the Gaia SQLite substrate (``~/.gaia/gaia.db``) instead of the legacy
5
+ ``events.jsonl`` file. This is an ATOMIC cutover: ``write_event`` no longer
6
+ touches ``events.jsonl`` in any code path -- there is NO dual-write.
2
7
 
3
8
  Provides:
4
- - EventWriter: append-only JSONL writer with file locking
5
- - read_events(): read events from last N hours with optional filtering
6
- - cleanup_old_events(): remove events older than N days
9
+ - EventWriter: non-blocking, silent-on-failure DB event writer
10
+ - read_events(): legacy JSONL reader (read-only; retained until Task 2.3
11
+ removes events.jsonl entirely -- no longer the canonical read path)
7
12
  - Event type constants
13
+
14
+ The DB write delegates to ``gaia.store.writer.write_harness_event``, which
15
+ resolves the DB path the same way every other gaia DB writer does (via
16
+ ``gaia.paths.db_path()`` -> ``GAIA_DATA_DIR`` / ``gaia.db``, falling back to
17
+ ``~/.gaia/gaia.db``). The hook subprocess imports the ``gaia`` package via the
18
+ repo-root fallback already established by handoff_persister.
8
19
  """
9
20
 
10
- import fcntl
11
21
  import json
12
22
  import logging
13
23
  import os
24
+ import sys
14
25
  from datetime import datetime, timedelta, timezone
15
26
  from pathlib import Path
16
27
  from typing import Any, Dict, List, Optional
@@ -32,17 +43,36 @@ HEARTBEAT = "heartbeat"
32
43
  USER_NOTE = "user.note"
33
44
 
34
45
 
46
+ def _import_store_writer():
47
+ """Import gaia.store.writer, falling back to the repo layout.
48
+
49
+ Mirrors the import contract used by
50
+ hooks/modules/agents/handoff_persister.py: prefer a sibling ``gaia``
51
+ package if installed; otherwise add the repo root (two levels above
52
+ ``hooks/``) to ``sys.path`` and import from there.
53
+ """
54
+ try:
55
+ from gaia.store import writer as _writer
56
+ except ImportError:
57
+ _repo_root = Path(__file__).resolve().parents[3]
58
+ sys.path.insert(0, str(_repo_root))
59
+ from gaia.store import writer as _writer
60
+ return _writer
61
+
62
+
35
63
  class EventWriter:
36
- """Append-only JSONL event writer with file locking.
64
+ """Non-blocking DB event writer.
37
65
 
38
- All writes are wrapped in try/except -- events are non-critical and
39
- must never block the hook pipeline.
66
+ All writes are wrapped in try/except -- events are non-critical and must
67
+ never block the hook pipeline. The ``events_dir`` argument is retained for
68
+ backward compatibility (legacy JSONL reads still resolve it) but is no
69
+ longer used for writes, which target the ``harness_events`` DB table.
40
70
  """
41
71
 
42
72
  def __init__(self, events_dir: Optional[Path] = None):
73
+ # Retained for compatibility with the legacy reader; not used for
74
+ # writes. Resolved lazily-safe (never raises here).
43
75
  self.events_dir = events_dir or get_events_dir()
44
- self.events_file = self.events_dir / "events.jsonl"
45
- self.lock_file = self.events_dir / "events.jsonl.lock"
46
76
 
47
77
  def write_event(
48
78
  self,
@@ -53,10 +83,10 @@ class EventWriter:
53
83
  severity: str = "info",
54
84
  meta: Optional[Dict[str, Any]] = None,
55
85
  ) -> None:
56
- """Append a single event to the JSONL log.
86
+ """Append a single event to the ``harness_events`` DB table.
57
87
 
58
- Thread-safe via exclusive file lock. Fails silently on any error
59
- to avoid disrupting the hook pipeline.
88
+ Fails silently on any error to avoid disrupting the hook pipeline --
89
+ same contract as the historical file writer.
60
90
 
61
91
  Args:
62
92
  event_type: Dotted event category (e.g. "agent.dispatch").
@@ -64,30 +94,21 @@ class EventWriter:
64
94
  agent: Agent involved, or empty string for non-agent events.
65
95
  result: Outcome summary string.
66
96
  severity: info | warning | error.
67
- meta: Optional type-specific structured data.
97
+ meta: Optional type-specific structured data (stored as JSON in
98
+ the ``payload`` column).
68
99
  """
69
100
  try:
70
- self.events_dir.mkdir(parents=True, exist_ok=True)
71
-
72
- record: Dict[str, Any] = {
73
- "ts": datetime.now(timezone.utc).isoformat(),
74
- "type": event_type,
75
- "source": source,
76
- "agent": agent,
77
- "result": result,
78
- "severity": severity,
79
- }
80
- if meta:
81
- record["meta"] = meta
82
-
83
- with open(self.lock_file, "w") as lf:
84
- fcntl.flock(lf.fileno(), fcntl.LOCK_EX)
85
- try:
86
- with open(self.events_file, "a") as f:
87
- f.write(json.dumps(record, separators=(",", ":")) + "\n")
88
- finally:
89
- fcntl.flock(lf.fileno(), fcntl.LOCK_UN)
90
-
101
+ writer = _import_store_writer()
102
+ workspace = os.environ.get("GAIA_WORKSPACE") or None
103
+ writer.write_harness_event(
104
+ event_type=event_type,
105
+ source=source,
106
+ agent=agent,
107
+ result=result,
108
+ severity=severity,
109
+ meta=meta,
110
+ workspace=workspace,
111
+ )
91
112
  except Exception as exc:
92
113
  logger.debug("Event write failed (non-fatal): %s", exc)
93
114
 
@@ -98,7 +119,13 @@ def read_events(
98
119
  limit: int = 50,
99
120
  events_dir: Optional[Path] = None,
100
121
  ) -> List[Dict[str, Any]]:
101
- """Read recent events from the JSONL log.
122
+ """Read recent events from the legacy JSONL log.
123
+
124
+ NOTE: As of Task 2.2 this is no longer the canonical read path -- new
125
+ events are written to the ``harness_events`` DB table. This reader is
126
+ retained read-only until Task 2.3 removes ``events.jsonl`` entirely, so
127
+ historical pre-cutover events remain consultable. New callers should use
128
+ ``gaia.store.reader.cross_surface_query(surface="harness_events")``.
102
129
 
103
130
  Args:
104
131
  hours: How far back to look (default 24h).
@@ -148,63 +175,3 @@ def read_events(
148
175
  except Exception as exc:
149
176
  logger.debug("Event read failed (non-fatal): %s", exc)
150
177
  return []
151
-
152
-
153
- def cleanup_old_events(
154
- days: int = 7,
155
- events_dir: Optional[Path] = None,
156
- ) -> int:
157
- """Remove events older than *days* from the JSONL log.
158
-
159
- Uses file locking to avoid conflicts with concurrent writers.
160
- Retains lines that cannot be parsed (conservative).
161
-
162
- Args:
163
- days: Retention window in days (default 7).
164
- events_dir: Override events directory (for testing).
165
-
166
- Returns:
167
- Number of events removed.
168
- """
169
- try:
170
- edir = events_dir or get_events_dir()
171
- events_file = edir / "events.jsonl"
172
- lock_file = edir / "events.jsonl.lock"
173
-
174
- if not events_file.exists():
175
- return 0
176
-
177
- retention_days = int(os.environ.get("GAIA_EVENT_RETENTION_DAYS", str(days)))
178
- cutoff = datetime.now(timezone.utc) - timedelta(days=retention_days)
179
- kept: List[str] = []
180
- removed = 0
181
-
182
- with open(lock_file, "w") as lf:
183
- fcntl.flock(lf.fileno(), fcntl.LOCK_EX)
184
- try:
185
- with open(events_file, "r") as f:
186
- for line in f:
187
- line = line.strip()
188
- if not line:
189
- continue
190
- try:
191
- evt = json.loads(line)
192
- ts = datetime.fromisoformat(evt["ts"])
193
- if ts < cutoff:
194
- removed += 1
195
- continue
196
- except (json.JSONDecodeError, KeyError, ValueError):
197
- pass # Keep unparseable lines
198
- kept.append(line)
199
-
200
- with open(events_file, "w") as f:
201
- for line in kept:
202
- f.write(line + "\n")
203
- finally:
204
- fcntl.flock(lf.fileno(), fcntl.LOCK_UN)
205
-
206
- return removed
207
-
208
- except Exception as exc:
209
- logger.debug("Event cleanup failed (non-fatal): %s", exc)
210
- return 0
@@ -45,7 +45,6 @@ from .approval_scopes import (
45
45
  from .approval_grants import (
46
46
  check_approval_grant,
47
47
  cleanup_expired_grants,
48
- get_latest_pending_approval,
49
48
  last_check_found_expired,
50
49
  ApprovalGrant,
51
50
  )
@@ -93,7 +92,6 @@ __all__ = [
93
92
  # Approval Grants
94
93
  "check_approval_grant",
95
94
  "cleanup_expired_grants",
96
- "get_latest_pending_approval",
97
95
  "last_check_found_expired",
98
96
  "ApprovalGrant",
99
97
  # Shell unwrapper
@@ -1,14 +1,33 @@
1
1
  """
2
- Approval file cleanup for the subagent stop hook.
3
-
4
- Cleans up pending approval files after an agent completes, using the current
5
- per-nonce file layout under .claude/cache/approvals/pending-{nonce}.json.
2
+ Approval cleanup for the subagent stop hook.
3
+
4
+ DB-only since Task E FS retirement:
5
+ All pending approvals are stored exclusively in gaia.db (approvals table).
6
+
7
+ P-3d23 invariant (Fix A): a pending younger than its TTL MUST survive ANY
8
+ subagent's SubagentStop, regardless of that subagent's final plan_status.
9
+ SubagentStop is the normal lifecycle of the documented block -> approve ->
10
+ retry flow, and because subagents share the main session_id, revoking pendings
11
+ by session-membership at SubagentStop wiped out every other outstanding pending
12
+ in the session whenever any subagent finished as COMPLETE. cleanup() therefore
13
+ no longer revokes fresh pendings by session membership; it only EXPIRES pendings
14
+ that have genuinely aged past DEFAULT_PENDING_TTL_MINUTES (the 24h user-wait
15
+ window). Expiry transitions the row to the schema 'expired' terminal status,
16
+ distinct from a user/admin 'revoked'.
17
+
18
+ The EXPIRE sweep is GLOBAL across sessions (list_pending(all_sessions=True)):
19
+ SubagentStop is the only periodic sweep trigger, so a session-scoped sweep
20
+ would never reap past-TTL pendings orphaned by dead/other sessions and they
21
+ accumulate forever. The age gate (age_seconds >= TTL) is the sole guard and is
22
+ session-independent, so a FRESH pending in ANY session always survives -- the
23
+ widening to all sessions cannot regress the P-3d23 invariant.
6
24
 
7
25
  Also performs DB-backed soft-expire of PENDING approval_grants rows whose
8
26
  expires_at timestamp has passed (M3 addition).
9
27
 
10
28
  Provides:
11
- - cleanup(): Delete pending approval files that match agent session
29
+ - cleanup(): Expire genuinely-aged pending DB approvals (global, age-gated sweep)
30
+ - expire_db_pendings(): TTL-sweep PENDING approvals past their pending TTL
12
31
  - expire_db_grants(): Soft-expire PENDING DB grants past their expires_at
13
32
  - consume_approval_file(): Backward-compatible alias for cleanup()
14
33
  """
@@ -16,20 +35,13 @@ Provides:
16
35
  import json
17
36
  import logging
18
37
  from datetime import datetime, timezone
19
- from pathlib import Path
20
38
  from typing import Optional, Set
21
39
 
22
- from ..core.paths import find_claude_dir
23
40
  from ..core.state import get_session_id
24
41
 
25
42
  logger = logging.getLogger(__name__)
26
43
 
27
44
 
28
- def _get_approvals_dir() -> Path:
29
- """Return the approvals cache directory."""
30
- return find_claude_dir() / "cache" / "approvals"
31
-
32
-
33
45
  def expire_db_grants(session_id: Optional[str] = None) -> int:
34
46
  """Soft-expire PENDING approval_grants rows whose expires_at has passed.
35
47
 
@@ -76,82 +88,239 @@ def expire_db_grants(session_id: Optional[str] = None) -> int:
76
88
  return 0
77
89
 
78
90
 
91
+ def expire_db_pendings(
92
+ agent_type: str,
93
+ session_id: Optional[str] = None,
94
+ ) -> int:
95
+ """TTL-sweep: expire PENDING approvals aged past DEFAULT_PENDING_TTL_MINUTES.
96
+
97
+ Mirrors expire_db_grants() but for the pending plane. A pending row is
98
+ eligible for expiry only when its age (list_pending enriches each row with
99
+ age_seconds) is >= the 24h pending window. Fresh pendings are left
100
+ untouched -- this is the P-3d23 invariant: a pending within its TTL survives
101
+ any SubagentStop.
102
+
103
+ Each expiry transitions the row to the schema 'expired' terminal status via
104
+ store.expire(), carrying provenance: agent_id = the agent that triggered the
105
+ sweep and metadata reason="expired_ttl" so the auto-transition event is never
106
+ null-provenance.
107
+
108
+ Args:
109
+ agent_type: The agent whose SubagentStop drove the sweep (provenance +
110
+ logging).
111
+ session_id: Recorded as the expirer_session provenance on each expiry
112
+ event (the session whose SubagentStop drove the sweep). The sweep
113
+ itself is GLOBAL (all_sessions=True): TTL is the only gate, so
114
+ past-TTL pendings from any session -- including dead/other sessions
115
+ -- are reaped, while fresh pendings in any session survive.
116
+
117
+ Returns:
118
+ Number of pendings transitioned to 'expired'.
119
+ """
120
+ if session_id is None:
121
+ session_id = get_session_id()
122
+
123
+ try:
124
+ from gaia.approvals.store import list_pending, expire
125
+ from modules.security.approval_grants import DEFAULT_PENDING_TTL_MINUTES
126
+ except ImportError:
127
+ import pathlib as _pl
128
+ import sys as _sys
129
+ _repo = _pl.Path(__file__).resolve().parent.parent.parent.parent.parent
130
+ _sys.path.insert(0, str(_repo))
131
+ try:
132
+ from gaia.approvals.store import list_pending, expire
133
+ from modules.security.approval_grants import DEFAULT_PENDING_TTL_MINUTES
134
+ except ImportError as exc:
135
+ logger.debug(
136
+ "expire_db_pendings: dependencies unavailable (non-fatal): %s", exc
137
+ )
138
+ return 0
139
+
140
+ try:
141
+ # all_sessions=True: the TTL sweep is global. SubagentStop is the only
142
+ # periodic trigger we have, so it must expire EVERY past-TTL pending --
143
+ # including stale pendings from dead or other sessions, which would
144
+ # otherwise accumulate forever (no session-scoped Stop ever fires for
145
+ # them). The age gate below is what protects fresh pendings; widening
146
+ # the scope to all sessions does not touch any pending under its TTL.
147
+ pending_rows = list_pending(session_id=session_id, all_sessions=True)
148
+ except Exception as exc:
149
+ logger.debug("expire_db_pendings: list_pending failed (non-fatal): %s", exc)
150
+ return 0
151
+
152
+ ttl_seconds = DEFAULT_PENDING_TTL_MINUTES * 60
153
+ metadata = json.dumps(
154
+ {"reason": "expired_ttl", "source": "approval_cleanup.cleanup"}
155
+ )
156
+
157
+ expired = 0
158
+ for row in pending_rows:
159
+ approval_id = row.get("id", "")
160
+ if not approval_id:
161
+ continue
162
+
163
+ age_seconds = row.get("age_seconds", 0.0) or 0.0
164
+ if age_seconds < ttl_seconds:
165
+ # Fresh pending -- MUST survive (P-3d23 invariant).
166
+ continue
167
+
168
+ try:
169
+ expire(
170
+ approval_id,
171
+ expirer_session=session_id,
172
+ agent_id=agent_type,
173
+ metadata_json=metadata,
174
+ )
175
+ logger.info(
176
+ "Expired pending DB approval past TTL for agent '%s' "
177
+ "(approval_id: %s, age=%.0fs >= %ds)",
178
+ agent_type,
179
+ approval_id[:20],
180
+ age_seconds,
181
+ ttl_seconds,
182
+ )
183
+ expired += 1
184
+ except ValueError as exc:
185
+ # Already transitioned (race or double-call) -- not an error.
186
+ logger.debug(
187
+ "expire_db_pendings: expire skipped for approval_id=%s "
188
+ "(non-fatal): %s",
189
+ approval_id[:20], exc,
190
+ )
191
+ except Exception as exc:
192
+ logger.debug(
193
+ "expire_db_pendings: expire error for approval_id=%s "
194
+ "(non-fatal): %s",
195
+ approval_id[:20], exc,
196
+ )
197
+
198
+ return expired
199
+
200
+
79
201
  def cleanup(
80
202
  agent_type: str,
81
203
  session_id: Optional[str] = None,
82
204
  preserve_nonces: Optional[Set[str]] = None,
83
205
  ) -> bool:
84
- """
85
- Delete pending-{nonce}.json files for the current session after agent completion.
206
+ """Expire genuinely-aged pending DB approvals at subagent stop.
207
+
208
+ P-3d23 invariant (Fix A): cleanup() no longer revokes fresh pendings by
209
+ session-membership. SubagentStop is the normal lifecycle of the documented
210
+ block -> approve -> retry flow, and subagents share the main session_id, so
211
+ revoking every session pending at Stop wiped out outstanding approvals the
212
+ user still needed to act on. cleanup() now only EXPIRES pendings that have
213
+ aged past DEFAULT_PENDING_TTL_MINUTES (the 24h user-wait window); a pending
214
+ within its TTL ALWAYS survives, regardless of the stopping subagent's
215
+ plan_status.
86
216
 
87
- Scans .claude/cache/approvals/ for pending files scoped to the current
88
- session and removes them, preventing stale pending approvals from
89
- accumulating after the agent run finishes.
217
+ DB-only since Task E FS retirement. No filesystem files are scanned or
218
+ deleted.
90
219
 
91
220
  Args:
92
- agent_type: The agent type that just completed (for logging).
93
- session_id: Session ID to scope cleanup (defaults to CLAUDE_SESSION_ID).
94
- preserve_nonces: Optional set of nonce strings to skip during cleanup.
95
- Used when an agent's final agent_contract_handoff still carries an
96
- APPROVAL_REQUEST so that the pending file remains available for
97
- the user to approve or reject. When None or empty, all session
98
- pendings are eligible for deletion (legacy behaviour).
221
+ agent_type: The agent type that just completed (provenance + logging).
222
+ session_id: Session ID recorded as expirer provenance (defaults to
223
+ CLAUDE_SESSION_ID); the TTL sweep itself is global across sessions.
224
+ preserve_nonces: Optional set of approval_id strings the agent's final
225
+ agent_contract_handoff still references via APPROVAL_REQUEST. With
226
+ Fix A these are protected by their TTL already (they are fresh by
227
+ construction), so this set is now belt-and-suspenders: it guarantees
228
+ an explicitly-referenced pending is never expired even at a TTL edge.
229
+ It is no longer the only thing protecting a fresh pending.
99
230
 
100
231
  Returns:
101
- True if any pending approval files were consumed, False otherwise.
232
+ True if any pending DB approvals were expired, False otherwise.
102
233
  """
103
234
  if session_id is None:
104
235
  session_id = get_session_id()
105
236
 
106
237
  preserve_nonces = preserve_nonces or set()
107
238
 
108
- approvals_dir = _get_approvals_dir()
109
- if not approvals_dir.exists():
110
- return False
239
+ try:
240
+ from gaia.approvals.store import list_pending, expire
241
+ from modules.security.approval_grants import DEFAULT_PENDING_TTL_MINUTES
242
+ except ImportError:
243
+ import pathlib as _pl
244
+ import sys as _sys
245
+ _repo = _pl.Path(__file__).resolve().parent.parent.parent.parent.parent
246
+ _sys.path.insert(0, str(_repo))
247
+ try:
248
+ from gaia.approvals.store import list_pending, expire
249
+ from modules.security.approval_grants import DEFAULT_PENDING_TTL_MINUTES
250
+ except ImportError as exc:
251
+ logger.debug("cleanup: gaia.approvals.store unavailable (non-fatal): %s", exc)
252
+ return False
111
253
 
112
- consumed = False
113
254
  try:
114
- for pending_file in approvals_dir.glob("pending-*.json"):
115
- # Skip the per-session index files
116
- if pending_file.name.startswith("pending-index-"):
117
- continue
118
- try:
119
- data = json.loads(pending_file.read_text())
120
- if data.get("session_id") != session_id:
121
- continue
122
-
123
- nonce = data.get("nonce", "")
124
- if nonce and nonce in preserve_nonces:
125
- logger.info(
126
- "Preserving pending nonce=%s (still in APPROVAL_REQUEST)",
127
- nonce[:12],
128
- )
129
- continue
130
-
131
- pending_file.unlink(missing_ok=True)
132
- logger.info(
133
- "Consumed pending approval for agent '%s' "
134
- "(nonce: %s, command: %s)",
135
- agent_type,
136
- nonce or "unknown",
137
- data.get("command", "unknown"),
138
- )
139
- consumed = True
140
-
141
- except (json.JSONDecodeError, TypeError):
142
- # Corrupt file -- remove it (corrupt files are never
143
- # preserve-eligible because we cannot read their nonce).
144
- pending_file.unlink(missing_ok=True)
145
- consumed = True
146
- except Exception as e:
147
- logger.debug(
148
- "Failed to process pending file %s (non-fatal): %s",
149
- pending_file.name, e,
150
- )
151
- except Exception as e:
152
- logger.debug("Failed to scan approvals dir (non-fatal): %s", e)
255
+ # all_sessions=True: the stale-pending EXPIRE sweep is GLOBAL, not
256
+ # session-scoped. SubagentStop is our only periodic sweep trigger, so a
257
+ # session-scoped sweep never reaps past-TTL pendings left by dead or
258
+ # other sessions -- they accumulate forever (we had to drain 102 by
259
+ # hand). The age gate below (age_seconds >= ttl_seconds) is the sole
260
+ # guard: a FRESH pending in ANY session is < TTL and is skipped, so
261
+ # widening to all_sessions cannot expire a fresh pending. This stays
262
+ # EXPIRE-only and age-gated -- no session-membership revoke is
263
+ # reintroduced (the P-3d23 invariant holds at global scope).
264
+ pending_rows = list_pending(session_id=session_id, all_sessions=True)
265
+ except Exception as exc:
266
+ logger.debug("cleanup: list_pending failed (non-fatal): %s", exc)
267
+ return False
153
268
 
154
- return consumed
269
+ ttl_seconds = DEFAULT_PENDING_TTL_MINUTES * 60
270
+ metadata = json.dumps(
271
+ {"reason": "expired_ttl", "source": "approval_cleanup.cleanup"}
272
+ )
273
+
274
+ expired = False
275
+ for row in pending_rows:
276
+ approval_id = row.get("id", "")
277
+ if not approval_id:
278
+ continue
279
+
280
+ age_seconds = row.get("age_seconds", 0.0) or 0.0
281
+ if age_seconds < ttl_seconds:
282
+ # Fresh pending -- MUST survive (P-3d23 invariant). preserve_nonces
283
+ # is no longer load-bearing here; the TTL gate already protects it.
284
+ continue
285
+
286
+ if approval_id in preserve_nonces:
287
+ # Belt-and-suspenders: an explicitly APPROVAL_REQUEST-referenced
288
+ # pending is never expired, even at the TTL edge.
289
+ logger.info(
290
+ "Preserving pending approval_id=%s (still in APPROVAL_REQUEST)",
291
+ approval_id[:20],
292
+ )
293
+ continue
294
+
295
+ try:
296
+ expire(
297
+ approval_id,
298
+ expirer_session=session_id,
299
+ agent_id=agent_type,
300
+ metadata_json=metadata,
301
+ )
302
+ logger.info(
303
+ "Expired pending DB approval past TTL for agent '%s' "
304
+ "(approval_id: %s, age=%.0fs >= %ds)",
305
+ agent_type,
306
+ approval_id[:20],
307
+ age_seconds,
308
+ ttl_seconds,
309
+ )
310
+ expired = True
311
+ except ValueError as exc:
312
+ # Approval was already transitioned (race or double-call) -- not an error.
313
+ logger.debug(
314
+ "cleanup: expire skipped for approval_id=%s (non-fatal): %s",
315
+ approval_id[:20], exc,
316
+ )
317
+ except Exception as exc:
318
+ logger.debug(
319
+ "cleanup: expire error for approval_id=%s (non-fatal): %s",
320
+ approval_id[:20], exc,
321
+ )
322
+
323
+ return expired
155
324
 
156
325
 
157
326
  # Backward-compatible alias