delimit-cli 4.4.0 → 4.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,565 @@
1
+ """Inbox executor — LED-1134 Phase 2.
2
+
3
+ Closes the email→action loop: consumes the inbox-drafts registry written
4
+ by Phase 1 and dispatches autonomous actions when founder Ship-it
5
+ replies have transitioned drafts from pending → approved.
6
+
7
+ Constitutional reference: docs/inbox_executor_v1.md is the source of
8
+ truth for the wire contract, state machine, allowlist, and non-delegable
9
+ refusal list. Authorized via owner attestation 2026-04-26T02:49Z
10
+ (scope=authority_class_expansion, evidence_ref=LED-1134).
11
+
12
+ DESIGN INTENT (per the strategic + operational deliberations):
13
+
14
+ 1. Separate process from inbox_daemon — daemon parses untrusted email
15
+ (large attack surface); executor performs privileged actions (small
16
+ attack surface). 3-1 panel vote against in-process consolidation.
17
+
18
+ 2. Re-verify HMAC + TTL at execute time, not just at insert time.
19
+ A draft sitting in the DB for 23h59m must NOT execute when it's
20
+ 24h+1m stale by the time we get to it.
21
+
22
+ 3. Atomic transition approved → executing BEFORE the side effect.
23
+ SQLite UPDATE with rowcount=1 wins; rowcount=0 means another
24
+ instance already took it. At-most-once.
25
+
26
+ 4. Crash mid-execute leaves the row at status=executing for human
27
+ reconciliation. NO auto-retry — that turns at-most-once into
28
+ at-least-once.
29
+
30
+ 5. Non-delegable refusal list (per CLAUDE.md "Non-Delegable Decisions"):
31
+ force_push_shared, ruleset_disable, account_switch, cross_account_ops,
32
+ irreversible_capital_commit, constitutional_rewrite,
33
+ authority_class_expansion, venture_kill, permission_escalation,
34
+ public_truth_claim. ANY of these refuse, log, email founder for
35
+ fresh attestation through different channel.
36
+
37
+ 6. Thermal cutout — pause if more than N actions in T seconds. v1
38
+ default: 10 actions / 15 minutes (Haiku's original 3-in-5min would
39
+ trip on legitimate batch sweeps).
40
+
41
+ 7. Allowlist of dispatch handlers — only github_comment is wired in
42
+ PR-A; others land progressively.
43
+ """
44
+
45
+ from __future__ import annotations
46
+
47
+ import json
48
+ import logging
49
+ import shlex
50
+ import subprocess
51
+ import threading
52
+ import time
53
+ from dataclasses import dataclass, field
54
+ from datetime import datetime, timezone
55
+ from pathlib import Path
56
+ from typing import Any, Callable, Dict, List, Optional, Tuple
57
+
58
+ from ai.inbox_drafts import (
59
+ DraftRow,
60
+ DraftStatus,
61
+ list_drafts,
62
+ record_attempt,
63
+ transition,
64
+ verify_draft,
65
+ )
66
+
67
+ logger = logging.getLogger("delimit.inbox_executor")
68
+
69
+ # State path for the daemon-thread control. Mirrors the inbox_daemon
70
+ # convention so operators can find both files in the same place.
71
+ STATE_PATH = Path.home() / ".delimit" / "inbox_executor_state.json"
72
+ DEFAULT_POLL_INTERVAL_SECONDS = 30
73
+
74
+
75
+ # ── Constitutional refusal list ──────────────────────────────────────
76
+
77
+
78
+ # Mirrors ai.governance.NON_DELEGABLE_OPERATION_CLASSES. We hard-code the
79
+ # set here so the executor can refuse without an import dependency that
80
+ # could theoretically be tampered with. Both lists must stay in sync; the spec
81
+ # doc (docs/inbox_executor_v1.md) is the single source of truth.
82
# Draft kinds the executor must never run on an email approval. Kept as an
# immutable frozenset so membership checks are O(1) and the set cannot be
# mutated at runtime.
NON_DELEGABLE_REFUSAL_LIST = frozenset(
    (
        "force_push_shared",
        "ruleset_disable",
        "account_switch",
        "cross_account_ops",
        "irreversible_capital_commit",
        "constitutional_rewrite",
        "authority_class_expansion",
        "venture_kill",
        "permission_escalation",
        "public_truth_claim",
    )
)
94
+
95
+
96
+ # ── Thermal cutout ────────────────────────────────────────────────────
97
+
98
+
99
@dataclass
class ThermalState:
    """Sliding-window burst detector used to self-pause the executor.

    Every recorded action is timestamped. When more than
    ``threshold_count`` timestamps fall inside the trailing
    ``threshold_seconds`` window, ``paused_until`` is pushed
    ``cooldown_seconds`` into the future. Defaults are 10 actions per
    15 minutes with a 5 minute cooldown, chosen so that a founder clearing
    a handful of approvals at once does not trip the cutout.
    """

    threshold_count: int = 10
    threshold_seconds: int = 15 * 60
    cooldown_seconds: int = 5 * 60
    recent_action_times: List[int] = field(default_factory=list)
    paused_until: int = 0

    def record(self, now: Optional[int] = None) -> None:
        """Register one action and trip the cutout if the window overflows.

        Args:
            now: Epoch seconds of the action; defaults to the current time.
        """
        stamp = int(time.time()) if now is None else now
        self.recent_action_times.append(stamp)

        # Keep only the timestamps still inside the trailing window.
        oldest_allowed = stamp - self.threshold_seconds
        window = [t for t in self.recent_action_times if t >= oldest_allowed]
        self.recent_action_times = window

        if len(window) <= self.threshold_count:
            return

        self.paused_until = stamp + self.cooldown_seconds
        logger.warning(
            "thermal cutout tripped: %d actions in last %ds; pausing %ds",
            len(window),
            self.threshold_seconds,
            self.cooldown_seconds,
        )

    def is_paused(self, now: Optional[int] = None) -> bool:
        """Return True while ``now`` (default: current time) precedes ``paused_until``."""
        moment = int(time.time()) if now is None else now
        return moment < self.paused_until
135
+
136
+
137
+ # ── Dispatch handlers ────────────────────────────────────────────────
138
+
139
+
140
+ # A dispatch handler is a callable: (DraftRow) -> (ok: bool, executed_url: Optional[str], reason: Optional[str])
141
+ # Handlers keep no shared in-process state (their only effect is the external action), which keeps the executor predictable.
142
+ DispatchHandler = Callable[[DraftRow], Tuple[bool, Optional[str], Optional[str]]]
143
+
144
+
145
def _dispatch_github_comment(row: DraftRow) -> Tuple[bool, Optional[str], Optional[str]]:
    """Post a GitHub issue comment via the ``gh`` CLI.

    Payload schema: {"body": "..."}
    Target schema:  {"repo": "owner/name", "issue": <int>}

    Args:
        row: The draft to dispatch; ``row.target`` and ``row.payload`` are read.

    Returns:
        ``(ok, executed_url, reason)``. On success ``executed_url`` is the
        last non-empty line ``gh`` printed on stdout (``None`` if it printed
        nothing usable) and ``reason`` is ``None``. On failure ``ok`` is
        False and ``reason`` describes what went wrong.
    """
    # Treat a missing or non-dict target/payload as "field absent" so a
    # malformed row yields the validation failure below, not an AttributeError.
    target = row.target if isinstance(row.target, dict) else {}
    payload = row.payload if isinstance(row.payload, dict) else {}
    repo = target.get("repo")
    issue = target.get("issue")
    body = payload.get("body")
    if not (repo and issue and body):
        return False, None, "github_comment requires target.repo, target.issue, payload.body"

    # Argument list (no shell) so the comment body is never shell-interpreted.
    cmd = ["gh", "issue", "comment", str(issue), "--repo", repo, "--body", body]
    try:
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
    except FileNotFoundError:
        return False, None, "gh CLI not found"
    except subprocess.TimeoutExpired:
        return False, None, "gh issue comment timed out"

    if result.returncode != 0:
        stderr = (result.stderr or "").strip()[:300]
        return False, None, f"gh issue comment failed: {stderr}"

    # gh prints the URL on stdout, e.g.
    # "https://github.com/owner/repo/issues/123#issuecomment-..."
    # Whitespace-only stdout must not raise here: the comment is already
    # posted, so an exception would strand the row after a successful action.
    stdout_lines = (result.stdout or "").strip().splitlines()
    url = stdout_lines[-1] if stdout_lines else None
    return True, url, None
174
+
175
+
176
def _dispatch_unimplemented(row: DraftRow) -> Tuple[bool, Optional[str], Optional[str]]:
    """Refuse a draft kind that is allowlisted but has no handler yet.

    Only github_comment is wired in PR-A. The remaining allowlisted kinds
    (social_post, ledger_done, notify_routing_update,
    deploy_publish_prevalidated_artifact) route here so an approval on one
    of them fails with an explicit reason instead of doing nothing.

    Returns:
        Always ``(False, None, reason)``, where ``reason`` names the kind.
    """
    refusal = f"dispatch handler for kind={row.draft_kind} not implemented in PR-A"
    return (False, None, refusal)
186
+
187
+
188
+ # Dispatch table. Adding a new key here is itself an authority_class_expansion
189
+ # event — the spec doc must be updated and a fresh attestation logged.
190
# Maps each allowlisted draft kind to its handler. Kinds without a real
# implementation point at the refusing placeholder.
DISPATCH_TABLE: Dict[str, DispatchHandler] = dict(
    github_comment=_dispatch_github_comment,
    social_post=_dispatch_unimplemented,
    ledger_done=_dispatch_unimplemented,
    notify_routing_update=_dispatch_unimplemented,
    deploy_publish_prevalidated_artifact=_dispatch_unimplemented,
)
197
+
198
+
199
+ # ── Failure-notification hook ────────────────────────────────────────
200
+
201
+
202
+ # Decoupled so tests can patch it without spinning up SMTP.
203
+ NotifyFn = Callable[[str, str], None]
204
+
205
+
206
def _default_notify(subject: str, body: str) -> None:
    """Send an executor alert email through ``ai.notify.send_notification``.

    The notify module is imported inside the function so that importing
    this module does not pull it in. Delivery is best-effort: any
    exception, including a failed import, is logged and swallowed so the
    executor loop keeps running.

    Args:
        subject: Subject line, passed through unchanged.
        body: Message text, passed through as ``message``.
    """
    try:
        from ai.notify import send_notification

        alert = {
            "channel": "email",
            "subject": subject,
            "message": body,
            "event_type": "executor_alert",
        }
        send_notification(**alert)
    except Exception:
        logger.exception("failure-notification hook itself failed; logging only")
223
+
224
+
225
+ # ── Core executor cycle ──────────────────────────────────────────────
226
+
227
+
228
def _execute_one(
    row: DraftRow,
    *,
    notify: NotifyFn,
) -> Dict[str, Any]:
    """Run a single approved draft through the at-most-once pipeline.

    Sequence (the order is the contract):

      1. Re-verify the draft and record the verify attempt.
      2. Refuse kinds on the non-delegable list.
      3. Claim the row with an approved → executing transition; if the
         transition reports failure, someone else owns it and we stop.
      4. Invoke the dispatch handler (the side effect) and record it.
      5. Move executing → completed, or executing → completed_with_error
         and alert via ``notify``.

    If anything raises between steps 3 and 5 the row is left at
    ``executing``; this function never retries from that state.

    Args:
        row: The approved draft to process.
        notify: Callback ``(subject, body)`` used for refusal/failure alerts.

    Returns:
        A diagnostics dict with ``draft_id``, ``kind``, ``outcome`` and,
        depending on the outcome, ``reason`` or ``executed_url``.
    """
    draft_id = row.draft_id
    kind = row.draft_kind
    summary: Dict[str, Any] = {"draft_id": draft_id, "kind": kind}

    # Step 1: re-verify at execute time, not just at insert time.
    verified, verify_reason = verify_draft(row.to_signed_dict())
    record_attempt(
        draft_id,
        kind="verify",
        outcome="ok" if verified else "failed",
        reason=verify_reason,
    )
    if not verified:
        summary.update(outcome="verify_failed", reason=verify_reason)
        # Approved earlier but no longer verifiable: close the row out
        # as an error and never run the handler.
        transition(
            draft_id,
            expected=DraftStatus.APPROVED.value,
            new=DraftStatus.COMPLETED_WITH_ERROR.value,
            last_error=f"verify failed at execute time: {verify_reason}",
            completed=True,
        )
        notify(
            f"[ALERT] Inbox executor refused {draft_id}",
            f"Draft kind={kind} failed re-verify at execute time:\n\n{verify_reason}\n\n"
            f"Marked completed_with_error. No retry.",
        )
        return summary

    # Step 2: non-delegable kinds are refused outright.
    if kind in NON_DELEGABLE_REFUSAL_LIST:
        summary.update(
            outcome="refused_non_delegable",
            reason=f"{kind} is non-delegable per STR-183",
        )
        transition(
            draft_id,
            expected=DraftStatus.APPROVED.value,
            new=DraftStatus.TERMINAL_UNRECOVERABLE.value,
            last_error="kind is on the non-delegable refusal list",
            completed=True,
        )
        notify(
            f"[ALERT] Inbox executor refused {draft_id}",
            f"Draft kind={kind} is on the non-delegable refusal list. "
            f"This action requires fresh per-invocation founder attestation through "
            f"a different channel (not email Ship-it).",
        )
        return summary

    # Step 3: claim the row before any side effect happens.
    claimed = transition(
        draft_id,
        expected=DraftStatus.APPROVED.value,
        new=DraftStatus.EXECUTING.value,
    )
    if not claimed:
        summary["outcome"] = "lost_race"
        return summary

    # Step 4: the side effect. Kinds missing from the table fall back to
    # the refusing placeholder handler.
    handler = DISPATCH_TABLE.get(kind, _dispatch_unimplemented)
    succeeded, executed_url, failure_reason = handler(row)
    record_attempt(
        draft_id,
        kind="execute",
        outcome="ok" if succeeded else "failed",
        reason=failure_reason,
        executed_url=executed_url,
    )

    # Step 5: terminal transition.
    if not succeeded:
        transition(
            draft_id,
            expected=DraftStatus.EXECUTING.value,
            new=DraftStatus.COMPLETED_WITH_ERROR.value,
            last_error=failure_reason,
            completed=True,
        )
        notify(
            f"[ALERT] Inbox executor failed {draft_id}",
            f"Draft kind={kind} failed during dispatch:\n\n{failure_reason}\n\n"
            f"Marked completed_with_error. No retry — please re-trigger manually if needed.",
        )
        summary.update(outcome="execute_failed", reason=failure_reason)
        return summary

    transition(
        draft_id,
        expected=DraftStatus.EXECUTING.value,
        new=DraftStatus.COMPLETED.value,
        executed_url=executed_url,
        completed=True,
    )
    summary.update(outcome="executed", executed_url=executed_url)
    return summary
340
+
341
+
342
def run_cycle(
    *,
    thermal: ThermalState,
    batch_limit: int = 10,
    notify: Optional[NotifyFn] = None,
) -> Dict[str, Any]:
    """Execute one poll pass over approved drafts.

    Fetches at most ``batch_limit`` approved drafts and runs each through
    ``_execute_one``. Drafts reached after the thermal cutout trips
    mid-batch are reported as ``deferred_thermal`` and left untouched.

    Args:
        thermal: Burst tracker; consulted before every draft and updated
            after every real dispatch attempt.
        batch_limit: Maximum number of drafts to fetch for this pass.
        notify: Alert callback; defaults to ``_default_notify``.

    Returns:
        ``{"status": "paused", ...}`` when the cutout is active,
        ``{"status": "idle", ...}`` when nothing is approved, otherwise
        ``{"status": "ran", "processed": n, "results": [...]}``.
    """
    alert = notify or _default_notify

    if thermal.is_paused():
        return {
            "status": "paused",
            "paused_until": thermal.paused_until,
            "processed": 0,
        }

    pending = list_drafts(status=DraftStatus.APPROVED.value, limit=batch_limit)
    if not pending:
        return {"status": "idle", "processed": 0}

    outcomes: List[Dict[str, Any]] = []
    for draft in pending:
        if thermal.is_paused():
            outcomes.append(
                {
                    "draft_id": draft.draft_id,
                    "kind": draft.draft_kind,
                    "outcome": "deferred_thermal",
                }
            )
            continue

        try:
            entry = _execute_one(draft, notify=alert)
        except Exception as exc:
            # One bad draft must not kill the pass. The row keeps whatever
            # status it last reached, which surfaces it for a human.
            logger.exception("execute_one raised for %s", draft.draft_id)
            entry = {
                "draft_id": draft.draft_id,
                "kind": draft.draft_kind,
                "outcome": "exception",
                "reason": f"{type(exc).__name__}: {exc}",
            }
        outcomes.append(entry)

        # Thermal accounting covers dispatch attempts only; refusals and
        # verify failures are not counted.
        verdict = entry.get("outcome")
        if verdict == "executed" or verdict == "execute_failed":
            thermal.record()

    return {
        "status": "ran",
        "processed": len(outcomes),
        "results": outcomes,
    }
405
+
406
+
407
+ # ── Daemon control surface ───────────────────────────────────────────
408
+
409
+
410
@dataclass
class _ExecutorState:
    """Shared state behind the daemon's start/stop/status surface.

    Holds the run flag, counters, the worker thread handle, its stop
    event, and the thermal tracker. ``persist()`` writes the status
    snapshot to STATE_PATH so the daemon can be inspected by reading the
    file, without importing this module.
    """

    running: bool = False
    last_cycle_at: Optional[str] = None
    total_cycles: int = 0
    total_processed: int = 0
    total_executed: int = 0
    total_failed: int = 0
    consecutive_failures: int = 0
    stopped_reason: Optional[str] = None
    poll_interval_seconds: int = DEFAULT_POLL_INTERVAL_SECONDS
    thread: Optional[threading.Thread] = None
    stop_event: Optional[threading.Event] = None
    thermal: Optional[ThermalState] = None
    # Re-entrant on purpose: to_status_dict() takes this lock, and callers
    # that already hold it also ask for a status snapshot. With a plain
    # Lock that second acquisition would block forever.
    _lock: Any = field(default_factory=threading.RLock)

    def to_status_dict(self) -> Dict[str, Any]:
        """Return a JSON-serialisable snapshot of the state, taken under the lock.

        Thread, event and lock objects are left out; the thermal tracker
        is reduced to ``thermal_paused_until`` (``None`` if not yet set).
        """
        with self._lock:
            return {
                "running": self.running,
                "last_cycle_at": self.last_cycle_at,
                "total_cycles": self.total_cycles,
                "total_processed": self.total_processed,
                "total_executed": self.total_executed,
                "total_failed": self.total_failed,
                "consecutive_failures": self.consecutive_failures,
                "stopped_reason": self.stopped_reason,
                "poll_interval_seconds": self.poll_interval_seconds,
                "thermal_paused_until": (
                    self.thermal.paused_until if self.thermal else None
                ),
            }

    def persist(self) -> None:
        """Write the status snapshot to STATE_PATH as indented JSON.

        Best-effort: any failure is logged and swallowed so a bad disk or
        permissions problem cannot take down the executor.
        """
        try:
            STATE_PATH.parent.mkdir(parents=True, exist_ok=True)
            STATE_PATH.write_text(json.dumps(self.to_status_dict(), indent=2))
        except Exception:
            logger.exception("could not persist executor state")
456
+
457
+
458
+ _state = _ExecutorState()
459
+
460
+
461
def _daemon_loop(state: _ExecutorState, stop_event: threading.Event) -> None:
    """Poll ``run_cycle`` until ``stop_event`` is set.

    After every cycle the shared counters are updated under the state
    lock and the status file is rewritten. A cycle that raises only bumps
    ``consecutive_failures``; the loop itself keeps going. On exit the
    ``running`` flag is cleared and the state is persisted one last time.

    Args:
        state: Shared executor state to update and persist.
        stop_event: Set by the controller to request shutdown.
    """
    while not stop_event.is_set():
        try:
            cycle = run_cycle(thermal=state.thermal)
            with state._lock:
                state.total_cycles += 1
                state.last_cycle_at = datetime.now(timezone.utc).isoformat()
                if cycle.get("status") == "ran":
                    state.total_processed += cycle.get("processed", 0)
                    verdicts = [item.get("outcome") for item in cycle.get("results", [])]
                    state.total_executed += verdicts.count("executed")
                    state.total_failed += verdicts.count("execute_failed")
                # Any cycle that completes without raising ends the failure streak.
                state.consecutive_failures = 0
        except Exception:
            with state._lock:
                state.consecutive_failures += 1
                state.last_cycle_at = datetime.now(timezone.utc).isoformat()
            logger.exception("inbox_executor cycle raised")

        state.persist()

        # wait() doubles as the sleep and returns True the moment a stop
        # is requested, so shutdown does not wait out the poll interval.
        stop_requested = stop_event.wait(timeout=state.poll_interval_seconds)
        if stop_requested:
            break

    with state._lock:
        state.running = False
    state.persist()
493
+
494
+
495
def start(
    *,
    poll_interval_seconds: int = DEFAULT_POLL_INTERVAL_SECONDS,
    thermal_threshold_count: int = 10,
    thermal_threshold_seconds: int = 15 * 60,
    thermal_cooldown_seconds: int = 5 * 60,
) -> Dict[str, Any]:
    """Start the executor daemon thread.

    Idempotent: calling start() while the daemon is running returns the
    current status with ``action="already_running"`` and does not spawn a
    second thread.

    Args:
        poll_interval_seconds: Delay between poll cycles.
        thermal_threshold_count: Actions allowed inside the thermal window.
        thermal_threshold_seconds: Length of the thermal window.
        thermal_cooldown_seconds: Pause length once the cutout trips.

    Returns:
        The status dict plus ``action`` set to ``"started"`` or
        ``"already_running"``.
    """
    with _state._lock:
        already_running = _state.running
        if not already_running:
            _state.running = True
            _state.stopped_reason = None
            _state.poll_interval_seconds = poll_interval_seconds
            _state.thermal = ThermalState(
                threshold_count=thermal_threshold_count,
                threshold_seconds=thermal_threshold_seconds,
                cooldown_seconds=thermal_cooldown_seconds,
            )
            _state.stop_event = threading.Event()
            _state.thread = threading.Thread(
                target=_daemon_loop,
                args=(_state, _state.stop_event),
                name="inbox_executor",
                daemon=True,
            )
            _state.thread.start()

    # to_status_dict() and persist() acquire _state._lock themselves, so
    # they run only after the block above has released it. Taking the
    # snapshot while still holding the lock would block forever on a
    # second start().
    if already_running:
        return {**_state.to_status_dict(), "action": "already_running"}

    _state.persist()
    return {**_state.to_status_dict(), "action": "started"}
529
+
530
+
531
def stop(reason: str = "manual") -> Dict[str, Any]:
    """Stop the executor daemon. Idempotent.

    Signals the worker through its stop event, records ``reason``, waits
    up to 10 seconds for the thread to finish, then persists the state.

    Args:
        reason: Stored as ``stopped_reason`` in the status.

    Returns:
        The status dict plus ``action`` set to ``"stopped"`` or
        ``"already_stopped"``.
    """
    thread = None
    with _state._lock:
        stopping = bool(_state.running and _state.stop_event)
        if stopping:
            _state.stop_event.set()
            _state.stopped_reason = reason
            thread = _state.thread

    # Everything below runs with the lock released. to_status_dict() and
    # persist() acquire it themselves, and the worker thread needs it to
    # clear the running flag before it can exit and be joined.
    if not stopping:
        return {**_state.to_status_dict(), "action": "already_stopped"}

    if thread:
        thread.join(timeout=10.0)
    _state.persist()
    return {**_state.to_status_dict(), "action": "stopped"}
543
+
544
+
545
def status() -> Dict[str, Any]:
    """Report the daemon's in-memory status snapshot; SQLite is not touched."""
    snapshot = _state.to_status_dict()
    return snapshot
548
+
549
+
550
def control(action: str = "status", **kwargs) -> Dict[str, Any]:
    """Route a textual command to start(), stop() or status().

    The action is case-insensitive and surrounding whitespace is ignored;
    an empty or None action means 'status'.

    Args:
        action: 'start' (begin polling), 'stop' (halt) or 'status'.
        **kwargs: Forwarded as-is to start() or stop(); ignored for 'status'.

    Returns:
        The selected function's result, or ``{"error": ...}`` for an
        unrecognised action.
    """
    verb = (action or "status").lower().strip()
    # Lambdas defer both the name lookup and the call until a route is
    # actually chosen.
    routes = {
        "start": lambda: start(**kwargs),
        "stop": lambda: stop(**kwargs),
        "status": lambda: status(),
    }
    route = routes.get(verb)
    if route is None:
        return {"error": f"unknown action: {verb!r}; use start|stop|status"}
    return route()