@os-eco/overstory-cli 0.9.4 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124)
  1. package/README.md +50 -19
  2. package/agents/builder.md +19 -9
  3. package/agents/coordinator.md +6 -6
  4. package/agents/lead.md +204 -87
  5. package/agents/merger.md +25 -14
  6. package/agents/reviewer.md +22 -16
  7. package/agents/scout.md +17 -12
  8. package/package.json +6 -3
  9. package/src/agents/capabilities.test.ts +85 -0
  10. package/src/agents/capabilities.ts +125 -0
  11. package/src/agents/headless-mail-injector.test.ts +448 -0
  12. package/src/agents/headless-mail-injector.ts +219 -0
  13. package/src/agents/headless-prompt.test.ts +102 -0
  14. package/src/agents/headless-prompt.ts +68 -0
  15. package/src/agents/hooks-deployer.test.ts +514 -14
  16. package/src/agents/hooks-deployer.ts +141 -0
  17. package/src/agents/mail-poll-detect.test.ts +153 -0
  18. package/src/agents/mail-poll-detect.ts +73 -0
  19. package/src/agents/overlay.test.ts +60 -4
  20. package/src/agents/overlay.ts +63 -8
  21. package/src/agents/scope-detect.test.ts +190 -0
  22. package/src/agents/scope-detect.ts +146 -0
  23. package/src/agents/turn-lock.test.ts +181 -0
  24. package/src/agents/turn-lock.ts +235 -0
  25. package/src/agents/turn-runner-dispatch.test.ts +182 -0
  26. package/src/agents/turn-runner-dispatch.ts +105 -0
  27. package/src/agents/turn-runner.test.ts +2312 -0
  28. package/src/agents/turn-runner.ts +1383 -0
  29. package/src/commands/agents.ts +9 -0
  30. package/src/commands/clean.ts +54 -0
  31. package/src/commands/coordinator.test.ts +254 -0
  32. package/src/commands/coordinator.ts +273 -8
  33. package/src/commands/dashboard.test.ts +188 -0
  34. package/src/commands/dashboard.ts +14 -4
  35. package/src/commands/doctor.ts +3 -1
  36. package/src/commands/group.test.ts +94 -0
  37. package/src/commands/group.ts +49 -20
  38. package/src/commands/init.test.ts +8 -0
  39. package/src/commands/init.ts +8 -1
  40. package/src/commands/log.test.ts +187 -11
  41. package/src/commands/log.ts +171 -71
  42. package/src/commands/mail.test.ts +162 -0
  43. package/src/commands/mail.ts +64 -9
  44. package/src/commands/merge.test.ts +230 -1
  45. package/src/commands/merge.ts +68 -12
  46. package/src/commands/nudge.test.ts +351 -4
  47. package/src/commands/nudge.ts +356 -34
  48. package/src/commands/run.test.ts +43 -7
  49. package/src/commands/serve/build.test.ts +202 -0
  50. package/src/commands/serve/build.ts +206 -0
  51. package/src/commands/serve/coordinator-actions.test.ts +339 -0
  52. package/src/commands/serve/coordinator-actions.ts +408 -0
  53. package/src/commands/serve/dev.test.ts +168 -0
  54. package/src/commands/serve/dev.ts +117 -0
  55. package/src/commands/serve/mail-actions.test.ts +312 -0
  56. package/src/commands/serve/mail-actions.ts +167 -0
  57. package/src/commands/serve/rest.test.ts +1323 -0
  58. package/src/commands/serve/rest.ts +708 -0
  59. package/src/commands/serve/static.ts +51 -0
  60. package/src/commands/serve/ws.test.ts +361 -0
  61. package/src/commands/serve/ws.ts +332 -0
  62. package/src/commands/serve.test.ts +459 -0
  63. package/src/commands/serve.ts +565 -0
  64. package/src/commands/sling.test.ts +177 -1
  65. package/src/commands/sling.ts +243 -71
  66. package/src/commands/status.test.ts +9 -0
  67. package/src/commands/status.ts +12 -4
  68. package/src/commands/stop.test.ts +255 -1
  69. package/src/commands/stop.ts +107 -8
  70. package/src/commands/watch.test.ts +43 -0
  71. package/src/commands/watch.ts +153 -28
  72. package/src/config.ts +23 -0
  73. package/src/doctor/consistency.test.ts +106 -0
  74. package/src/doctor/consistency.ts +48 -1
  75. package/src/doctor/serve.test.ts +95 -0
  76. package/src/doctor/serve.ts +86 -0
  77. package/src/doctor/types.ts +2 -1
  78. package/src/doctor/watchdog.ts +57 -1
  79. package/src/events/tailer.test.ts +234 -1
  80. package/src/events/tailer.ts +90 -0
  81. package/src/index.ts +57 -6
  82. package/src/insights/quality-gates.test.ts +141 -0
  83. package/src/insights/quality-gates.ts +156 -0
  84. package/src/json.ts +29 -0
  85. package/src/logging/theme.ts +4 -0
  86. package/src/mail/client.ts +15 -2
  87. package/src/mail/store.test.ts +82 -0
  88. package/src/mail/store.ts +41 -4
  89. package/src/merge/lock.test.ts +149 -0
  90. package/src/merge/lock.ts +140 -0
  91. package/src/merge/predict.test.ts +387 -0
  92. package/src/merge/predict.ts +249 -0
  93. package/src/merge/resolver.ts +1 -1
  94. package/src/mulch/client.ts +3 -3
  95. package/src/runtimes/__fixtures__/claude-stream-fixture.ts +22 -0
  96. package/src/runtimes/claude.test.ts +791 -1
  97. package/src/runtimes/claude.ts +323 -1
  98. package/src/runtimes/connections.test.ts +141 -1
  99. package/src/runtimes/connections.ts +73 -4
  100. package/src/runtimes/headless-connection.test.ts +264 -0
  101. package/src/runtimes/headless-connection.ts +158 -0
  102. package/src/runtimes/types.ts +10 -0
  103. package/src/schema-consistency.test.ts +1 -0
  104. package/src/sessions/store.test.ts +657 -29
  105. package/src/sessions/store.ts +286 -23
  106. package/src/test-setup.test.ts +31 -0
  107. package/src/test-setup.ts +28 -0
  108. package/src/types.ts +107 -2
  109. package/src/utils/pid.test.ts +85 -1
  110. package/src/utils/pid.ts +86 -1
  111. package/src/utils/process-scan.test.ts +53 -0
  112. package/src/utils/process-scan.ts +76 -0
  113. package/src/watchdog/daemon.test.ts +1607 -376
  114. package/src/watchdog/daemon.ts +462 -88
  115. package/src/watchdog/health.test.ts +282 -0
  116. package/src/watchdog/health.ts +126 -27
  117. package/src/worktree/manager.test.ts +218 -1
  118. package/src/worktree/manager.ts +55 -0
  119. package/src/worktree/process.test.ts +71 -0
  120. package/src/worktree/process.ts +25 -5
  121. package/src/worktree/tmux.test.ts +28 -0
  122. package/src/worktree/tmux.ts +27 -3
  123. package/templates/CLAUDE.md.tmpl +19 -8
  124. package/templates/overlay.md.tmpl +5 -2
@@ -339,6 +339,114 @@ export function getTrackerCloseGuards(): HookEntry[] {
339
339
  ];
340
340
  }
341
341
 
342
/**
 * Assemble the PreToolUse shell guard that enforces the merge_ready gate for
 * lead agents (overstory-3899, overstory-da9b).
 *
 * The generated script vetoes `sd close` / `bd close` of the lead's own task
 * (`$OVERSTORY_TASK_ID`) by printing a `{"decision":"block",...}` JSON payload
 * when any of the following holds:
 *   1. no `merge_ready` mail has been sent by `$OVERSTORY_AGENT_NAME`;
 *   2. fewer `merge_ready` mails were sent than `worker_done` mails received;
 *   3. the worktree HEAD is not an ancestor of the merge target (the contents
 *      of `.overstory/session-branch.txt` when present, otherwise `main`),
 *      as reported by `git merge-base --is-ancestor`.
 *
 * Mail counts are obtained from `ov mail list --json` by grep-counting
 * `"id":"` occurrences, so no jq is required; an empty count falls back to 0.
 *
 * The script does not block (plain `exit 0`) when the task ID is unset, the
 * command is not an `sd`/`bd close`, the issue being closed is not the lead's
 * own task, `$OVERSTORY_WORKTREE_PATH` is unset, or the target ref cannot be
 * resolved in the worktree — in the last two cases nothing definitive can be
 * said about the merge, so the gate fails open.
 *
 * @returns The guard as one shell command line (fragments joined by spaces).
 */
export function buildLeadCloseGateScript(): string {
	// JSON document the hook prints to veto the close.
	const blockPayload = (reason: string): string => JSON.stringify({ decision: "block", reason });
	// Body of an `if … then` branch: print the payload, then end the hook.
	const emitBlock = (payload: string): string[] => [
		` echo '${escapeForSingleQuotedShell(payload)}';`,
		" exit 0;",
	];

	const noMergeReadyPayload = blockPayload(
		'merge_ready gate: cannot close your task — you have not sent a merge_ready mail to coordinator. Required: ov mail send --to coordinator --subject "merge_ready: <task>" --body "<branch + files>" --type merge_ready --from $OVERSTORY_AGENT_NAME. Then retry the close.',
	);
	const underCountPayload = blockPayload(
		"merge_ready gate: cannot close your task — merge_ready count is less than worker_done received. Send one merge_ready per worker_done before closing.",
	);
	const notMergedPayload = blockPayload(
		"merge_ready gate: cannot close your task — your branch is not yet merged into the target (session-branch.txt or main). Wait for the coordinator to merge before closing. The merge step is what makes the work real.",
	);

	// Stage 1: decide whether this tool call is the lead closing its own task.
	const scopeChecks: string[] = [
		// Only enforce for overstory agent sessions
		ENV_GUARD,
		// No task ID (coordinator/monitor) means there is nothing to gate
		'[ -z "$OVERSTORY_TASK_ID" ] && exit 0;',
		"read -r INPUT;",
		// Pull the `command` value out of the hook's JSON input
		'CMD=$(echo "$INPUT" | sed \'s/.*"command": *"\\([^"]*\\)".*/\\1/\');',
		// Anything other than sd/bd close passes straight through
		"if ! echo \"$CMD\" | grep -qE '^\\s*(sd|bd)\\s+close\\s'; then exit 0; fi;",
		// First argument after `close` is the issue ID
		"ISSUE_ID=$(echo \"$CMD\" | sed -E 's/^[[:space:]]*(sd|bd)[[:space:]]+close[[:space:]]+([^ ]+).*/\\2/');",
		// Foreign-task closes are handled by buildTrackerCloseGuardScript; only gate the lead's own task
		'[ "$ISSUE_ID" != "$OVERSTORY_TASK_ID" ] && exit 0;',
	];

	// Stage 2: compare merge_ready sent against worker_done received.
	const mailChecks: string[] = [
		// merge_ready mails sent by this agent
		'MR=$(ov mail list --json --from "$OVERSTORY_AGENT_NAME" --type merge_ready 2>/dev/null | grep -o \'"id":"\' | wc -l | tr -d \' \');',
		// worker_done mails addressed to this agent
		'WD=$(ov mail list --json --to "$OVERSTORY_AGENT_NAME" --type worker_done 2>/dev/null | grep -o \'"id":"\' | wc -l | tr -d \' \');',
		// Treat a failed/empty count as zero.
		// biome-ignore lint/suspicious/noTemplateCurlyInString: shell parameter expansion, not a JS template
		"MR=${MR:-0}; WD=${WD:-0};",
		// No merge_ready at all → block
		'if [ "$MR" -eq 0 ]; then',
		...emitBlock(noMergeReadyPayload),
		"fi;",
		// Fewer merge_ready than worker_done → block
		'if [ "$MR" -lt "$WD" ]; then',
		...emitBlock(underCountPayload),
		"fi;",
	];

	// Stage 3 (overstory-da9b): merge_ready alone does not prove the work landed,
	// so require the worktree HEAD to be reachable from the merge target.
	const mergedChecks: string[] = [
		// Without a worktree path (test envs etc.) the check cannot run — fail open
		'[ -z "$OVERSTORY_WORKTREE_PATH" ] && exit 0;',
		// Target branch: $OVERSTORY_PROJECT_ROOT/.overstory/session-branch.txt, else "main"
		'TARGET="";',
		'if [ -n "$OVERSTORY_PROJECT_ROOT" ] && [ -f "$OVERSTORY_PROJECT_ROOT/.overstory/session-branch.txt" ]; then',
		' TARGET=$(tr -d "[:space:]" < "$OVERSTORY_PROJECT_ROOT/.overstory/session-branch.txt" 2>/dev/null);',
		"fi;",
		'[ -z "$TARGET" ] && TARGET=main;',
		// Unresolvable target ref → cannot verify → fail open
		'if ! git -C "$OVERSTORY_WORKTREE_PATH" rev-parse --verify "$TARGET" >/dev/null 2>&1; then exit 0; fi;',
		// HEAD not yet an ancestor of the target → block
		'if ! git -C "$OVERSTORY_WORKTREE_PATH" merge-base --is-ancestor HEAD "$TARGET" >/dev/null 2>&1; then',
		...emitBlock(notMergedPayload),
		"fi;",
	];

	return [...scopeChecks, ...mailChecks, ...mergedChecks].join(" ");
}
433
+
434
/**
 * Produce the lead-only PreToolUse hook entry that gates `sd/bd close <own-task>`.
 *
 * The hook command is the script from `buildLeadCloseGateScript()` prefixed
 * with `PATH_PREFIX`, so that `ov` can be resolved under the minimal PATH
 * Claude Code gives to hooks. The entry matches the `Bash` tool.
 *
 * Deployed only for lead agents (see getCapabilityGuards).
 *
 * @returns A single-element list holding the Bash-matcher command hook.
 */
export function getLeadCloseGateGuards(): HookEntry[] {
	const command = `${PATH_PREFIX} ${buildLeadCloseGateScript()}`;
	const bashCloseGate: HookEntry = {
		matcher: "Bash",
		hooks: [{ type: "command", command }],
	};
	return [bashCloseGate];
}
449
+
342
450
  /**
343
451
  * Capabilities that are allowed to modify files via Bash commands.
344
452
  * These get the Bash path boundary guard instead of a blanket file-modification block.
@@ -507,6 +615,13 @@ export function getCapabilityGuards(capability: string, qualityGates?: QualityGa
507
615
  guards.push(...getBashPathBoundaryGuards());
508
616
  }
509
617
 
618
+ // Lead agents get the merge_ready gate on sd/bd close (overstory-3899).
619
+ // Blocks closing the lead's own task unless at least one merge_ready mail
620
+ // has been sent, the count covers all worker_done received, and the lead's branch is merged into the target (overstory-da9b).
621
+ if (capability === "lead") {
622
+ guards.push(...getLeadCloseGateGuards());
623
+ }
624
+
510
625
  return guards;
511
626
  }
512
627
 
@@ -538,9 +653,23 @@ export function isOverstoryHookEntry(entry: HookEntry): boolean {
538
653
  * Overstory hooks are placed before user hooks per event type so security
539
654
  * guards run first.
540
655
  *
656
+ * In `headlessOnly` mode, only PreToolUse hooks are deployed (overstory-e24b).
657
+ * Headless Claude Code (`-p --output-format stream-json`) DOES dispatch hooks
658
+ * from settings.local.json, so PreToolUse security guards (path boundary,
659
+ * capability blocks, bash danger patterns, tracker close, lead close gate)
660
+ * are required to keep parity with tmux mode. The other hook types are dropped
661
+ * because they have headless equivalents already wired up:
662
+ * - SessionStart → buildInitialHeadlessPrompt() in sling.ts
663
+ * - UserPromptSubmit → mail injection loop owned by `ov serve`
664
+ * - PostToolUse → stream-json parser captures tool_use/tool_result
665
+ * - Stop → stream-json parser captures the `result` event
666
+ * - PreCompact → deferred (tracked separately)
667
+ *
541
668
  * @param worktreePath - Absolute path to the agent's git worktree (or project root)
542
669
  * @param agentName - The unique name of the agent
543
670
  * @param capability - Agent capability (builder, scout, reviewer, lead, merger)
671
+ * @param qualityGates - Quality gates whose commands are whitelisted as safe Bash prefixes
672
+ * @param headlessOnly - When true, deploy only PreToolUse entries (overstory-e24b)
544
673
  * @throws {AgentError} If the template is not found or the write fails
545
674
  */
546
675
  export async function deployHooks(
@@ -548,6 +677,7 @@ export async function deployHooks(
548
677
  agentName: string,
549
678
  capability = "builder",
550
679
  qualityGates?: QualityGate[],
680
+ headlessOnly = false,
551
681
  ): Promise<void> {
552
682
  const templatePath = getTemplatePath();
553
683
  const file = Bun.file(templatePath);
@@ -578,6 +708,17 @@ export async function deployHooks(
578
708
  // Parse the base config from the template
579
709
  const config = JSON.parse(content) as { hooks: Record<string, HookEntry[]> };
580
710
 
711
+ // Headless mode: drop all template-derived hook entries.
712
+ // Under spawn-per-turn (Phase 3, overstory-2cf9), the turn-runner provides
713
+ // the user prompt and emits its own observability events for every turn;
714
+ // the template's SessionStart/UserPromptSubmit/PostToolUse/Stop/PreCompact
715
+ // hooks would either double-deliver mail (UserPromptSubmit re-injects on top
716
+ // of the runner's prompt) or duplicate session_end / per-tool events.
717
+ // Only the dynamic PreToolUse security guards added below are retained.
718
+ if (headlessOnly) {
719
+ config.hooks = {};
720
+ }
721
+
581
722
  // Extend PATH in all template hook commands.
582
723
  // Claude Code invokes hooks with PATH=/usr/bin:/bin:/usr/sbin:/sbin — ~/.bun/bin
583
724
  // (where ov, ml, sd, etc. live) is not included. Prepend PATH_PREFIX so CLIs resolve.
@@ -0,0 +1,153 @@
1
+ import { describe, expect, test } from "bun:test";
2
+ import { detectMailPollPattern } from "./mail-poll-detect.ts";
3
+
4
describe("detectMailPollPattern", () => {
	// Each row: [test title, shell command, expected `reason`].
	const pollCases: Array<[string, string, string]> = [
		["until ov mail list with sleep body", "until ov mail list; do sleep 1; done", "until ov mail loop"],
		[
			"while ! ov mail check with sleep body",
			"while ! ov mail check; do sleep 5; done",
			"while-not ov mail loop",
		],
		[
			"while ! ov mail list --unread with sleep body",
			"while ! ov mail list --unread; do sleep 2; done",
			"while-not ov mail loop",
		],
		[
			"until ov mail check with extra args and sleep body",
			"until ov mail check --agent foo; do sleep 1; done",
			"until ov mail loop",
		],
		[
			"until [ ... $(ov mail list ... | wc -l) ... ] piped condition",
			`until [ "$(ov mail list --unread | wc -l)" -gt 0 ]; do sleep 1; done`,
			"ov mail piped condition",
		],
		[
			"while [ -z $(ov mail check | jq) ] piped condition",
			`while [ -z "$(ov mail check | jq '.id')" ]; do sleep 2; done`,
			"ov mail piped condition",
		],
		[
			"multi-line with leading whitespace and tabs is detected",
			"\t\tuntil ov mail list;\n\t\tdo\n\t\t\tsleep 1;\n\t\tdone",
			"until ov mail loop",
		],
		[
			"multi-line newline-separated (no semicolons before do/done) is detected",
			"until ov mail list\ndo\n sleep 1\ndone",
			"until ov mail loop",
		],
		[
			// `while [ ... ]` (no `!`) with `ov mail` substituted inside the test
			// expression is the piped form, not while-not.
			"while loop with negated ov mail and pipe-through is the piped variant",
			`while [ "$(ov mail list --unread --json)" = "[]" ]; do sleep 3; done`,
			"ov mail piped condition",
		],
		[
			// The command carries extra spaces before `;`; the assertion is only
			// that the direct `until` form still classifies.
			"until with extra padding around ! does not derail kind detection",
			"until ov mail check ; do sleep 1 ; done",
			"until ov mail loop",
		],
		[
			"while !ov (no space after !) still classifies as while-not",
			"while !ov mail check; do sleep 1; done",
			"while-not ov mail loop",
		],
	];

	// Inputs that must not be reported as a poll. `checkNoThrow` adds the
	// explicit "does not throw" assertion used for the `undefined` case.
	const ignoredCases: Array<{ title: string; input: unknown; checkNoThrow?: boolean }> = [
		{ title: "ov mail check (no loop wrapper)", input: "ov mail check" },
		{
			title: "ov mail list --unread --json (no loop wrapper)",
			input: "ov mail list --unread --json",
		},
		{
			title: "for loop sending mail (bounded, not a wait-poll)",
			input: "for i in 1 2 3; do ov mail send --to lead --subject hi --body x --type status; done",
		},
		{
			title: "while read line over a file (no ov mail reference)",
			input: "while read line; do echo $line; done < file.txt",
		},
		{
			// Without `sleep` the body is a one-shot reaction, not a wait-poll.
			title: "until-loop with ov mail in condition but no sleep in body (not a poll)",
			input: "until ov mail check; do echo got-mail; done",
		},
		{
			title: "non-string command (undefined) returns matched=false without throwing",
			input: undefined,
			checkNoThrow: true,
		},
		{ title: "non-string command (null) returns matched=false", input: null },
		{ title: "non-string command (number) returns matched=false", input: 42 },
		{ title: "empty string returns matched=false", input: "" },
		{
			title: "for loop with sleep but no ov mail reference is not a poll",
			input: "for i in 1 2 3; do sleep 1; echo hi; done",
		},
	];

	describe("matched patterns", () => {
		for (const [title, command, expectedReason] of pollCases) {
			test(title, () => {
				const outcome = detectMailPollPattern(command);
				expect(outcome.matched).toBe(true);
				expect(outcome.reason).toBe(expectedReason);
			});
		}
	});

	describe("not matched", () => {
		for (const { title, input, checkNoThrow } of ignoredCases) {
			test(title, () => {
				if (checkNoThrow) {
					expect(() => detectMailPollPattern(input)).not.toThrow();
				}
				expect(detectMailPollPattern(input).matched).toBe(false);
			});
		}
	});

	describe("regex statefulness", () => {
		const pollCommand = "until ov mail list; do sleep 1; done";

		test("repeated calls return consistent results (no lastIndex leakage)", () => {
			let remaining = 5;
			while (remaining-- > 0) {
				const outcome = detectMailPollPattern(pollCommand);
				expect(outcome.matched).toBe(true);
				expect(outcome.reason).toBe("until ov mail loop");
			}
		});

		test("matched call followed by non-match returns non-match correctly", () => {
			const sequence: Array<[string, boolean]> = [
				[pollCommand, true],
				["ov mail check", false],
				[pollCommand, true],
			];
			for (const [command, expected] of sequence) {
				expect(detectMailPollPattern(command).matched).toBe(expected);
			}
		});
	});
});
@@ -0,0 +1,73 @@
1
/**
 * Defense-in-depth detector for Bash mail-poll patterns (overstory-c92c).
 *
 * The lead.md prompt forbids Bash polling for mail (overstory-fa84) — the
 * primary mitigation. This helper is the runtime backstop: if a future custom
 * overlay or contributed agent definition silently reintroduces the pattern,
 * the turn-runner emits a warning and a custom event so it surfaces in
 * `ov logs` / `ov feed` / the UI. Warn-only by design; the seed's P3 severity
 * is met without aborting the turn.
 *
 * What counts as a wait-poll:
 *   1. An `until` or `while` loop construct.
 *   2. The loop condition references `ov mail check` or `ov mail list`
 *      (directly, negated with `!`, or wrapped in `[ "$(...)" ... ]`).
 *   3. The loop body contains `sleep` (otherwise it's bounded work, not a
 *      poll).
 *
 * `for` loops are bounded and never classified as wait-polls — `for i in 1 2 3;
 * do ov mail send ...; done` is a legitimate batched send, not a poll.
 *
 * Loops nested inside another `until`/`while` loop are inspected as well: a
 * rejected outer loop does not hide a poll in its body.
 */

// `<until|while> <condition> ;|\n do <body> ;|\n done` — group 1 is the keyword,
// group 2 the condition, group 3 the body. Lazy groups stop at the first
// `do` / `done` terminator.
const LOOP_PATTERN =
	/\b(until|while)\b([\s\S]*?)\s*(?:;|\n)\s*do\b([\s\S]*?)\s*(?:;|\n)\s*\bdone\b/g;
const SLEEP_IN_BODY = /\bsleep\b/;
// `ov mail check|list` anywhere in the condition.
const OV_MAIL_REF = /\bov\s+mail\s+(?:check|list)\b/;
// Condition starts directly with the command.
const DIRECT_OV_MAIL = /^ov\s+mail\s+(?:check|list)\b/;
// Condition starts with `!` (optionally spaced) followed by the command.
const NEGATED_OV_MAIL = /^!\s*ov\s+mail\s+(?:check|list)\b/;

export interface MailPollDetectionResult {
	/** True when a sleeping `until`/`while` loop polls `ov mail check|list`. */
	matched: boolean;
	/** Human-readable classification; present only when `matched` is true. */
	reason?: string;
}

/**
 * Pure detector — no I/O, no side effects beyond resetting the module-level
 * regex cursor. Accepts any input and returns `{ matched: false }` for
 * non-string values so callers can pass the raw `event.input.command` field
 * without pre-validation.
 *
 * @param command - Candidate Bash command text (any value is tolerated).
 * @returns `{ matched: true, reason }` for the first qualifying loop found,
 *   otherwise `{ matched: false }`.
 */
export function detectMailPollPattern(command: unknown): MailPollDetectionResult {
	if (typeof command !== "string") return { matched: false };

	// Reset lastIndex because the regex is module-level with the `g` flag.
	LOOP_PATTERN.lastIndex = 0;
	for (
		let match = LOOP_PATTERN.exec(command);
		match !== null;
		match = LOOP_PATTERN.exec(command)
	) {
		const kind = match[1] as "until" | "while";
		const condition = (match[2] ?? "").trim();
		const body = match[3] ?? "";

		if (!SLEEP_IN_BODY.test(body) || !OV_MAIL_REF.test(condition)) {
			// Not a poll. Resume scanning right after this loop keyword rather
			// than after the whole match: the rejected span may contain a nested
			// `until`/`while` loop (e.g. inside a `while true` wrapper) that would
			// otherwise be skipped. Moving past the keyword guarantees progress.
			LOOP_PATTERN.lastIndex = match.index + kind.length;
			continue;
		}

		if (kind === "until") {
			return DIRECT_OV_MAIL.test(condition)
				? { matched: true, reason: "until ov mail loop" }
				: { matched: true, reason: "ov mail piped condition" };
		}

		return NEGATED_OV_MAIL.test(condition)
			? { matched: true, reason: "while-not ov mail loop" }
			: { matched: true, reason: "ov mail piped condition" };
	}

	return { matched: false };
}
@@ -10,6 +10,7 @@ import {
10
10
  formatQualityGatesCapabilities,
11
11
  formatQualityGatesInline,
12
12
  formatQualityGatesSteps,
13
+ formatSiblings,
13
14
  generateOverlay,
14
15
  isCanonicalRoot,
15
16
  writeOverlay,
@@ -523,7 +524,7 @@ describe("generateOverlay", () => {
523
524
  expect(output).toContain("3");
524
525
  });
525
526
 
526
- test("dispatch overrides: maxAgentsOverride of 1 enables combined lead/worker guidance", async () => {
527
+ test("dispatch overrides: maxAgentsOverride of 1 directs the lead to spend the slot on a single builder", async () => {
527
528
  const config = makeConfig({
528
529
  capability: "lead",
529
530
  maxAgentsOverride: 1,
@@ -532,8 +533,8 @@ describe("generateOverlay", () => {
532
533
  const output = await generateOverlay(config);
533
534
 
534
535
  expect(output).toContain("MAX AGENTS");
535
- expect(output).toContain("combined **lead/worker**");
536
- expect(output).toContain("only slot");
536
+ expect(output).toContain("single builder");
537
+ expect(output).toContain("Leads cannot implement directly");
537
538
  });
538
539
 
539
540
  test("dispatch overrides: maxAgentsOverride of 2 enables compressed-mode guidance", async () => {
@@ -546,7 +547,7 @@ describe("generateOverlay", () => {
546
547
 
547
548
  expect(output).toContain("MAX AGENTS");
548
549
  expect(output).toContain("compressed mode");
549
- expect(output).toContain("self-verification");
550
+ expect(output).toContain("Leads do not implement");
550
551
  });
551
552
 
552
553
  test("dispatch overrides: both skipReview and maxAgentsOverride together", async () => {
@@ -1000,3 +1001,58 @@ describe("quality gate placeholders in base definitions", () => {
1000
1001
  expect(output).not.toContain("{{QUALITY_GATE");
1001
1002
  });
1002
1003
  });
1004
+
1005
describe("formatSiblings (overstory-f76a)", () => {
	test("empty siblings array → empty string", () => {
		expect(formatSiblings(makeConfig({ siblings: [] }))).toBe("");
	});

	test("missing siblings field → empty string", () => {
		expect(formatSiblings(makeConfig())).toBe("");
	});

	test("one sibling → markdown with the name and rebase guidance", () => {
		const rendered = formatSiblings(makeConfig({ siblings: ["sibling-a"] }));
		// Heading, the sibling bullet, both rebase commands, and the merge_ready mention.
		const expectedFragments = [
			"## Parallel Siblings",
			"- sibling-a",
			"git fetch origin main:main",
			"git rebase main",
			"merge_ready",
		];
		for (const fragment of expectedFragments) {
			expect(rendered).toContain(fragment);
		}
	});

	test("multiple siblings render every name as a bullet", () => {
		const names = ["sibling-a", "sibling-b", "sibling-c"];
		const rendered = formatSiblings(makeConfig({ siblings: names }));
		for (const name of names) {
			expect(rendered).toContain(`- ${name}`);
		}
	});
});
1034
+
1035
describe("generateOverlay siblings wiring (overstory-f76a)", () => {
	const SECTION_HEADING = "## Parallel Siblings";
	const RAW_PLACEHOLDER = "{{SIBLINGS}}";

	test("siblings field renders Parallel Siblings section in overlay", async () => {
		const overlay = await generateOverlay(makeConfig({ siblings: ["sibling-a", "sibling-b"] }));
		for (const fragment of [SECTION_HEADING, "- sibling-a", "- sibling-b", "git rebase main"]) {
			expect(overlay).toContain(fragment);
		}
		// The template placeholder must be fully substituted.
		expect(overlay).not.toContain(RAW_PLACEHOLDER);
	});

	test("no siblings → overlay omits Parallel Siblings section", async () => {
		const overlay = await generateOverlay(makeConfig());
		expect(overlay).not.toContain(SECTION_HEADING);
		expect(overlay).not.toContain(RAW_PLACEHOLDER);
	});

	test("empty siblings array → overlay omits Parallel Siblings section", async () => {
		const overlay = await generateOverlay(makeConfig({ siblings: [] }));
		expect(overlay).not.toContain(SECTION_HEADING);
	});
});
@@ -3,6 +3,26 @@ import { dirname, join, resolve } from "node:path";
3
3
  import { DEFAULT_QUALITY_GATES } from "../config.ts";
4
4
  import { AgentError } from "../errors.ts";
5
5
  import type { OverlayConfig, QualityGate } from "../types.ts";
6
+ import { terminalMailTypesFor } from "./capabilities.ts";
7
+
8
/**
 * Pick the mail type the dynamic overlay should name when it tells an agent
 * how to report completion ("ov mail send ... --type <type> ...").
 *
 * The value is the first entry returned by `terminalMailTypesFor(capability)`,
 * falling back to `worker_done` when that list has no first entry. Deriving it
 * from `terminalMailTypesFor()` keeps the overlay prose aligned with the types
 * the runner watches for: overstory-1a4c found that overlay text saying
 * `--type result` while the runner watched only for `worker_done` left worker
 * sessions stuck in `working`.
 *
 * @param capability - Agent capability name passed through to `terminalMailTypesFor`.
 * @returns The mail type to use in completion guidance.
 */
function completionMailTypeFor(capability: string): string {
	// Only the leading (canonical) type is used for prose; any secondary types
	// (e.g. `merge_failed`) remain available to agents per their base prompt.
	const [canonicalType] = terminalMailTypesFor(capability);
	return canonicalType ?? "worker_done";
}
6
26
 
7
27
  /**
8
28
  * Resolve the path to the overlay template file.
@@ -13,6 +33,38 @@ function getTemplatePath(): string {
13
33
  return join(dirname(import.meta.dir), "..", "templates", "overlay.md.tmpl");
14
34
  }
15
35
 
36
/**
 * Render the "Parallel Siblings" overlay section (overstory-f76a).
 *
 * Produces a markdown section that lists every configured sibling as a bullet
 * and tells the agent to rebase onto `main` BEFORE sending `merge_ready`, then
 * re-run quality gates. Rationale: parallel leads branch off pre-merge `main`,
 * so whichever merges second carries a stale base and risks reverting sibling
 * work (mx-c0c122 stale-base-revert).
 *
 * Exported for unit-testing.
 *
 * @param config - Overlay configuration; only `config.siblings` is read.
 * @returns The markdown section, or an empty string when no siblings are set.
 */
export function formatSiblings(config: OverlayConfig): string {
	const names = config.siblings;
	if (!names?.length) return "";

	const lines: string[] = [
		"## Parallel Siblings",
		"",
		"The coordinator has dispatched the following sibling agents in parallel that may share file scope with you:",
		"",
	];

	// One bullet per sibling, in the order given.
	for (const name of names) {
		lines.push(`- ${name}`);
	}

	lines.push(
		"",
		"**CRITICAL**: rebase your branch onto the latest `main` BEFORE sending `merge_ready`, then re-run quality gates AFTER the rebase. Sibling work may have landed on `main` while you were working — sending `merge_ready` from a stale base risks reverting their changes (mx-c0c122 stale-base-revert).",
		"",
		"```bash",
		"git fetch origin main:main",
		"git rebase main",
		"# re-run quality gates here, then signal merge_ready",
		"```",
	);

	return lines.join("\n");
}
67
+
16
68
  /**
17
69
  * Format the file scope list as a markdown bullet list.
18
70
  * Returns a human-readable fallback if no files are scoped.
@@ -105,14 +157,14 @@ function formatDispatchOverrides(config: OverlayConfig): string {
105
157
  if (config.maxAgentsOverride === 1) {
106
158
  sections.push(
107
159
  "- **MAX AGENTS**: Your per-lead agent ceiling has been set to **1**. " +
108
- "Operate as a combined **lead/worker**: implement the task yourself unless a single specialist is absolutely necessary. " +
109
- "Do not spend your only slot on a scout or reviewer unless that specialist work is the real bottleneck.",
160
+ "Spend that slot on a single builder for the whole task skip scouts and reviewers and self-verify the builder's diff yourself. " +
161
+ "Leads cannot implement directly (Write/Edit/`git add`/`git commit` are blocked by the harness), so the one slot must be a builder.",
110
162
  );
111
163
  } else if (config.maxAgentsOverride === 2) {
112
164
  sections.push(
113
165
  "- **MAX AGENTS**: Your per-lead agent ceiling has been set to **2**. " +
114
- "Operate in compressed mode: use at most one helper at a time when possible, then complete the remaining implementation and verification yourself. " +
115
- "Prefer self-verification over spawning a separate reviewer.",
166
+ "Operate in compressed mode: spend the slots on builders (one or two), skip scouts and reviewers, and self-verify each diff yourself. " +
167
+ "Leads do not implement; every change requires a builder spawn.",
116
168
  );
117
169
  } else {
118
170
  sections.push(
@@ -202,14 +254,15 @@ export function formatQualityGatesCapabilities(gates: QualityGate[] | undefined)
202
254
 
203
255
  function formatQualityGates(config: OverlayConfig): string {
204
256
  if (READ_ONLY_CAPABILITIES.has(config.capability)) {
257
+ const completionType = completionMailTypeFor(config.capability);
205
258
  return [
206
259
  "## Completion",
207
260
  "",
208
261
  "Before reporting completion:",
209
262
  "",
210
263
  `1. **Record mulch learnings:** \`ml record <domain> --type <convention|pattern|reference> --description "..."\` — capture reusable knowledge from your work`,
211
- `2. **Close issue:** \`${config.trackerCli ?? "sd"} close ${config.taskId} --reason "summary of findings"\``,
212
- `3. **Send results:** \`ov mail send --to ${config.parentAgent ?? "coordinator"} --subject "done" --body "Summary" --type result --agent ${config.agentName}\``,
264
+ `2. **Signal completion:** send \`${completionType}\` mail to ${config.parentAgent ?? "coordinator"}: \`ov mail send --to ${config.parentAgent ?? "coordinator"} --subject "Worker done: ${config.taskId}" --body "Summary of findings" --type ${completionType} --agent ${config.agentName}\``,
265
+ `3. **Close issue:** \`${config.trackerCli ?? "sd"} close ${config.taskId} --reason "summary of findings"\``,
213
266
  "",
214
267
  "You are a read-only agent. Do NOT commit, modify files, or run quality gates.",
215
268
  ].join("\n");
@@ -245,13 +298,14 @@ function formatQualityGates(config: OverlayConfig): string {
245
298
  * Writable agents get file-scope and branch constraints.
246
299
  */
247
300
  function formatConstraints(config: OverlayConfig): string {
301
+ const completionType = completionMailTypeFor(config.capability);
248
302
  if (READ_ONLY_CAPABILITIES.has(config.capability)) {
249
303
  return [
250
304
  "## Constraints",
251
305
  "",
252
306
  "- You are **read-only**: do NOT modify, create, or delete any files",
253
307
  "- Do NOT commit, push, or make any git state changes",
254
- `- Report completion via \`${config.trackerCli ?? "sd"} close\` AND \`ov mail send --type result\``,
308
+ `- Report completion via \`${config.trackerCli ?? "sd"} close\` AND \`ov mail send --type ${completionType}\``,
255
309
  "- If you encounter a blocking issue, send mail with `--priority urgent --type error`",
256
310
  ].join("\n");
257
311
  }
@@ -264,7 +318,7 @@ function formatConstraints(config: OverlayConfig): string {
264
318
  "- Only modify files in your File Scope",
265
319
  `- Commit only to your branch: ${config.branchName}`,
266
320
  "- Never push to the canonical branch",
267
- `- Report completion via \`${config.trackerCli ?? "sd"} close\` AND \`ov mail send --type result\``,
321
+ `- Report completion via \`${config.trackerCli ?? "sd"} close\` AND \`ov mail send --type ${completionType}\``,
268
322
  "- If you encounter a blocking issue, send mail with `--priority urgent --type error`",
269
323
  ].join("\n");
270
324
  }
@@ -339,6 +393,7 @@ export async function generateOverlay(config: OverlayConfig): Promise<string> {
339
393
  "{{SPEC_INSTRUCTION}}": specInstruction,
340
394
  "{{SKIP_SCOUT}}": config.skipScout ? SKIP_SCOUT_SECTION : "",
341
395
  "{{DISPATCH_OVERRIDES}}": formatDispatchOverrides(config),
396
+ "{{SIBLINGS}}": formatSiblings(config),
342
397
  "{{BASE_DEFINITION}}": config.baseDefinition,
343
398
  "{{PROFILE_INSTRUCTIONS}}": formatProfile(config.profileContent),
344
399
  "{{QUALITY_GATE_INLINE}}": formatQualityGatesInline(config.qualityGates),