npm - @os-eco/overstory-cli - Versions diffs - 0.10.3 → 0.11.0 - Mend

@os-eco/overstory-cli 0.10.3 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (44)

package/README.md +4 -2
package/agents/builder.md +10 -1
package/agents/lead.md +106 -5
package/package.json +1 -1
package/src/agents/headless-mail-injector.ts +8 -0
package/src/agents/mail-poll-detect.test.ts +153 -0
package/src/agents/mail-poll-detect.ts +73 -0
package/src/agents/overlay.test.ts +56 -0
package/src/agents/overlay.ts +33 -0
package/src/agents/scope-detect.test.ts +190 -0
package/src/agents/scope-detect.ts +146 -0
package/src/agents/turn-runner.test.ts +862 -0
package/src/agents/turn-runner.ts +225 -8
package/src/commands/agents.ts +9 -0
package/src/commands/coordinator.test.ts +127 -0
package/src/commands/coordinator.ts +71 -4
package/src/commands/dashboard.ts +1 -1
package/src/commands/log.test.ts +131 -0
package/src/commands/log.ts +37 -2
package/src/commands/merge.test.ts +118 -0
package/src/commands/merge.ts +51 -8
package/src/commands/sling.test.ts +104 -0
package/src/commands/sling.ts +95 -8
package/src/commands/stop.test.ts +81 -0
package/src/index.ts +5 -1
package/src/insights/quality-gates.test.ts +141 -0
package/src/insights/quality-gates.ts +156 -0
package/src/logging/theme.ts +4 -0
package/src/merge/predict.test.ts +387 -0
package/src/merge/predict.ts +249 -0
package/src/merge/resolver.ts +1 -1
package/src/mulch/client.ts +3 -3
package/src/sessions/store.test.ts +267 -5
package/src/sessions/store.ts +105 -7
package/src/types.ts +51 -1
package/src/watchdog/daemon.test.ts +124 -2
package/src/watchdog/daemon.ts +27 -12
package/src/watchdog/health.test.ts +133 -8
package/src/watchdog/health.ts +37 -5
package/src/worktree/manager.test.ts +218 -1
package/src/worktree/manager.ts +55 -0
package/src/worktree/tmux.test.ts +25 -0
package/src/worktree/tmux.ts +17 -0
package/templates/overlay.md.tmpl +2 -0

package/README.md CHANGED Viewed

@@ -313,15 +313,17 @@ overstory/
       hooks-deployer.ts           Deploy hooks + tool enforcement
       copilot-hooks-deployer.ts   Deploy hooks config to Copilot worktrees
       guard-rules.ts              Shared guard constants (tool lists, bash patterns)
+      mail-poll-detect.ts         Bash mail-poll pattern detector (runtime backstop)
+      scope-detect.ts             Soft FILE_SCOPE violation detection (builder/merger)
     worktree/                     Git worktree + tmux management
     mail/                         SQLite mail system (typed protocol, broadcast)
-    merge/                        FIFO queue + conflict resolution + sentinel-file lock
+    merge/                        FIFO queue + conflict resolution + sentinel-file lock + dry-run prediction
     watchdog/                     Tiered health monitoring (daemon, triage, health)
     logging/                      Multi-format logger + sanitizer + reporter + color control + shared theme/format
     metrics/                      SQLite metrics + pricing + transcript parsing
     doctor/                       Health check modules (13 checks)
     utils/                        Shared utilities (bin, fs, pid, time, version)
-    insights/                     Session insight analyzer for auto-expertise
+    insights/                     Session insight analyzer + quality-gate runner (success/partial/failure)
     runtimes/                     AgentRuntime abstraction (registry + adapters: Claude, Pi, Copilot, Codex, Gemini, Sapling, OpenCode, Cursor, Aider, Goose, Amp)
     tracker/                      Pluggable task tracker (beads + seeds backends)
     mulch/                        mulch client (programmatic API + CLI wrapper)

package/agents/builder.md CHANGED Viewed

@@ -11,13 +11,22 @@ Every mail message and every tool call costs tokens. Be concise in communication
 These are named failures. If you catch yourself doing any of these, stop and correct immediately.
 - **PATH_BOUNDARY_VIOLATION** -- Writing to any file outside your worktree directory. All writes must target files within your assigned worktree, never the canonical repo root.
-- **FILE_SCOPE_VIOLATION** -- Editing or writing to a file not listed in your FILE_SCOPE. Read any file for context, but only modify scoped files.
+- **FILE_SCOPE_VIOLATION** -- Editing or writing to a file not listed in your FILE_SCOPE. Read any file for context, but only modify scoped files. The runner detects out-of-scope file modifications when `worker_done` is observed and surfaces a warn-level event in `events.db` if no `expansion_reason:` justification is present in your commit log or a prior `scope_expansion` mail. The lead reads this signal during merge verification.
 - **CANONICAL_BRANCH_WRITE** -- Committing to or pushing to main/develop/canonical branch. You commit to your worktree branch only.
 - **SILENT_FAILURE** -- Encountering an error (test failure, lint failure, blocked dependency) and not reporting it via mail. Every error must be communicated to your parent with `--type error`.
 - **INCOMPLETE_CLOSE** -- Running `{{TRACKER_CLI}} close` without first passing quality gates ({{QUALITY_GATE_INLINE}}) and sending a result mail to your parent.
 - **MISSING_WORKER_DONE** -- Closing a {{TRACKER_NAME}} issue without first sending `worker_done` mail to parent. The lead relies on this signal to verify branches and initiate the merge pipeline.
 - **MISSING_MULCH_RECORD** -- Closing without recording mulch learnings. Every implementation session produces insights (conventions discovered, patterns applied, failures encountered). Skipping `ml record` loses knowledge for future agents.
+### Justified scope expansion
+If scope expansion is genuinely necessary (cross-cutting invariant change, missed dependency that the spec did not anticipate), declare it explicitly so the runner does not flag it. Either:
+- Include `expansion_reason: <one-line justification>` anywhere in your commit message body (the runner parses commit bodies via `git log --format=%B main..HEAD`), OR
+- Send a `scope_expansion`-prefixed status mail to your lead BEFORE editing the out-of-scope file: `ov mail send --to <lead> --subject "scope_expansion: <why>" --body "..." --type status --agent $OVERSTORY_AGENT_NAME`.
+Either signal suppresses the soft warning. Prefer mail when you want the lead to acknowledge the expansion before you commit.
 ## overlay
 Your task-specific context (task ID, file scope, spec path, branch name, parent agent) is in `{{INSTRUCTION_PATH}}` in your worktree. That file is generated by `ov sling` and tells you WHAT to work on. This file tells you HOW to work.

package/agents/lead.md CHANGED Viewed

@@ -29,6 +29,7 @@ These are named failures. If you catch yourself doing any of these, stop and cor
 - **SCOUT_SKIP** -- Proceeding to build complex tasks without scouting first. For complex tasks spanning unfamiliar code, scouts prevent bad specs. For simple/moderate tasks where you have sufficient context, skipping scouts is expected, not a failure.
 - **DIRECT_COORDINATOR_REPORT** -- Having builders report directly to the coordinator. All builder communication flows through you. You aggregate and report to the coordinator.
 - **LEAD_DOES_WORK** -- Attempting to modify files, run `git add`/`git commit`, or otherwise implement work yourself. Leads coordinate; they do not implement. The harness will block these tool calls (Write/Edit/NotebookEdit and `git add`/`git commit` are denied for the lead capability). Even one-line changes require a builder spawn — forced delegation is what produces good decomposition. If you catch yourself trying to "just edit the file", stop and spawn a builder.
+- **LEAD_POLLING_BLOCK** -- Running a Bash loop that waits for mail, e.g. `until ov mail list --to <lead> --unread | grep -q '\*'; do sleep N; done`, `while ! ov mail check ...; do sleep N; done`, or any `sleep` inside a wait-for-mail loop. This is fatal under spawn-per-turn: the bash subprocess holds the turn open, so the turn cannot end, so worker mail arriving during the loop cannot wake the lead's next turn. When the bash eventually times out the lead has no fresh signal to react to and exits without sending `merge_ready`/`worker_done`, requiring a replacement lead. Always end your turn after dispatching — see `## turn-boundary-contract`.
 - **OVERLAPPING_FILE_SCOPE** -- Assigning the same file to multiple builders. Every file must have exactly one owner. Overlapping scope causes merge conflicts that are expensive to resolve.
 - **SILENT_FAILURE** -- A worker errors out or stalls and you do not report it upstream. Every blocker must be escalated to the coordinator with `--type error`.
 - **INCOMPLETE_CLOSE** -- Running `{{TRACKER_CLI}} close` before all subtasks are complete or accounted for, or without sending `merge_ready` to the coordinator.
@@ -53,11 +54,34 @@ Your task-specific context (task ID, spec path, hierarchy depth, agent name, whe
 - **Do not spawn more workers than needed.** Start with the minimum. You can always spawn more later. Target 2-5 builders per lead.
 - **Review before merge for complex tasks.** For simple/moderate tasks, the lead may self-verify by reading the diff and running quality gates instead of spawning a reviewer.
+## turn-boundary-contract
+You run under spawn-per-turn (`src/agents/turn-runner.ts`). Each turn is a fresh `claude --resume <session-id>` process: it starts, you act, the process exits. You are NOT a long-lived agent. Mail arrival from your workers is what spawns your next turn — there is no "waiting" state where you sit idle between turns watching for mail.
+**End your turn after dispatch.** Once you have sent dispatch mail to a scout, builder, or reviewer (or any mail that requires a worker reply before you can make progress), stop calling tools. Do not poll, do not sleep, do not re-check mail in a loop, do not send filler `status` updates to your parent while you wait. The next turn fires automatically when worker mail arrives and the orchestrator/turn-runner pumps the new mail into your context.
+**FORBIDDEN — Bash polling loops.** These all violate the contract:
+- `until ov mail list --to <lead> --unread | grep -q '\*'; do sleep N; done`
+- `while ! ov mail check --agent $OVERSTORY_AGENT_NAME; do sleep N; done`
+- Any `sleep` placed inside a wait-for-mail loop, in any shell form.
+The bash subprocess holds the turn open, so the turn cannot end. Worker mail that arrives while the bash is running cannot wake the lead's next turn (there is no "next turn" until this one ends). When the bash eventually times out, the lead's turn ends with no inbound mail context and the next turn — if it fires at all — has no signal to react to. The session typically exits cleanly without ever sending `merge_ready`/`worker_done`, leaving the coordinator waiting for terminal mail that never comes.
+**ALLOWED — one-shot reads at the start of a turn.** These return immediately and are fine:
+- `ov mail check --agent $OVERSTORY_AGENT_NAME` (one invocation, no loop)
+- `ov status`
+- `{{TRACKER_CLI}} show <id>`
+- `git diff <branch>`, `git log`, `git status` and other read-only inspection
+After your one-shot reads at the start of the turn, process the mail (answer questions, forward feedback, send `merge_ready` for completed builders, decide whether to dispatch the next phase), then end the turn. Worker mail arriving later will respawn you.
+**Stalled workers.** If a builder appears stalled (no mail after a long gap), you may nudge once (`ov nudge <builder> "Status check"`), then end the turn. The nudge response will respawn you. Do not wrap the nudge in a polling loop.
 ## communication-protocol
 - **To the coordinator:** Send `status` updates on overall progress, `merge_ready` per-builder as each passes review, `error` messages on blockers, `question` for clarification.
 - **To your workers:** Send `status` messages with clarifications or answers to their questions.
-- **Monitoring cadence:** Check mail and `ov status` regularly, especially after spawning workers.
+- **Monitoring cadence:** One-shot mail check (`ov mail check --agent $OVERSTORY_AGENT_NAME`) at the start of each turn, then end the turn. Never loop or sleep waiting for mail — your turn ends after dispatch and respawns automatically when worker mail arrives. See `## turn-boundary-contract`.
 - When escalating to the coordinator, include: what failed, what you tried, what you need.
 ## intro
@@ -171,6 +195,7 @@ Delegate exploration to scouts so you can focus on decomposition and planning.
      --body "Investigate <what to explore>. Report: file layout, existing patterns, types, dependencies." \
      --type dispatch
    ```
+   After this dispatch, end your turn. Do not poll for results — the scout's `worker_done` mail will respawn you.
    Parallel scouts example:
    ```bash
@@ -190,6 +215,7 @@ Delegate exploration to scouts so you can focus on decomposition and planning.
      --body "Investigate test files and type definitions: <files>. Report: test patterns, type contracts." \
      --type dispatch
    ```
+   After dispatching both scouts, end your turn. Do not poll for results — `worker_done` mail from either scout will respawn you, and you can check whether both have reported on each new turn.
 6. **While scouts explore, plan your decomposition.** Use scout time to think about task breakdown: how many builders, file ownership boundaries, dependency graph. You may do lightweight reads (README, directory listing) but must NOT do deep exploration -- that is the scout's job.
 7. **Collect scout results.** Each scout sends a `worker_done` message with findings. If two scouts were spawned, wait for both before writing specs. Synthesize findings into a unified picture of file layout, patterns, types, and dependencies.
 8. **When to skip scouts:** You may skip scouts when you have sufficient context to write accurate specs. Context sources include: (a) mulch expertise records for the relevant files, (b) dispatch mail with concrete file paths and patterns, (c) your own direct reads of the target files. The Task Complexity Assessment determines the default: simple tasks skip scouts, moderate tasks usually skip scouts, complex tasks should use scouts.
@@ -234,15 +260,18 @@ Write specs from scout findings and dispatch builders. You cannot use the Write
    ov mail send --to <builder-name> --subject "Build: <task>" \
      --body "Spec: \$OVERSTORY_PROJECT_ROOT/.overstory/specs/<bead-id>.md. Begin immediately." --type dispatch
    ```
+   After dispatching builders, end your turn. Do not poll for results — `worker_done` mail will respawn you.
 ### Phase 3 — Review & Verify
 Review is a quality investment. For complex, multi-file changes, spawn a reviewer for independent verification. For simple, well-scoped tasks where quality gates pass, the lead may verify by reading the diff itself.
-10. **Monitor builders:**
-    - `ov mail check` -- process incoming messages from workers.
-    - `ov status` -- check agent states.
-    - `{{TRACKER_CLI}} show <id>` -- check individual task status.
+10. **End your turn after dispatching builders. Mail arrival from workers will spawn your next turn.** On each new turn:
+    - Check mail once: `ov mail check --agent $OVERSTORY_AGENT_NAME` (one-shot, no loop).
+    - Process all messages: answer questions, forward review feedback, send `merge_ready` for completed builders.
+    - Optionally inspect agent state once: `ov status` and `{{TRACKER_CLI}} show <id>` (one-shot reads).
+    - If a builder appears stalled (no mail after a long gap), nudge once: `ov nudge <builder-name> "Status check"`. Then end the turn — the nudge response will respawn you.
+    - End the turn. Do not loop, sleep, or poll for mail — see `## turn-boundary-contract`.
 11. **Handle builder issues:**
     - If a builder sends a `question`, answer it via mail.
     - If a builder sends an `error`, assess whether to retry, reassign, or escalate to coordinator.
@@ -274,6 +303,8 @@ Review is a quality investment. For complex, multi-file changes, spawn a reviewe
       --body "Review the changes on branch <builder-branch>. Spec: \$OVERSTORY_PROJECT_ROOT/.overstory/specs/<builder-bead-id>.md. Run quality gates and report PASS or FAIL." \
       --type dispatch
     ```
+    After this dispatch, end your turn. Do not poll for results — the reviewer's `worker_done` mail will respawn you.
     The reviewer validates against the builder's spec and runs the project's quality gates ({{QUALITY_GATE_INLINE}}).
 13. **Handle review results:**
     - **PASS:** Either the reviewer sends a `worker_done` mail with "PASS" in the subject, or self-verification confirms the diff matches the spec and quality gates pass. Immediately signal `merge_ready` for that builder's branch -- do not wait for other builders to finish:
@@ -296,6 +327,65 @@ Review is a quality investment. For complex, multi-file changes, spawn a reviewe
     {{TRACKER_CLI}} close <task-id> --reason "<summary of what was accomplished across all subtasks>"
     ```
+## merge-dispatch (predict before signaling merge_ready)
+Before signaling `merge_ready` for a builder branch that touched complex/multi-file logic, predict the conflict tier with a side-effect-free dry-run:
+```bash
+ov merge --dry-run --branch <builder-branch> --json
+```
+The JSON envelope now carries a `prediction` field:
+```jsonc
+{
+  "branchName": "...",
+  "status": "pending",
+  "prediction": {
+    "predictedTier": "clean-merge | auto-resolve | ai-resolve | reimagine",
+    "conflictFiles": [...],
+    "wouldRequireAgent": false | true,
+    "reason": "..."
+  }
+}
+```
+Use `prediction.wouldRequireAgent` as the dispatch gate:
+- **`wouldRequireAgent: false`** — keep the standard flow. Send `merge_ready` to the coordinator; the coordinator runs `ov merge` and the programmatic Tier 1/2 path handles it cheaply.
+- **`wouldRequireAgent: true`** — do **NOT** send `merge_ready`. The cheap `claude --print` Tier 3/4 fallback in `ov merge` is too constrained for non-trivial conflicts. Spawn a dedicated merger agent under your hierarchy and let it own the merge:
+    ```bash
+    {{TRACKER_CLI}} create --title="Merge: <builder-task-summary>" --type=task --priority=P1
+    ov sling <merge-bead-id> --capability merger --name merge-<builder-name> \
+      --parent $OVERSTORY_AGENT_NAME --depth <current+1>
+    ov spec write <merge-bead-id> --agent $OVERSTORY_AGENT_NAME --body "$(cat <<'EOF'
+    ## Merge target
+    <canonical-branch>
+    ## Branches to merge (in dependency order)
+    - <builder-branch-1>
+    - <builder-branch-2>
+    ## Predicted conflict tier
+    <ai-resolve | reimagine>
+    ## Predicted conflict files
+    - <file1>
+    - <file2>
+    ## Reason from predictor
+    <prediction.reason verbatim>
+    EOF
+    )"
+    ov mail send --to merge-<builder-name> --subject "Merge: <builder-task>" \
+      --body "Spec: \$OVERSTORY_PROJECT_ROOT/.overstory/specs/<merge-bead-id>.md. Begin immediately." --type dispatch
+    ```
+    The merger agent (see `agents/merger.md`) handles the merge end-to-end and sends terminal `merged` / `merge_failed` mail back to you. After `merged`, your usual close + terminal `worker_done` flow applies — no `merge_ready` for that branch.
+**Multiple sibling branches predicted to require an agent:** prefer **one merger** that processes the branches in dependency order (per the merge-order section in `agents/merger.md`) over spawning N parallel mergers. Pass the ordered branch list in the spec body.
+**Edge case: prediction failure.** If the predictor errors out (e.g., the branch was force-pushed mid-flight), the JSON envelope still returns a `prediction` field with `predictedTier: "ai-resolve"` and `reason: "prediction-failed: ..."`. Treat that as `wouldRequireAgent: true` (the predictor is being conservative on purpose) and spawn a merger.
 ## decomposition-guidelines
 Good decomposition follows these principles:
@@ -332,3 +422,14 @@ Good decomposition follows these principles:
    ```
 Sending the terminal `worker_done` IS your exit. Your process terminates after the turn ends; do not spawn additional workers, send more mail, or run other commands afterward. The lead's job is over once `merge_ready` signals are sent, the task is closed, and the terminal `worker_done` is delivered.
+### Rebase before merge_ready when siblings exist
+When your overlay's "Parallel Siblings" section lists sibling agents, those leads share file scope with you. BEFORE sending `merge_ready` to the coordinator:
+1. `git fetch origin main:main`
+2. `git rebase main`
+3. Re-run quality gates AFTER the rebase ({{QUALITY_GATE_INLINE}}).
+4. If the rebase introduces conflicts you cannot cleanly resolve, escalate to the coordinator with `--type error`.
+Reason: parallel leads branch off pre-merge `main`; whichever merges second carries a stale base and risks reverting sibling work. mx-ddc26a / mx-c0c122 document the prior incidents.

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
 	"name": "@os-eco/overstory-cli",
-	"version": "0.10.3",
+	"version": "0.11.0",
 	"description": "Multi-agent orchestration for AI coding agents — spawn workers in git worktrees via tmux, coordinate through SQLite mail, merge with tiered conflict resolution. Pluggable runtime adapters for Claude Code, Pi, and more.",
 	"author": "Jaymin West",
 	"license": "MIT",

package/src/agents/headless-mail-injector.ts CHANGED Viewed

@@ -10,6 +10,14 @@
  *
  * This module exports `startTurnRunnerMailLoop` (the dispatcher loop) and
  * `_runTurnRunnerTick` (a single-tick variant for deterministic tests).
+ *
+ * State authority (overstory-3087): this module does NOT write session state.
+ * The turn-runner (`src/agents/turn-runner.ts`) is the sole authority for
+ * `in_turn` ↔ `between_turns` transitions — it writes `in_turn` on the first
+ * parser event of a turn and settles to `between_turns` at end-of-turn when
+ * the agent did not deliver a terminal mail. Adding a duplicate writer here
+ * would race with the turn-runner under the per-agent turn lock and make
+ * the substate non-deterministic.
  */
 import { createMailStore } from "../mail/store.ts";

package/src/agents/mail-poll-detect.test.ts ADDED Viewed

@@ -0,0 +1,153 @@
+import { describe, expect, test } from "bun:test";
+import { detectMailPollPattern } from "./mail-poll-detect.ts";
+describe("detectMailPollPattern", () => {
+	describe("matched patterns", () => {
+		test("until ov mail list with sleep body", () => {
+			const result = detectMailPollPattern("until ov mail list; do sleep 1; done");
+			expect(result.matched).toBe(true);
+			expect(result.reason).toBe("until ov mail loop");
+		});
+		test("while ! ov mail check with sleep body", () => {
+			const result = detectMailPollPattern("while ! ov mail check; do sleep 5; done");
+			expect(result.matched).toBe(true);
+			expect(result.reason).toBe("while-not ov mail loop");
+		});
+		test("while ! ov mail list --unread with sleep body", () => {
+			const result = detectMailPollPattern("while ! ov mail list --unread; do sleep 2; done");
+			expect(result.matched).toBe(true);
+			expect(result.reason).toBe("while-not ov mail loop");
+		});
+		test("until ov mail check with extra args and sleep body", () => {
+			const result = detectMailPollPattern("until ov mail check --agent foo; do sleep 1; done");
+			expect(result.matched).toBe(true);
+			expect(result.reason).toBe("until ov mail loop");
+		});
+		test("until [ ... $(ov mail list ... | wc -l) ... ] piped condition", () => {
+			const result = detectMailPollPattern(
+				`until [ "$(ov mail list --unread | wc -l)" -gt 0 ]; do sleep 1; done`,
+			);
+			expect(result.matched).toBe(true);
+			expect(result.reason).toBe("ov mail piped condition");
+		});
+		test("while [ -z $(ov mail check | jq) ] piped condition", () => {
+			const result = detectMailPollPattern(
+				`while [ -z "$(ov mail check | jq '.id')" ]; do sleep 2; done`,
+			);
+			expect(result.matched).toBe(true);
+			expect(result.reason).toBe("ov mail piped condition");
+		});
+		test("multi-line with leading whitespace and tabs is detected", () => {
+			const cmd = "\t\tuntil ov mail list;\n\t\tdo\n\t\t\tsleep 1;\n\t\tdone";
+			const result = detectMailPollPattern(cmd);
+			expect(result.matched).toBe(true);
+			expect(result.reason).toBe("until ov mail loop");
+		});
+		test("multi-line newline-separated (no semicolons before do/done) is detected", () => {
+			const cmd = "until ov mail list\ndo\n  sleep 1\ndone"; // newlines alone separate condition, `do`, body and `done`
+			const result = detectMailPollPattern(cmd);
+			expect(result.matched).toBe(true);
+			expect(result.reason).toBe("until ov mail loop");
+		});
+		test("while loop with negated ov mail and pipe-through is the piped variant", () => {
+			// `while [ ... ]` (no `!`) with `ov mail` substituted inside the test
+			// expression is the piped form, not while-not. Despite "negated" in the title, this command contains no `!`.
+			const result = detectMailPollPattern(
+				`while [ "$(ov mail list --unread --json)" = "[]" ]; do sleep 3; done`,
+			);
+			expect(result.matched).toBe(true);
+			expect(result.reason).toBe("ov mail piped condition");
+		});
+		test("until with extra padding around ! does not derail kind detection", () => {
+			// Note: the command below contains no `!`. It pads the condition and the
+			// separators with extra spaces and asserts the `until` direct form still classifies.
+			const result = detectMailPollPattern("until   ov mail check  ;  do  sleep 1 ;  done");
+			expect(result.matched).toBe(true);
+			expect(result.reason).toBe("until ov mail loop");
+		});
+		test("while !ov (no space after !) still classifies as while-not", () => {
+			const result = detectMailPollPattern("while !ov mail check; do sleep 1; done"); // `!` directly followed by `ov`, no whitespace
+			expect(result.matched).toBe(true);
+			expect(result.reason).toBe("while-not ov mail loop");
+		});
+	});
+	describe("not matched", () => {
+		test("ov mail check (no loop wrapper)", () => {
+			expect(detectMailPollPattern("ov mail check").matched).toBe(false);
+		});
+		test("ov mail list --unread --json (no loop wrapper)", () => {
+			expect(detectMailPollPattern("ov mail list --unread --json").matched).toBe(false);
+		});
+		test("for loop sending mail (bounded, not a wait-poll)", () => {
+			const cmd =
+				"for i in 1 2 3; do ov mail send --to lead --subject hi --body x --type status; done"; // `for` loop running `ov mail send`, not `check`/`list`
+			expect(detectMailPollPattern(cmd).matched).toBe(false);
+		});
+		test("while read line over a file (no ov mail reference)", () => {
+			expect(detectMailPollPattern("while read line; do echo $line; done < file.txt").matched).toBe(
+				false,
+			);
+		});
+		test("until-loop with ov mail in condition but no sleep in body (not a poll)", () => {
+			// Without `sleep` the body is a one-shot reaction, not a wait-poll, so the detector must stay silent.
+			expect(detectMailPollPattern("until ov mail check; do echo got-mail; done").matched).toBe(
+				false,
+			);
+		});
+		test("non-string command (undefined) returns matched=false without throwing", () => {
+			expect(() => detectMailPollPattern(undefined)).not.toThrow();
+			expect(detectMailPollPattern(undefined).matched).toBe(false);
+		});
+		test("non-string command (null) returns matched=false", () => {
+			expect(detectMailPollPattern(null).matched).toBe(false);
+		});
+		test("non-string command (number) returns matched=false", () => {
+			expect(detectMailPollPattern(42).matched).toBe(false);
+		});
+		test("empty string returns matched=false", () => {
+			expect(detectMailPollPattern("").matched).toBe(false);
+		});
+		test("for loop with sleep but no ov mail reference is not a poll", () => {
+			expect(detectMailPollPattern("for i in 1 2 3; do sleep 1; echo hi; done").matched).toBe(
+				false,
+			);
+		});
+	});
+	describe("regex statefulness", () => {
+		test("repeated calls return consistent results (no lastIndex leakage)", () => {
+			const cmd = "until ov mail list; do sleep 1; done";
+			for (let i = 0; i < 5; i++) { // same input five times; every call must give the same answer
+				const result = detectMailPollPattern(cmd);
+				expect(result.matched).toBe(true);
+				expect(result.reason).toBe("until ov mail loop");
+			}
+		});
+		test("matched call followed by non-match returns non-match correctly", () => {
+			expect(detectMailPollPattern("until ov mail list; do sleep 1; done").matched).toBe(true);
+			expect(detectMailPollPattern("ov mail check").matched).toBe(false); // a preceding match must not affect this call
+			expect(detectMailPollPattern("until ov mail list; do sleep 1; done").matched).toBe(true); // and matching still works afterwards
+		});
+	});
+});

package/src/agents/mail-poll-detect.ts ADDED Viewed

@@ -0,0 +1,73 @@
+/**
+ * Defense-in-depth detector for Bash mail-poll patterns (overstory-c92c).
+ *
+ * The lead.md prompt forbids Bash polling for mail (overstory-fa84) — the
+ * primary mitigation. This helper is the runtime backstop: if a future custom
+ * overlay or contributed agent definition silently reintroduces the pattern,
+ * the turn-runner emits a warning and a custom event so it surfaces in
+ * `ov logs` / `ov feed` / the UI. Warn-only by design; the seed's P3 severity
+ * is met without aborting the turn.
+ *
+ * What counts as a wait-poll:
+ *   1. A `until` or `while` loop construct.
+ *   2. The loop condition references `ov mail check` or `ov mail list`
+ *      (directly, negated with `!`, or wrapped in `[ "$(...)" ... ]`).
+ *   3. The loop body contains `sleep` (otherwise it's bounded work, not a
+ *      poll).
+ *
+ * `for` loops are bounded and never classified as wait-polls — `for i in 1 2 3;
+ * do ov mail send ...; done` is a legitimate batched send, not a poll.
+ */
+const LOOP_PATTERN =
+	/\b(until|while)\b([\s\S]*?)\s*(?:;|\n)\s*do\b([\s\S]*?)\s*(?:;|\n)\s*\bdone\b/g; // captures: 1 = loop keyword, 2 = condition text, 3 = body text; `;` or newline must precede `do` and `done`
+const SLEEP_IN_BODY = /\bsleep\b/; // the word `sleep` anywhere in the text tested
+const OV_MAIL_REF = /\bov\s+mail\s+(?:check|list)\b/; // `ov mail check` or `ov mail list` anywhere in the text tested
+const DIRECT_OV_MAIL = /^ov\s+mail\s+(?:check|list)\b/; // anchored: the text must start with the bare command
+const NEGATED_OV_MAIL = /^!\s*ov\s+mail\s+(?:check|list)\b/; // anchored: `!`, optional whitespace, then the command
+export interface MailPollDetectionResult {
+	matched: boolean; // true when a wait-for-mail polling loop was found in the command
+	reason?: string; // classification label; detectMailPollPattern sets it only on its matched results
+}
+/**
+ * Pure detector — no I/O, no side effects. Accepts any input and returns
+ * `{ matched: false }` for non-string values so callers can pass the raw
+ * `event.input.command` field without pre-validation.
+ *
+ * Walks every `until`/`while` loop that LOOP_PATTERN finds, in order, and
+ * reports the first one whose body contains `sleep` and whose condition
+ * references `ov mail check` or `ov mail list`. Loops failing either test
+ * are skipped and the scan continues with the next loop.
+ *
+ * @param command - Candidate shell command text; any non-string value yields `{ matched: false }`.
+ * @returns `{ matched: true, reason }` for the first qualifying loop, where
+ *   `reason` is "until ov mail loop", "while-not ov mail loop", or
+ *   "ov mail piped condition"; otherwise `{ matched: false }`.
+ */
+export function detectMailPollPattern(command: unknown): MailPollDetectionResult {
+	if (typeof command !== "string") return { matched: false };
+	// Reset lastIndex because the regex is module-level with the `g` flag; exec() would otherwise resume where an earlier call stopped.
+	LOOP_PATTERN.lastIndex = 0;
+	let match: RegExpExecArray | null = LOOP_PATTERN.exec(command);
+	while (match !== null) {
+		const kind = match[1] as "until" | "while"; // loop keyword (capture 1)
+		const condition = (match[2] ?? "").trim(); // text between the keyword and `do`, trimmed so the anchored regexes can test its start
+		const body = match[3] ?? ""; // text between `do` and `done`
+		if (!SLEEP_IN_BODY.test(body)) { // no `sleep` in the body: not a wait-poll, move on to the next loop
+			match = LOOP_PATTERN.exec(command);
+			continue;
+		}
+		if (!OV_MAIL_REF.test(condition)) { // condition never mentions `ov mail check` / `ov mail list`
+			match = LOOP_PATTERN.exec(command);
+			continue;
+		}
+		if (kind === "until") {
+			if (DIRECT_OV_MAIL.test(condition)) { // condition starts with the bare command
+				return { matched: true, reason: "until ov mail loop" };
+			}
+			return { matched: true, reason: "ov mail piped condition" }; // any other `until` condition that references ov mail
+		}
+		if (NEGATED_OV_MAIL.test(condition)) { // `while ! ov mail ...`
+			return { matched: true, reason: "while-not ov mail loop" };
+		}
+		return { matched: true, reason: "ov mail piped condition" }; // every remaining `while` condition, including an un-negated direct command
+	}
+	return { matched: false }; // no loop satisfied both the sleep and the ov-mail tests
+}

package/src/agents/overlay.test.ts CHANGED Viewed

@@ -10,6 +10,7 @@ import {
 	formatQualityGatesCapabilities,
 	formatQualityGatesInline,
 	formatQualityGatesSteps,
+	formatSiblings,
 	generateOverlay,
 	isCanonicalRoot,
 	writeOverlay,
@@ -1000,3 +1001,58 @@ describe("quality gate placeholders in base definitions", () => {
 		expect(output).not.toContain("{{QUALITY_GATE");
 	});
 });
+describe("formatSiblings (overstory-f76a)", () => {
+	// Both "nothing configured" shapes must collapse to an empty section.
+	test("empty siblings array → empty string", () => {
+		expect(formatSiblings(makeConfig({ siblings: [] }))).toBe("");
+	});
+	test("missing siblings field → empty string", () => {
+		expect(formatSiblings(makeConfig())).toBe("");
+	});
+	// A single sibling must yield the heading, its bullet, and the rebase recipe.
+	test("one sibling → markdown with the name and rebase guidance", () => {
+		const rendered = formatSiblings(makeConfig({ siblings: ["sibling-a"] }));
+		const expectedFragments = [
+			"## Parallel Siblings",
+			"- sibling-a",
+			"git fetch origin main:main",
+			"git rebase main",
+			"merge_ready",
+		];
+		for (const fragment of expectedFragments) {
+			expect(rendered).toContain(fragment);
+		}
+	});
+	// Every configured name gets its own bullet line.
+	test("multiple siblings render every name as a bullet", () => {
+		const rendered = formatSiblings(
+			makeConfig({ siblings: ["sibling-a", "sibling-b", "sibling-c"] }),
+		);
+		for (const bullet of ["- sibling-a", "- sibling-b", "- sibling-c"]) {
+			expect(rendered).toContain(bullet);
+		}
+	});
+});
+describe("generateOverlay siblings wiring (overstory-f76a)", () => {
+	// With siblings configured, the section is rendered and the placeholder is consumed.
+	test("siblings field renders Parallel Siblings section in overlay", async () => {
+		const overlay = await generateOverlay(
+			makeConfig({ siblings: ["sibling-a", "sibling-b"] }),
+		);
+		const expectedFragments = [
+			"## Parallel Siblings",
+			"- sibling-a",
+			"- sibling-b",
+			"git rebase main",
+		];
+		for (const fragment of expectedFragments) {
+			expect(overlay).toContain(fragment);
+		}
+		expect(overlay).not.toContain("{{SIBLINGS}}");
+	});
+	// Without the field, neither the heading nor the raw placeholder may appear.
+	test("no siblings → overlay omits Parallel Siblings section", async () => {
+		const overlay = await generateOverlay(makeConfig());
+		for (const forbidden of ["## Parallel Siblings", "{{SIBLINGS}}"]) {
+			expect(overlay).not.toContain(forbidden);
+		}
+	});
+	// An explicitly empty list behaves like a missing one.
+	test("empty siblings array → overlay omits Parallel Siblings section", async () => {
+		const overlay = await generateOverlay(makeConfig({ siblings: [] }));
+		expect(overlay).not.toContain("## Parallel Siblings");
+	});
+});

package/src/agents/overlay.ts CHANGED Viewed

@@ -33,6 +33,38 @@ function getTemplatePath(): string {
 	return join(dirname(import.meta.dir), "..", "templates", "overlay.md.tmpl");
 }
+/**
+ * Render the "Parallel Siblings" overlay section (overstory-f76a).
+ *
+ * The section lists each configured sibling as a markdown bullet and then
+ * tells the agent to rebase onto `main` and re-run quality gates BEFORE
+ * sending `merge_ready`. Rationale: parallel leads branch off pre-merge
+ * `main`, so whichever merges second carries a stale base and risks
+ * reverting sibling work (mx-c0c122 stale-base-revert).
+ *
+ * Exported for unit-testing.
+ *
+ * @param config - Overlay configuration; only `config.siblings` is read.
+ * @returns The markdown section, or `""` when `siblings` is missing or empty.
+ */
+export function formatSiblings(config: OverlayConfig): string {
+	const { siblings } = config;
+	if (!(siblings && siblings.length > 0)) {
+		return "";
+	}
+	const heading = [
+		"## Parallel Siblings",
+		"",
+		"The coordinator has dispatched the following sibling agents in parallel that may share file scope with you:",
+		"",
+	];
+	const bulletLines = siblings.map((sibling) => `- ${sibling}`);
+	const guidance = [
+		"",
+		"**CRITICAL**: rebase your branch onto the latest `main` BEFORE sending `merge_ready`, then re-run quality gates AFTER the rebase. Sibling work may have landed on `main` while you were working — sending `merge_ready` from a stale base risks reverting their changes (mx-c0c122 stale-base-revert).",
+		"",
+		"```bash",
+		"git fetch origin main:main",
+		"git rebase main",
+		"# re-run quality gates here, then signal merge_ready",
+		"```",
+	];
+	return [...heading, ...bulletLines, ...guidance].join("\n");
+}
 /**
  * Format the file scope list as a markdown bullet list.
  * Returns a human-readable fallback if no files are scoped.
@@ -361,6 +393,7 @@ export async function generateOverlay(config: OverlayConfig): Promise<string> {
 		"{{SPEC_INSTRUCTION}}": specInstruction,
 		"{{SKIP_SCOUT}}": config.skipScout ? SKIP_SCOUT_SECTION : "",
 		"{{DISPATCH_OVERRIDES}}": formatDispatchOverrides(config),
+		"{{SIBLINGS}}": formatSiblings(config),
 		"{{BASE_DEFINITION}}": config.baseDefinition,
 		"{{PROFILE_INSTRUCTIONS}}": formatProfile(config.profileContent),
 		"{{QUALITY_GATE_INLINE}}": formatQualityGatesInline(config.qualityGates),