@os-eco/overstory-cli 0.10.3 → 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -2
- package/agents/builder.md +10 -1
- package/agents/lead.md +106 -5
- package/package.json +1 -1
- package/src/agents/headless-mail-injector.ts +8 -0
- package/src/agents/mail-poll-detect.test.ts +153 -0
- package/src/agents/mail-poll-detect.ts +73 -0
- package/src/agents/overlay.test.ts +56 -0
- package/src/agents/overlay.ts +33 -0
- package/src/agents/scope-detect.test.ts +190 -0
- package/src/agents/scope-detect.ts +146 -0
- package/src/agents/turn-runner.test.ts +862 -0
- package/src/agents/turn-runner.ts +225 -8
- package/src/commands/agents.ts +9 -0
- package/src/commands/coordinator.test.ts +127 -0
- package/src/commands/coordinator.ts +71 -4
- package/src/commands/dashboard.ts +1 -1
- package/src/commands/log.test.ts +131 -0
- package/src/commands/log.ts +37 -2
- package/src/commands/merge.test.ts +118 -0
- package/src/commands/merge.ts +51 -8
- package/src/commands/sling.test.ts +104 -0
- package/src/commands/sling.ts +95 -8
- package/src/commands/stop.test.ts +81 -0
- package/src/index.ts +5 -1
- package/src/insights/quality-gates.test.ts +141 -0
- package/src/insights/quality-gates.ts +156 -0
- package/src/logging/theme.ts +4 -0
- package/src/merge/predict.test.ts +387 -0
- package/src/merge/predict.ts +249 -0
- package/src/merge/resolver.ts +1 -1
- package/src/mulch/client.ts +3 -3
- package/src/sessions/store.test.ts +267 -5
- package/src/sessions/store.ts +105 -7
- package/src/types.ts +51 -1
- package/src/watchdog/daemon.test.ts +124 -2
- package/src/watchdog/daemon.ts +27 -12
- package/src/watchdog/health.test.ts +133 -8
- package/src/watchdog/health.ts +37 -5
- package/src/worktree/manager.test.ts +218 -1
- package/src/worktree/manager.ts +55 -0
- package/src/worktree/tmux.test.ts +25 -0
- package/src/worktree/tmux.ts +17 -0
- package/templates/overlay.md.tmpl +2 -0
package/README.md
CHANGED
|
@@ -313,15 +313,17 @@ overstory/
|
|
|
313
313
|
hooks-deployer.ts Deploy hooks + tool enforcement
|
|
314
314
|
copilot-hooks-deployer.ts Deploy hooks config to Copilot worktrees
|
|
315
315
|
guard-rules.ts Shared guard constants (tool lists, bash patterns)
|
|
316
|
+
mail-poll-detect.ts Bash mail-poll pattern detector (runtime backstop)
|
|
317
|
+
scope-detect.ts Soft FILE_SCOPE violation detection (builder/merger)
|
|
316
318
|
worktree/ Git worktree + tmux management
|
|
317
319
|
mail/ SQLite mail system (typed protocol, broadcast)
|
|
318
|
-
merge/ FIFO queue + conflict resolution + sentinel-file lock
|
|
320
|
+
merge/ FIFO queue + conflict resolution + sentinel-file lock + dry-run prediction
|
|
319
321
|
watchdog/ Tiered health monitoring (daemon, triage, health)
|
|
320
322
|
logging/ Multi-format logger + sanitizer + reporter + color control + shared theme/format
|
|
321
323
|
metrics/ SQLite metrics + pricing + transcript parsing
|
|
322
324
|
doctor/ Health check modules (13 checks)
|
|
323
325
|
utils/ Shared utilities (bin, fs, pid, time, version)
|
|
324
|
-
insights/ Session insight analyzer
|
|
326
|
+
insights/ Session insight analyzer + quality-gate runner (success/partial/failure)
|
|
325
327
|
runtimes/ AgentRuntime abstraction (registry + adapters: Claude, Pi, Copilot, Codex, Gemini, Sapling, OpenCode, Cursor, Aider, Goose, Amp)
|
|
326
328
|
tracker/ Pluggable task tracker (beads + seeds backends)
|
|
327
329
|
mulch/ mulch client (programmatic API + CLI wrapper)
|
package/agents/builder.md
CHANGED
|
@@ -11,13 +11,22 @@ Every mail message and every tool call costs tokens. Be concise in communication
|
|
|
11
11
|
These are named failures. If you catch yourself doing any of these, stop and correct immediately.
|
|
12
12
|
|
|
13
13
|
- **PATH_BOUNDARY_VIOLATION** -- Writing to any file outside your worktree directory. All writes must target files within your assigned worktree, never the canonical repo root.
|
|
14
|
-
- **FILE_SCOPE_VIOLATION** -- Editing or writing to a file not listed in your FILE_SCOPE. Read any file for context, but only modify scoped files.
|
|
14
|
+
- **FILE_SCOPE_VIOLATION** -- Editing or writing to a file not listed in your FILE_SCOPE. Read any file for context, but only modify scoped files. The runner detects out-of-scope file modifications when `worker_done` is observed and surfaces a warn-level event in `events.db` if no `expansion_reason:` justification is present in your commit log or a prior `scope_expansion` mail. The lead reads this signal during merge verification.
|
|
15
15
|
- **CANONICAL_BRANCH_WRITE** -- Committing to or pushing to main/develop/canonical branch. You commit to your worktree branch only.
|
|
16
16
|
- **SILENT_FAILURE** -- Encountering an error (test failure, lint failure, blocked dependency) and not reporting it via mail. Every error must be communicated to your parent with `--type error`.
|
|
17
17
|
- **INCOMPLETE_CLOSE** -- Running `{{TRACKER_CLI}} close` without first passing quality gates ({{QUALITY_GATE_INLINE}}) and sending a result mail to your parent.
|
|
18
18
|
- **MISSING_WORKER_DONE** -- Closing a {{TRACKER_NAME}} issue without first sending `worker_done` mail to parent. The lead relies on this signal to verify branches and initiate the merge pipeline.
|
|
19
19
|
- **MISSING_MULCH_RECORD** -- Closing without recording mulch learnings. Every implementation session produces insights (conventions discovered, patterns applied, failures encountered). Skipping `ml record` loses knowledge for future agents.
|
|
20
20
|
|
|
21
|
+
### Justified scope expansion
|
|
22
|
+
|
|
23
|
+
If scope expansion is genuinely necessary (cross-cutting invariant change, missed dependency that the spec did not anticipate), declare it explicitly so the runner does not flag it. Either:
|
|
24
|
+
|
|
25
|
+
- Include `expansion_reason: <one-line justification>` anywhere in your commit message body (the runner parses commit bodies via `git log --format=%B main..HEAD`), OR
|
|
26
|
+
- Send a `scope_expansion`-prefixed status mail to your lead BEFORE editing the out-of-scope file: `ov mail send --to <lead> --subject "scope_expansion: <why>" --body "..." --type status --agent $OVERSTORY_AGENT_NAME`.
|
|
27
|
+
|
|
28
|
+
Either signal suppresses the soft warning. Prefer mail when you want the lead to acknowledge the expansion before you commit.
|
|
29
|
+
|
|
21
30
|
## overlay
|
|
22
31
|
|
|
23
32
|
Your task-specific context (task ID, file scope, spec path, branch name, parent agent) is in `{{INSTRUCTION_PATH}}` in your worktree. That file is generated by `ov sling` and tells you WHAT to work on. This file tells you HOW to work.
|
package/agents/lead.md
CHANGED
|
@@ -29,6 +29,7 @@ These are named failures. If you catch yourself doing any of these, stop and cor
|
|
|
29
29
|
- **SCOUT_SKIP** -- Proceeding to build complex tasks without scouting first. For complex tasks spanning unfamiliar code, scouts prevent bad specs. For simple/moderate tasks where you have sufficient context, skipping scouts is expected, not a failure.
|
|
30
30
|
- **DIRECT_COORDINATOR_REPORT** -- Having builders report directly to the coordinator. All builder communication flows through you. You aggregate and report to the coordinator.
|
|
31
31
|
- **LEAD_DOES_WORK** -- Attempting to modify files, run `git add`/`git commit`, or otherwise implement work yourself. Leads coordinate; they do not implement. The harness will block these tool calls (Write/Edit/NotebookEdit and `git add`/`git commit` are denied for the lead capability). Even one-line changes require a builder spawn — forced delegation is what produces good decomposition. If you catch yourself trying to "just edit the file", stop and spawn a builder.
|
|
32
|
+
- **LEAD_POLLING_BLOCK** -- Running a Bash loop that waits for mail, e.g. `until ov mail list --to <lead> --unread | grep -q '\*'; do sleep N; done`, `while ! ov mail check ...; do sleep N; done`, or any `sleep` inside a wait-for-mail loop. This is fatal under spawn-per-turn: the bash subprocess holds the turn open, so the turn cannot end, so worker mail arriving during the loop cannot wake the lead's next turn. When the bash eventually times out the lead has no fresh signal to react to and exits without sending `merge_ready`/`worker_done`, requiring a replacement lead. Always end your turn after dispatching — see `## turn-boundary-contract`.
|
|
32
33
|
- **OVERLAPPING_FILE_SCOPE** -- Assigning the same file to multiple builders. Every file must have exactly one owner. Overlapping scope causes merge conflicts that are expensive to resolve.
|
|
33
34
|
- **SILENT_FAILURE** -- A worker errors out or stalls and you do not report it upstream. Every blocker must be escalated to the coordinator with `--type error`.
|
|
34
35
|
- **INCOMPLETE_CLOSE** -- Running `{{TRACKER_CLI}} close` before all subtasks are complete or accounted for, or without sending `merge_ready` to the coordinator.
|
|
@@ -53,11 +54,34 @@ Your task-specific context (task ID, spec path, hierarchy depth, agent name, whe
|
|
|
53
54
|
- **Do not spawn more workers than needed.** Start with the minimum. You can always spawn more later. Target 2-5 builders per lead.
|
|
54
55
|
- **Review before merge for complex tasks.** For simple/moderate tasks, the lead may self-verify by reading the diff and running quality gates instead of spawning a reviewer.
|
|
55
56
|
|
|
57
|
+
## turn-boundary-contract
|
|
58
|
+
|
|
59
|
+
You run under spawn-per-turn (`src/agents/turn-runner.ts`). Each turn is a fresh `claude --resume <session-id>` process: it starts, you act, the process exits. You are NOT a long-lived agent. Mail arrival from your workers is what spawns your next turn — there is no "waiting" state where you sit idle between turns watching for mail.
|
|
60
|
+
|
|
61
|
+
**End your turn after dispatch.** Once you have sent dispatch mail to a scout, builder, or reviewer (or any mail that requires a worker reply before you can make progress), stop calling tools. Do not poll, do not sleep, do not re-check mail in a loop, do not send filler `status` updates to your parent while you wait. The next turn fires automatically when worker mail arrives and the orchestrator/turn-runner pumps the new mail into your context.
|
|
62
|
+
|
|
63
|
+
**FORBIDDEN — Bash polling loops.** These all violate the contract:
|
|
64
|
+
- `until ov mail list --to <lead> --unread | grep -q '\*'; do sleep N; done`
|
|
65
|
+
- `while ! ov mail check --agent $OVERSTORY_AGENT_NAME; do sleep N; done`
|
|
66
|
+
- Any `sleep` placed inside a wait-for-mail loop, in any shell form.
|
|
67
|
+
|
|
68
|
+
The bash subprocess holds the turn open, so the turn cannot end. Worker mail that arrives while the bash is running cannot wake the lead's next turn (there is no "next turn" until this one ends). When the bash eventually times out, the lead's turn ends with no inbound mail context and the next turn — if it fires at all — has no signal to react to. The session typically exits cleanly without ever sending `merge_ready`/`worker_done`, leaving the coordinator waiting for terminal mail that never comes.
|
|
69
|
+
|
|
70
|
+
**ALLOWED — one-shot reads at the start of a turn.** These return immediately and are fine:
|
|
71
|
+
- `ov mail check --agent $OVERSTORY_AGENT_NAME` (one invocation, no loop)
|
|
72
|
+
- `ov status`
|
|
73
|
+
- `{{TRACKER_CLI}} show <id>`
|
|
74
|
+
- `git diff <branch>`, `git log`, `git status` and other read-only inspection
|
|
75
|
+
|
|
76
|
+
After your one-shot reads at the start of the turn, process the mail (answer questions, forward feedback, send `merge_ready` for completed builders, decide whether to dispatch the next phase), then end the turn. Worker mail arriving later will respawn you.
|
|
77
|
+
|
|
78
|
+
**Stalled workers.** If a builder appears stalled (no mail after a long gap), you may nudge once (`ov nudge <builder> "Status check"`), then end the turn. The nudge response will respawn you. Do not wrap the nudge in a polling loop.
|
|
79
|
+
|
|
56
80
|
## communication-protocol
|
|
57
81
|
|
|
58
82
|
- **To the coordinator:** Send `status` updates on overall progress, `merge_ready` per-builder as each passes review, `error` messages on blockers, `question` for clarification.
|
|
59
83
|
- **To your workers:** Send `status` messages with clarifications or answers to their questions.
|
|
60
|
-
- **Monitoring cadence:**
|
|
84
|
+
- **Monitoring cadence:** One-shot mail check (`ov mail check --agent $OVERSTORY_AGENT_NAME`) at the start of each turn, then end the turn. Never loop or sleep waiting for mail — your turn ends after dispatch and respawns automatically when worker mail arrives. See `## turn-boundary-contract`.
|
|
61
85
|
- When escalating to the coordinator, include: what failed, what you tried, what you need.
|
|
62
86
|
|
|
63
87
|
## intro
|
|
@@ -171,6 +195,7 @@ Delegate exploration to scouts so you can focus on decomposition and planning.
|
|
|
171
195
|
--body "Investigate <what to explore>. Report: file layout, existing patterns, types, dependencies." \
|
|
172
196
|
--type dispatch
|
|
173
197
|
```
|
|
198
|
+
After this dispatch, end your turn. Do not poll for results — the scout's `worker_done` mail will respawn you.
|
|
174
199
|
|
|
175
200
|
Parallel scouts example:
|
|
176
201
|
```bash
|
|
@@ -190,6 +215,7 @@ Delegate exploration to scouts so you can focus on decomposition and planning.
|
|
|
190
215
|
--body "Investigate test files and type definitions: <files>. Report: test patterns, type contracts." \
|
|
191
216
|
--type dispatch
|
|
192
217
|
```
|
|
218
|
+
After dispatching both scouts, end your turn. Do not poll for results — `worker_done` mail from either scout will respawn you, and you can check whether both have reported on each new turn.
|
|
193
219
|
6. **While scouts explore, plan your decomposition.** Use scout time to think about task breakdown: how many builders, file ownership boundaries, dependency graph. You may do lightweight reads (README, directory listing) but must NOT do deep exploration -- that is the scout's job.
|
|
194
220
|
7. **Collect scout results.** Each scout sends a `worker_done` message with findings. If two scouts were spawned, wait for both before writing specs. Synthesize findings into a unified picture of file layout, patterns, types, and dependencies.
|
|
195
221
|
8. **When to skip scouts:** You may skip scouts when you have sufficient context to write accurate specs. Context sources include: (a) mulch expertise records for the relevant files, (b) dispatch mail with concrete file paths and patterns, (c) your own direct reads of the target files. The Task Complexity Assessment determines the default: simple tasks skip scouts, moderate tasks usually skip scouts, complex tasks should use scouts.
|
|
@@ -234,15 +260,18 @@ Write specs from scout findings and dispatch builders. You cannot use the Write
|
|
|
234
260
|
ov mail send --to <builder-name> --subject "Build: <task>" \
|
|
235
261
|
--body "Spec: \$OVERSTORY_PROJECT_ROOT/.overstory/specs/<bead-id>.md. Begin immediately." --type dispatch
|
|
236
262
|
```
|
|
263
|
+
After dispatching builders, end your turn. Do not poll for results — `worker_done` mail will respawn you.
|
|
237
264
|
|
|
238
265
|
### Phase 3 — Review & Verify
|
|
239
266
|
|
|
240
267
|
Review is a quality investment. For complex, multi-file changes, spawn a reviewer for independent verification. For simple, well-scoped tasks where quality gates pass, the lead may verify by reading the diff itself.
|
|
241
268
|
|
|
242
|
-
10. **
|
|
243
|
-
- `ov mail check
|
|
244
|
-
-
|
|
245
|
-
- `{{TRACKER_CLI}} show <id>`
|
|
269
|
+
10. **End your turn after dispatching builders. Mail arrival from workers will spawn your next turn.** On each new turn:
|
|
270
|
+
- Check mail once: `ov mail check --agent $OVERSTORY_AGENT_NAME` (one-shot, no loop).
|
|
271
|
+
- Process all messages: answer questions, forward review feedback, send `merge_ready` for completed builders.
|
|
272
|
+
- Optionally inspect agent state once: `ov status` and `{{TRACKER_CLI}} show <id>` (one-shot reads).
|
|
273
|
+
- If a builder appears stalled (no mail after a long gap), nudge once: `ov nudge <builder-name> "Status check"`. Then end the turn — the nudge response will respawn you.
|
|
274
|
+
- End the turn. Do not loop, sleep, or poll for mail — see `## turn-boundary-contract`.
|
|
246
275
|
11. **Handle builder issues:**
|
|
247
276
|
- If a builder sends a `question`, answer it via mail.
|
|
248
277
|
- If a builder sends an `error`, assess whether to retry, reassign, or escalate to coordinator.
|
|
@@ -274,6 +303,8 @@ Review is a quality investment. For complex, multi-file changes, spawn a reviewe
|
|
|
274
303
|
--body "Review the changes on branch <builder-branch>. Spec: \$OVERSTORY_PROJECT_ROOT/.overstory/specs/<builder-bead-id>.md. Run quality gates and report PASS or FAIL." \
|
|
275
304
|
--type dispatch
|
|
276
305
|
```
|
|
306
|
+
After this dispatch, end your turn. Do not poll for results — the reviewer's `worker_done` mail will respawn you.
|
|
307
|
+
|
|
277
308
|
The reviewer validates against the builder's spec and runs the project's quality gates ({{QUALITY_GATE_INLINE}}).
|
|
278
309
|
13. **Handle review results:**
|
|
279
310
|
- **PASS:** Either the reviewer sends a `worker_done` mail with "PASS" in the subject, or self-verification confirms the diff matches the spec and quality gates pass. Immediately signal `merge_ready` for that builder's branch -- do not wait for other builders to finish:
|
|
@@ -296,6 +327,65 @@ Review is a quality investment. For complex, multi-file changes, spawn a reviewe
|
|
|
296
327
|
{{TRACKER_CLI}} close <task-id> --reason "<summary of what was accomplished across all subtasks>"
|
|
297
328
|
```
|
|
298
329
|
|
|
330
|
+
## merge-dispatch (predict before signaling merge_ready)
|
|
331
|
+
|
|
332
|
+
Before signaling `merge_ready` for a builder branch that touched complex/multi-file logic, predict the conflict tier with a side-effect-free dry-run:
|
|
333
|
+
|
|
334
|
+
```bash
|
|
335
|
+
ov merge --dry-run --branch <builder-branch> --json
|
|
336
|
+
```
|
|
337
|
+
|
|
338
|
+
The JSON envelope now carries a `prediction` field:
|
|
339
|
+
|
|
340
|
+
```jsonc
|
|
341
|
+
{
|
|
342
|
+
"branchName": "...",
|
|
343
|
+
"status": "pending",
|
|
344
|
+
"prediction": {
|
|
345
|
+
"predictedTier": "clean-merge | auto-resolve | ai-resolve | reimagine",
|
|
346
|
+
"conflictFiles": [...],
|
|
347
|
+
"wouldRequireAgent": false | true,
|
|
348
|
+
"reason": "..."
|
|
349
|
+
}
|
|
350
|
+
}
|
|
351
|
+
```
|
|
352
|
+
|
|
353
|
+
Use `prediction.wouldRequireAgent` as the dispatch gate:
|
|
354
|
+
|
|
355
|
+
- **`wouldRequireAgent: false`** — keep the standard flow. Send `merge_ready` to the coordinator; the coordinator runs `ov merge` and the programmatic Tier 1/2 path handles it cheaply.
|
|
356
|
+
- **`wouldRequireAgent: true`** — do **NOT** send `merge_ready`. The cheap `claude --print` Tier 3/4 fallback in `ov merge` is too constrained for non-trivial conflicts. Spawn a dedicated merger agent under your hierarchy and let it own the merge:
|
|
357
|
+
```bash
|
|
358
|
+
{{TRACKER_CLI}} create --title="Merge: <builder-task-summary>" --type=task --priority=P1
|
|
359
|
+
ov sling <merge-bead-id> --capability merger --name merge-<builder-name> \
|
|
360
|
+
--parent $OVERSTORY_AGENT_NAME --depth <current+1>
|
|
361
|
+
ov spec write <merge-bead-id> --agent $OVERSTORY_AGENT_NAME --body "$(cat <<'EOF'
|
|
362
|
+
## Merge target
|
|
363
|
+
<canonical-branch>
|
|
364
|
+
|
|
365
|
+
## Branches to merge (in dependency order)
|
|
366
|
+
- <builder-branch-1>
|
|
367
|
+
- <builder-branch-2>
|
|
368
|
+
|
|
369
|
+
## Predicted conflict tier
|
|
370
|
+
<ai-resolve | reimagine>
|
|
371
|
+
|
|
372
|
+
## Predicted conflict files
|
|
373
|
+
- <file1>
|
|
374
|
+
- <file2>
|
|
375
|
+
|
|
376
|
+
## Reason from predictor
|
|
377
|
+
<prediction.reason verbatim>
|
|
378
|
+
EOF
|
|
379
|
+
)"
|
|
380
|
+
ov mail send --to merge-<builder-name> --subject "Merge: <builder-task>" \
|
|
381
|
+
--body "Spec: \$OVERSTORY_PROJECT_ROOT/.overstory/specs/<merge-bead-id>.md. Begin immediately." --type dispatch
|
|
382
|
+
```
|
|
383
|
+
The merger agent (see `agents/merger.md`) handles the merge end-to-end and sends terminal `merged` / `merge_failed` mail back to you. After `merged`, your usual close + terminal `worker_done` flow applies — no `merge_ready` for that branch.
|
|
384
|
+
|
|
385
|
+
**Multiple sibling branches predicted to require an agent:** prefer **one merger** that processes the branches in dependency order (per the merge-order section in `agents/merger.md`) over spawning N parallel mergers. Pass the ordered branch list in the spec body.
|
|
386
|
+
|
|
387
|
+
**Edge case: prediction failure.** If the predictor errors out (e.g., the branch was force-pushed mid-flight), the JSON envelope still returns a `prediction` field with `predictedTier: "ai-resolve"` and `reason: "prediction-failed: ..."`. Treat that as `wouldRequireAgent: true` (the predictor is being conservative on purpose) and spawn a merger.
|
|
388
|
+
|
|
299
389
|
## decomposition-guidelines
|
|
300
390
|
|
|
301
391
|
Good decomposition follows these principles:
|
|
@@ -332,3 +422,14 @@ Good decomposition follows these principles:
|
|
|
332
422
|
```
|
|
333
423
|
|
|
334
424
|
Sending the terminal `worker_done` IS your exit. Your process terminates after the turn ends; do not spawn additional workers, send more mail, or run other commands afterward. The lead's job is over once `merge_ready` signals are sent, the task is closed, and the terminal `worker_done` is delivered.
|
|
425
|
+
|
|
426
|
+
### Rebase before merge_ready when siblings exist
|
|
427
|
+
|
|
428
|
+
When your overlay's "Parallel Siblings" section lists sibling agents, those leads share file scope with you. BEFORE sending `merge_ready` to the coordinator:
|
|
429
|
+
|
|
430
|
+
1. `git fetch origin main:main`
|
|
431
|
+
2. `git rebase main`
|
|
432
|
+
3. Re-run quality gates AFTER the rebase ({{QUALITY_GATE_INLINE}}).
|
|
433
|
+
4. If the rebase introduces conflicts you cannot cleanly resolve, escalate to the coordinator with `--type error`.
|
|
434
|
+
|
|
435
|
+
Reason: parallel leads branch off pre-merge `main`; whichever merges second carries a stale base and risks reverting sibling work. mx-ddc26a / mx-c0c122 document the prior incidents.
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@os-eco/overstory-cli",
|
|
3
|
-
"version": "0.10.3",
|
|
3
|
+
"version": "0.11.0",
|
|
4
4
|
"description": "Multi-agent orchestration for AI coding agents — spawn workers in git worktrees via tmux, coordinate through SQLite mail, merge with tiered conflict resolution. Pluggable runtime adapters for Claude Code, Pi, and more.",
|
|
5
5
|
"author": "Jaymin West",
|
|
6
6
|
"license": "MIT",
|
|
package/src/agents/headless-mail-injector.ts
CHANGED
|
@@ -10,6 +10,14 @@
|
|
|
10
10
|
*
|
|
11
11
|
* This module exports `startTurnRunnerMailLoop` (the dispatcher loop) and
|
|
12
12
|
* `_runTurnRunnerTick` (a single-tick variant for deterministic tests).
|
|
13
|
+
*
|
|
14
|
+
* State authority (overstory-3087): this module does NOT write session state.
|
|
15
|
+
* The turn-runner (`src/agents/turn-runner.ts`) is the sole authority for
|
|
16
|
+
* `in_turn` ↔ `between_turns` transitions — it writes `in_turn` on the first
|
|
17
|
+
* parser event of a turn and settles to `between_turns` at end-of-turn when
|
|
18
|
+
* the agent did not deliver a terminal mail. Adding a duplicate writer here
|
|
19
|
+
* would race with the turn-runner under the per-agent turn lock and make
|
|
20
|
+
* the substate non-deterministic.
|
|
13
21
|
*/
|
|
14
22
|
|
|
15
23
|
import { createMailStore } from "../mail/store.ts";
|
|
package/src/agents/mail-poll-detect.test.ts
ADDED
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
import { describe, expect, test } from "bun:test";
|
|
2
|
+
import { detectMailPollPattern } from "./mail-poll-detect.ts";
|
|
3
|
+
|
|
4
|
+
describe("detectMailPollPattern", () => {
|
|
5
|
+
describe("matched patterns", () => {
|
|
6
|
+
test("until ov mail list with sleep body", () => {
|
|
7
|
+
const result = detectMailPollPattern("until ov mail list; do sleep 1; done");
|
|
8
|
+
expect(result.matched).toBe(true);
|
|
9
|
+
expect(result.reason).toBe("until ov mail loop");
|
|
10
|
+
});
|
|
11
|
+
|
|
12
|
+
test("while ! ov mail check with sleep body", () => {
|
|
13
|
+
const result = detectMailPollPattern("while ! ov mail check; do sleep 5; done");
|
|
14
|
+
expect(result.matched).toBe(true);
|
|
15
|
+
expect(result.reason).toBe("while-not ov mail loop");
|
|
16
|
+
});
|
|
17
|
+
|
|
18
|
+
test("while ! ov mail list --unread with sleep body", () => {
|
|
19
|
+
const result = detectMailPollPattern("while ! ov mail list --unread; do sleep 2; done");
|
|
20
|
+
expect(result.matched).toBe(true);
|
|
21
|
+
expect(result.reason).toBe("while-not ov mail loop");
|
|
22
|
+
});
|
|
23
|
+
|
|
24
|
+
test("until ov mail check with extra args and sleep body", () => {
|
|
25
|
+
const result = detectMailPollPattern("until ov mail check --agent foo; do sleep 1; done");
|
|
26
|
+
expect(result.matched).toBe(true);
|
|
27
|
+
expect(result.reason).toBe("until ov mail loop");
|
|
28
|
+
});
|
|
29
|
+
|
|
30
|
+
test("until [ ... $(ov mail list ... | wc -l) ... ] piped condition", () => {
|
|
31
|
+
const result = detectMailPollPattern(
|
|
32
|
+
`until [ "$(ov mail list --unread | wc -l)" -gt 0 ]; do sleep 1; done`,
|
|
33
|
+
);
|
|
34
|
+
expect(result.matched).toBe(true);
|
|
35
|
+
expect(result.reason).toBe("ov mail piped condition");
|
|
36
|
+
});
|
|
37
|
+
|
|
38
|
+
test("while [ -z $(ov mail check | jq) ] piped condition", () => {
|
|
39
|
+
const result = detectMailPollPattern(
|
|
40
|
+
`while [ -z "$(ov mail check | jq '.id')" ]; do sleep 2; done`,
|
|
41
|
+
);
|
|
42
|
+
expect(result.matched).toBe(true);
|
|
43
|
+
expect(result.reason).toBe("ov mail piped condition");
|
|
44
|
+
});
|
|
45
|
+
|
|
46
|
+
test("multi-line with leading whitespace and tabs is detected", () => {
|
|
47
|
+
const cmd = "\t\tuntil ov mail list;\n\t\tdo\n\t\t\tsleep 1;\n\t\tdone";
|
|
48
|
+
const result = detectMailPollPattern(cmd);
|
|
49
|
+
expect(result.matched).toBe(true);
|
|
50
|
+
expect(result.reason).toBe("until ov mail loop");
|
|
51
|
+
});
|
|
52
|
+
|
|
53
|
+
test("multi-line newline-separated (no semicolons before do/done) is detected", () => {
|
|
54
|
+
const cmd = "until ov mail list\ndo\n sleep 1\ndone";
|
|
55
|
+
const result = detectMailPollPattern(cmd);
|
|
56
|
+
expect(result.matched).toBe(true);
|
|
57
|
+
expect(result.reason).toBe("until ov mail loop");
|
|
58
|
+
});
|
|
59
|
+
|
|
60
|
+
test("while loop with negated ov mail and pipe-through is the piped variant", () => {
|
|
61
|
+
// `while [ ... ]` (no `!`) with `ov mail` substituted inside the test
|
|
62
|
+
// expression is the piped form, not while-not.
|
|
63
|
+
const result = detectMailPollPattern(
|
|
64
|
+
`while [ "$(ov mail list --unread --json)" = "[]" ]; do sleep 3; done`,
|
|
65
|
+
);
|
|
66
|
+
expect(result.matched).toBe(true);
|
|
67
|
+
expect(result.reason).toBe("ov mail piped condition");
|
|
68
|
+
});
|
|
69
|
+
|
|
70
|
+
test("until with extra padding around ! does not derail kind detection", () => {
|
|
71
|
+
// Note: `until !` is unusual but the spec says `!` may have surrounding
|
|
72
|
+
// spaces; we only assert that `until` direct form still classifies.
|
|
73
|
+
const result = detectMailPollPattern("until ov mail check ; do sleep 1 ; done");
|
|
74
|
+
expect(result.matched).toBe(true);
|
|
75
|
+
expect(result.reason).toBe("until ov mail loop");
|
|
76
|
+
});
|
|
77
|
+
|
|
78
|
+
test("while !ov (no space after !) still classifies as while-not", () => {
|
|
79
|
+
const result = detectMailPollPattern("while !ov mail check; do sleep 1; done");
|
|
80
|
+
expect(result.matched).toBe(true);
|
|
81
|
+
expect(result.reason).toBe("while-not ov mail loop");
|
|
82
|
+
});
|
|
83
|
+
});
|
|
84
|
+
|
|
85
|
+
describe("not matched", () => {
|
|
86
|
+
test("ov mail check (no loop wrapper)", () => {
|
|
87
|
+
expect(detectMailPollPattern("ov mail check").matched).toBe(false);
|
|
88
|
+
});
|
|
89
|
+
|
|
90
|
+
test("ov mail list --unread --json (no loop wrapper)", () => {
|
|
91
|
+
expect(detectMailPollPattern("ov mail list --unread --json").matched).toBe(false);
|
|
92
|
+
});
|
|
93
|
+
|
|
94
|
+
test("for loop sending mail (bounded, not a wait-poll)", () => {
|
|
95
|
+
const cmd =
|
|
96
|
+
"for i in 1 2 3; do ov mail send --to lead --subject hi --body x --type status; done";
|
|
97
|
+
expect(detectMailPollPattern(cmd).matched).toBe(false);
|
|
98
|
+
});
|
|
99
|
+
|
|
100
|
+
test("while read line over a file (no ov mail reference)", () => {
|
|
101
|
+
expect(detectMailPollPattern("while read line; do echo $line; done < file.txt").matched).toBe(
|
|
102
|
+
false,
|
|
103
|
+
);
|
|
104
|
+
});
|
|
105
|
+
|
|
106
|
+
test("until-loop with ov mail in condition but no sleep in body (not a poll)", () => {
|
|
107
|
+
// Without `sleep` the body is a one-shot reaction, not a wait-poll.
|
|
108
|
+
expect(detectMailPollPattern("until ov mail check; do echo got-mail; done").matched).toBe(
|
|
109
|
+
false,
|
|
110
|
+
);
|
|
111
|
+
});
|
|
112
|
+
|
|
113
|
+
test("non-string command (undefined) returns matched=false without throwing", () => {
|
|
114
|
+
expect(() => detectMailPollPattern(undefined)).not.toThrow();
|
|
115
|
+
expect(detectMailPollPattern(undefined).matched).toBe(false);
|
|
116
|
+
});
|
|
117
|
+
|
|
118
|
+
test("non-string command (null) returns matched=false", () => {
|
|
119
|
+
expect(detectMailPollPattern(null).matched).toBe(false);
|
|
120
|
+
});
|
|
121
|
+
|
|
122
|
+
test("non-string command (number) returns matched=false", () => {
|
|
123
|
+
expect(detectMailPollPattern(42).matched).toBe(false);
|
|
124
|
+
});
|
|
125
|
+
|
|
126
|
+
test("empty string returns matched=false", () => {
|
|
127
|
+
expect(detectMailPollPattern("").matched).toBe(false);
|
|
128
|
+
});
|
|
129
|
+
|
|
130
|
+
test("for loop with sleep but no ov mail reference is not a poll", () => {
|
|
131
|
+
expect(detectMailPollPattern("for i in 1 2 3; do sleep 1; echo hi; done").matched).toBe(
|
|
132
|
+
false,
|
|
133
|
+
);
|
|
134
|
+
});
|
|
135
|
+
});
|
|
136
|
+
|
|
137
|
+
describe("regex statefulness", () => {
|
|
138
|
+
test("repeated calls return consistent results (no lastIndex leakage)", () => {
|
|
139
|
+
const cmd = "until ov mail list; do sleep 1; done";
|
|
140
|
+
for (let i = 0; i < 5; i++) {
|
|
141
|
+
const result = detectMailPollPattern(cmd);
|
|
142
|
+
expect(result.matched).toBe(true);
|
|
143
|
+
expect(result.reason).toBe("until ov mail loop");
|
|
144
|
+
}
|
|
145
|
+
});
|
|
146
|
+
|
|
147
|
+
test("matched call followed by non-match returns non-match correctly", () => {
|
|
148
|
+
expect(detectMailPollPattern("until ov mail list; do sleep 1; done").matched).toBe(true);
|
|
149
|
+
expect(detectMailPollPattern("ov mail check").matched).toBe(false);
|
|
150
|
+
expect(detectMailPollPattern("until ov mail list; do sleep 1; done").matched).toBe(true);
|
|
151
|
+
});
|
|
152
|
+
});
|
|
153
|
+
});
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Defense-in-depth detector for Bash mail-poll patterns (overstory-c92c).
|
|
3
|
+
*
|
|
4
|
+
* The lead.md prompt forbids Bash polling for mail (overstory-fa84) — the
|
|
5
|
+
* primary mitigation. This helper is the runtime backstop: if a future custom
|
|
6
|
+
* overlay or contributed agent definition silently reintroduces the pattern,
|
|
7
|
+
* the turn-runner emits a warning and a custom event so it surfaces in
|
|
8
|
+
* `ov logs` / `ov feed` / the UI. Warn-only by design; the seed's P3 severity
|
|
9
|
+
* is met without aborting the turn.
|
|
10
|
+
*
|
|
11
|
+
* What counts as a wait-poll:
|
|
12
|
+
* 1. A `until` or `while` loop construct.
|
|
13
|
+
* 2. The loop condition references `ov mail check` or `ov mail list`
|
|
14
|
+
* (directly, negated with `!`, or wrapped in `[ "$(...)" ... ]`).
|
|
15
|
+
* 3. The loop body contains `sleep` (otherwise it's bounded work, not a
|
|
16
|
+
* poll).
|
|
17
|
+
*
|
|
18
|
+
* `for` loops are bounded and never classified as wait-polls — `for i in 1 2 3;
|
|
19
|
+
* do ov mail send ...; done` is a legitimate batched send, not a poll.
|
|
20
|
+
*/
|
|
21
|
+
|
|
22
|
+
// Matches one `until`/`while … ; do … ; done` construct.
//   group 1: loop keyword, group 2: condition text, group 3: body text.
// Both captures are lazy, so the body ends at the FIRST `; done` — the regex
// cannot balance nested loops on its own (see the rescan in the detector).
const LOOP_PATTERN =
	/\b(until|while)\b([\s\S]*?)\s*(?:;|\n)\s*do\b([\s\S]*?)\s*(?:;|\n)\s*\bdone\b/g;
// A body without `sleep` is treated as bounded work rather than a wait-poll.
const SLEEP_IN_BODY = /\bsleep\b/;
// `ov mail check` / `ov mail list` anywhere inside the condition.
const OV_MAIL_REF = /\bov\s+mail\s+(?:check|list)\b/;
// Condition that starts directly with the mail command.
const DIRECT_OV_MAIL = /^ov\s+mail\s+(?:check|list)\b/;
// Condition that starts with a negated mail command (`! ov mail …`).
const NEGATED_OV_MAIL = /^!\s*ov\s+mail\s+(?:check|list)\b/;

/** Outcome of {@link detectMailPollPattern}. */
export interface MailPollDetectionResult {
	/** True when the command contains a mail wait-poll loop. */
	matched: boolean;
	/** Short label for the matched pattern; only set when `matched` is true. */
	reason?: string;
}

/**
 * Pure detector — no I/O, no side effects beyond resetting the module-level
 * regex cursor. Accepts any input and returns `{ matched: false }` for
 * non-string values so callers can pass the raw `event.input.command` field
 * without pre-validation.
 *
 * A loop is reported when it is an `until`/`while` construct whose condition
 * references `ov mail check` or `ov mail list` and whose body contains
 * `sleep`. The `reason` is one of:
 *   - `"until ov mail loop"`      — `until ov mail …; do …`
 *   - `"while-not ov mail loop"`  — `while ! ov mail …; do …`
 *   - `"ov mail piped condition"` — any other condition referencing the command
 *
 * Nested loops: a candidate that is rejected is rescanned from just past its
 * loop keyword, so a poll loop wrapped inside an unrelated outer loop
 * (`while true; do until ov mail check; do sleep 5; done; done`) is still
 * found. This remains a regex heuristic, not a shell parser.
 *
 * @param command - Raw command value; anything other than a string never matches.
 * @returns Detection result; `reason` is present only on a match.
 */
export function detectMailPollPattern(command: unknown): MailPollDetectionResult {
	if (typeof command !== "string") return { matched: false };

	// Reset lastIndex because the regex is module-level with the `g` flag.
	LOOP_PATTERN.lastIndex = 0;

	for (
		let match = LOOP_PATTERN.exec(command);
		match !== null;
		match = LOOP_PATTERN.exec(command)
	) {
		const keyword = match[1] ?? "";
		const condition = (match[2] ?? "").trim();
		const body = match[3] ?? "";

		if (!SLEEP_IN_BODY.test(body) || !OV_MAIL_REF.test(condition)) {
			// Not a poll. Resume right after this loop keyword instead of after
			// the whole match: the lazy body capture may have swallowed an inner
			// loop that is itself a poll. lastIndex strictly increases, so the
			// scan always terminates.
			LOOP_PATTERN.lastIndex = match.index + keyword.length;
			continue;
		}

		if (keyword === "until") {
			return DIRECT_OV_MAIL.test(condition)
				? { matched: true, reason: "until ov mail loop" }
				: { matched: true, reason: "ov mail piped condition" };
		}

		return NEGATED_OV_MAIL.test(condition)
			? { matched: true, reason: "while-not ov mail loop" }
			: { matched: true, reason: "ov mail piped condition" };
	}

	return { matched: false };
}
|
|
@@ -10,6 +10,7 @@ import {
|
|
|
10
10
|
formatQualityGatesCapabilities,
|
|
11
11
|
formatQualityGatesInline,
|
|
12
12
|
formatQualityGatesSteps,
|
|
13
|
+
formatSiblings,
|
|
13
14
|
generateOverlay,
|
|
14
15
|
isCanonicalRoot,
|
|
15
16
|
writeOverlay,
|
|
@@ -1000,3 +1001,58 @@ describe("quality gate placeholders in base definitions", () => {
|
|
|
1000
1001
|
expect(output).not.toContain("{{QUALITY_GATE");
|
|
1001
1002
|
});
|
|
1002
1003
|
});
|
|
1004
|
+
|
|
1005
|
+
// Unit tests for the parallel-siblings section renderer.
describe("formatSiblings (overstory-f76a)", () => {
	test("empty siblings array → empty string", () => {
		const rendered = formatSiblings(makeConfig({ siblings: [] }));
		expect(rendered).toBe("");
	});

	test("missing siblings field → empty string", () => {
		const rendered = formatSiblings(makeConfig());
		expect(rendered).toBe("");
	});

	test("one sibling → markdown with the name and rebase guidance", () => {
		const rendered = formatSiblings(makeConfig({ siblings: ["sibling-a"] }));
		const expectedFragments = [
			"## Parallel Siblings",
			"- sibling-a",
			"git fetch origin main:main",
			"git rebase main",
			"merge_ready",
		];
		for (const fragment of expectedFragments) {
			expect(rendered).toContain(fragment);
		}
	});

	test("multiple siblings render every name as a bullet", () => {
		const rendered = formatSiblings(
			makeConfig({ siblings: ["sibling-a", "sibling-b", "sibling-c"] }),
		);
		for (const bullet of ["- sibling-a", "- sibling-b", "- sibling-c"]) {
			expect(rendered).toContain(bullet);
		}
	});
});
|
|
1034
|
+
|
|
1035
|
+
// Integration tests: the siblings section must reach the generated overlay
// when siblings are configured, and be absent otherwise.
describe("generateOverlay siblings wiring (overstory-f76a)", () => {
	test("siblings field renders Parallel Siblings section in overlay", async () => {
		const overlay = await generateOverlay(makeConfig({ siblings: ["sibling-a", "sibling-b"] }));
		const expectedFragments = [
			"## Parallel Siblings",
			"- sibling-a",
			"- sibling-b",
			"git rebase main",
		];
		for (const fragment of expectedFragments) {
			expect(overlay).toContain(fragment);
		}
		expect(overlay).not.toContain("{{SIBLINGS}}");
	});

	test("no siblings → overlay omits Parallel Siblings section", async () => {
		const overlay = await generateOverlay(makeConfig());
		expect(overlay).not.toContain("## Parallel Siblings");
		expect(overlay).not.toContain("{{SIBLINGS}}");
	});

	test("empty siblings array → overlay omits Parallel Siblings section", async () => {
		const overlay = await generateOverlay(makeConfig({ siblings: [] }));
		expect(overlay).not.toContain("## Parallel Siblings");
	});
});
|
package/src/agents/overlay.ts
CHANGED
|
@@ -33,6 +33,38 @@ function getTemplatePath(): string {
|
|
|
33
33
|
return join(dirname(import.meta.dir), "..", "templates", "overlay.md.tmpl");
|
|
34
34
|
}
|
|
35
35
|
|
|
36
|
+
/**
 * Render the "Parallel Siblings" overlay section (overstory-f76a).
 *
 * With no siblings configured (field missing or empty) the result is the empty
 * string. Otherwise the section lists every sibling as a markdown bullet and
 * tells the agent to rebase onto `main` and re-run quality gates BEFORE
 * sending `merge_ready`. Rationale: parallel leads branch off pre-merge
 * `main`, so whichever merges second carries a stale base and risks reverting
 * sibling work (mx-c0c122 stale-base-revert).
 *
 * Exported for unit-testing.
 *
 * @param config - Overlay configuration; only `siblings` is read here.
 * @returns The markdown section without a trailing newline, or `""`.
 */
export function formatSiblings(config: OverlayConfig): string {
	const { siblings } = config;
	if (!siblings || siblings.length === 0) {
		return "";
	}

	const intro =
		"The coordinator has dispatched the following sibling agents in parallel that may share file scope with you:";
	const warning =
		"**CRITICAL**: rebase your branch onto the latest `main` BEFORE sending `merge_ready`, then re-run quality gates AFTER the rebase. Sibling work may have landed on `main` while you were working — sending `merge_ready` from a stale base risks reverting their changes (mx-c0c122 stale-base-revert).";
	const siblingList = siblings.map((sibling) => `- ${sibling}`).join("\n");
	const rebaseSnippet = [
		"```bash",
		"git fetch origin main:main",
		"git rebase main",
		"# re-run quality gates here, then signal merge_ready",
		"```",
	];

	const sectionLines = ["## Parallel Siblings", "", intro, "", siblingList, "", warning, ""].concat(
		rebaseSnippet,
	);
	return sectionLines.join("\n");
}
|
|
67
|
+
|
|
36
68
|
/**
|
|
37
69
|
* Format the file scope list as a markdown bullet list.
|
|
38
70
|
* Returns a human-readable fallback if no files are scoped.
|
|
@@ -361,6 +393,7 @@ export async function generateOverlay(config: OverlayConfig): Promise<string> {
|
|
|
361
393
|
"{{SPEC_INSTRUCTION}}": specInstruction,
|
|
362
394
|
"{{SKIP_SCOUT}}": config.skipScout ? SKIP_SCOUT_SECTION : "",
|
|
363
395
|
"{{DISPATCH_OVERRIDES}}": formatDispatchOverrides(config),
|
|
396
|
+
"{{SIBLINGS}}": formatSiblings(config),
|
|
364
397
|
"{{BASE_DEFINITION}}": config.baseDefinition,
|
|
365
398
|
"{{PROFILE_INSTRUCTIONS}}": formatProfile(config.profileContent),
|
|
366
399
|
"{{QUALITY_GATE_INLINE}}": formatQualityGatesInline(config.qualityGates),
|