@os-eco/overstory-cli 0.8.5 → 0.8.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +13 -9
- package/agents/coordinator.md +52 -4
- package/package.json +1 -1
- package/src/agents/hooks-deployer.test.ts +185 -12
- package/src/agents/hooks-deployer.ts +57 -1
- package/src/commands/clean.test.ts +136 -0
- package/src/commands/clean.ts +198 -4
- package/src/commands/coordinator.test.ts +494 -6
- package/src/commands/coordinator.ts +200 -4
- package/src/commands/dashboard.ts +84 -18
- package/src/commands/ecosystem.test.ts +101 -0
- package/src/commands/init.test.ts +211 -0
- package/src/commands/init.ts +93 -15
- package/src/commands/log.test.ts +10 -11
- package/src/commands/log.ts +31 -32
- package/src/commands/prime.ts +30 -5
- package/src/commands/sling.test.ts +33 -0
- package/src/commands/sling.ts +416 -358
- package/src/commands/spec.ts +8 -2
- package/src/commands/stop.test.ts +127 -6
- package/src/commands/stop.ts +95 -43
- package/src/commands/supervisor.ts +2 -0
- package/src/commands/watch.ts +29 -9
- package/src/config.test.ts +72 -0
- package/src/config.ts +26 -1
- package/src/index.ts +4 -1
- package/src/merge/resolver.test.ts +383 -25
- package/src/merge/resolver.ts +291 -98
- package/src/runtimes/claude.test.ts +32 -7
- package/src/runtimes/claude.ts +19 -4
- package/src/runtimes/codex.test.ts +13 -0
- package/src/runtimes/codex.ts +18 -2
- package/src/runtimes/copilot.ts +3 -0
- package/src/runtimes/cursor.test.ts +497 -0
- package/src/runtimes/cursor.ts +205 -0
- package/src/runtimes/gemini.ts +3 -0
- package/src/runtimes/opencode.ts +3 -0
- package/src/runtimes/pi.test.ts +119 -2
- package/src/runtimes/pi.ts +64 -12
- package/src/runtimes/registry.test.ts +21 -1
- package/src/runtimes/registry.ts +3 -0
- package/src/runtimes/sapling.ts +3 -0
- package/src/runtimes/types.ts +5 -0
- package/src/schema-consistency.test.ts +1 -0
- package/src/sessions/store.test.ts +178 -0
- package/src/sessions/store.ts +44 -8
- package/src/types.ts +25 -1
- package/src/watchdog/daemon.test.ts +257 -0
- package/src/watchdog/daemon.ts +66 -23
- package/src/worktree/manager.test.ts +65 -1
- package/src/worktree/manager.ts +36 -0
- package/src/worktree/tmux.test.ts +150 -0
- package/src/worktree/tmux.ts +126 -23
package/README.md
CHANGED
|
@@ -19,6 +19,7 @@ Requires [Bun](https://bun.sh) v1.0+, git, and tmux. At least one supported agen
|
|
|
19
19
|
- [GitHub Copilot](https://github.com/features/copilot) (`copilot` CLI)
|
|
20
20
|
- [Codex](https://github.com/openai/codex) (`codex` CLI)
|
|
21
21
|
- [Gemini CLI](https://github.com/google-gemini/gemini-cli) (`gemini` CLI)
|
|
22
|
+
- [Cursor CLI](https://cursor.com/docs/cli/overview) (`agent` CLI)
|
|
22
23
|
- [Sapling](https://github.com/jayminwest/sapling) (`sp` CLI)
|
|
23
24
|
- [OpenCode](https://opencode.ai) (`opencode` CLI)
|
|
24
25
|
|
|
@@ -99,6 +100,7 @@ Every command supports `--json` where noted. Global flags: `-q`/`--quiet`, `--ti
|
|
|
99
100
|
| `ov coordinator send` | Fire-and-forget message to coordinator (`--subject`) |
|
|
100
101
|
| `ov coordinator ask` | Synchronous request/response to coordinator (`--subject`, `--timeout`) |
|
|
101
102
|
| `ov coordinator output` | Show recent coordinator output (`--lines`) |
|
|
103
|
+
| `ov coordinator check-complete` | Evaluate exit triggers, return completion status |
|
|
102
104
|
| `ov supervisor start` | **[DEPRECATED]** Start per-project supervisor agent |
|
|
103
105
|
| `ov supervisor stop` | **[DEPRECATED]** Stop supervisor |
|
|
104
106
|
| `ov supervisor status` | **[DEPRECATED]** Show supervisor state |
|
|
@@ -176,14 +178,16 @@ Overstory uses instruction overlays and tool-call guards to turn agent sessions
|
|
|
176
178
|
|
|
177
179
|
Overstory is runtime-agnostic. The `AgentRuntime` interface (`src/runtimes/types.ts`) defines the contract — each adapter handles spawning, config deployment, guard enforcement, readiness detection, and transcript parsing for its runtime. Set the default in `config.yaml` or override per-agent with `ov sling --runtime <name>`.
|
|
178
180
|
|
|
179
|
-
| Runtime | CLI | Guard Mechanism |
|
|
180
|
-
|
|
181
|
+
| Runtime | CLI | Guard Mechanism | Stability |
|
|
182
|
+
|---------|-----|-----------------|-----------|
|
|
181
183
|
| Claude Code | `claude` | `settings.local.json` hooks | Stable |
|
|
182
|
-
|
|
|
183
|
-
|
|
|
184
|
-
|
|
|
185
|
-
|
|
|
186
|
-
|
|
|
184
|
+
| Sapling | `sp` | `.sapling/guards.json` | Stable |
|
|
185
|
+
| Pi | `pi` | `.pi/extensions/` guard extension | Experimental |
|
|
186
|
+
| Copilot | `copilot` | (none — `--allow-all-tools`) | Experimental |
|
|
187
|
+
| Cursor | `agent` | (none — `--yolo`) | Experimental |
|
|
188
|
+
| Codex | `codex` | OS-level sandbox (Seatbelt/Landlock) | Experimental |
|
|
189
|
+
| Gemini | `gemini` | `--sandbox` flag | Experimental |
|
|
190
|
+
| OpenCode | `opencode` | (none) | Experimental |
|
|
187
191
|
|
|
188
192
|
## How It Works
|
|
189
193
|
|
|
@@ -232,7 +236,7 @@ overstory/
|
|
|
232
236
|
config.ts Config loader + validation
|
|
233
237
|
errors.ts Custom error types
|
|
234
238
|
json.ts Standardized JSON envelope helpers
|
|
235
|
-
commands/ One file per CLI subcommand (
|
|
239
|
+
commands/ One file per CLI subcommand (35 commands)
|
|
236
240
|
agents.ts Agent discovery and querying
|
|
237
241
|
coordinator.ts Persistent orchestrator lifecycle
|
|
238
242
|
supervisor.ts Team lead management [DEPRECATED]
|
|
@@ -283,7 +287,7 @@ overstory/
|
|
|
283
287
|
metrics/ SQLite metrics + pricing + transcript parsing
|
|
284
288
|
doctor/ Health check modules (11 checks)
|
|
285
289
|
insights/ Session insight analyzer for auto-expertise
|
|
286
|
-
runtimes/ AgentRuntime abstraction (registry + adapters: Claude, Pi, Copilot, Codex, Gemini, Sapling, OpenCode)
|
|
290
|
+
runtimes/ AgentRuntime abstraction (registry + adapters: Claude, Pi, Copilot, Codex, Gemini, Sapling, OpenCode, Cursor)
|
|
287
291
|
tracker/ Pluggable task tracker (beads + seeds backends)
|
|
288
292
|
mulch/ mulch client (programmatic API + CLI wrapper)
|
|
289
293
|
e2e/ End-to-end lifecycle tests
|
package/agents/coordinator.md
CHANGED
|
@@ -22,6 +22,7 @@ These are named failures. If you catch yourself doing any of these, stop and cor
|
|
|
22
22
|
- **UNNECESSARY_SPAWN** -- Spawning a lead for a trivially small task. If the objective is a single small change, a single lead is sufficient. Only spawn multiple leads for genuinely independent work streams.
|
|
23
23
|
- **OVERLAPPING_FILE_AREAS** -- Assigning overlapping file areas to multiple leads. Check existing agent file scopes via `ov status` before dispatching.
|
|
24
24
|
- **PREMATURE_MERGE** -- Merging a branch before the lead signals `merge_ready`. Always wait for the lead's explicit `merge_ready` mail. Watchdog completion nudges (e.g. "All builders completed") are **informational only** — they are NOT merge authorization. Only a typed `merge_ready` mail from the owning lead authorizes a merge.
|
|
25
|
+
- **PREMATURE_ISSUE_CLOSE** -- Closing a seeds issue before the lead has sent `merge_ready` AND the branch has been successfully merged. Builder completion alone does NOT authorize issue closure. The required sequence is strictly: lead sends `merge_ready` → coordinator merges branch → merge succeeds → then close the issue. Closing based on builder `worker_done` signals, group auto-close, or `ov status` showing agents completed is a bug. Always verify the merge step is complete first.
|
|
25
26
|
- **SILENT_ESCALATION_DROP** -- Receiving an escalation mail and not acting on it. Every escalation must be routed according to its severity.
|
|
26
27
|
- **ORPHANED_AGENTS** -- Dispatching leads and losing track of them. Every dispatched lead must be in a task group.
|
|
27
28
|
- **SCOPE_EXPLOSION** -- Decomposing into too many leads. Target 2-5 leads per batch. Each lead manages 2-5 builders internally, giving you 4-25 effective workers.
|
|
@@ -226,6 +227,12 @@ Coordinator (you, depth 0)
|
|
|
226
227
|
ov merge --branch <lead-branch> # then merge
|
|
227
228
|
```
|
|
228
229
|
**Do NOT merge based on watchdog nudges, `ov status` showing "completed" builders, or your own git inspection.** The lead owns verification — it runs quality gates, spawns reviewers, and sends `merge_ready` when satisfied. Wait for that mail.
|
|
230
|
+
|
|
231
|
+
After a successful merge, close the corresponding issue:
|
|
232
|
+
```bash
|
|
233
|
+
{{TRACKER_CLI}} close <task-id> --reason "Merged branch <lead-branch>"
|
|
234
|
+
```
|
|
235
|
+
**Do NOT close issues before their branches are merged.** Issue closure is the final step after merge confirmation, never before.
|
|
229
236
|
10. **Close the batch** when the group auto-completes or all issues are resolved:
|
|
230
237
|
- Verify all issues are closed: `{{TRACKER_CLI}} show <id>` for each.
|
|
231
238
|
- Clean up worktrees: `ov worktree clean --completed`.
|
|
@@ -281,14 +288,55 @@ Report to the human operator immediately. Critical escalations mean the automate
|
|
|
281
288
|
|
|
282
289
|
When a batch is complete (task group auto-closed, all issues resolved):
|
|
283
290
|
|
|
291
|
+
**CRITICAL: Never close an issue until its branch is merged.** The correct close sequence is:
|
|
292
|
+
1. Receive `merge_ready` from lead.
|
|
293
|
+
2. Run `ov merge --branch <branch> --dry-run` (check first), then `ov merge --branch <branch>`.
|
|
294
|
+
3. Verify merge succeeded (no error output, `merged` mail received or `ov status` confirms).
|
|
295
|
+
4. **Only then** close the issue: `{{TRACKER_CLI}} close <id> --reason "Merged branch <branch-name>"`.
|
|
296
|
+
|
|
284
297
|
1. Verify all issues are closed: run `{{TRACKER_CLI}} show <id>` for each issue in the group.
|
|
285
|
-
2. Verify all branches are merged: check `ov status` for unmerged branches.
|
|
298
|
+
2. Verify all branches are merged: check `ov status` for unmerged branches. If any branch is unmerged, do NOT proceed — wait for the lead's `merge_ready` signal.
|
|
286
299
|
3. Clean up worktrees: `ov worktree clean --completed`.
|
|
287
300
|
4. Record orchestration insights: `ml record <domain> --type <type> --classification <foundational|tactical|observational> --description "<insight>"`.
|
|
288
|
-
5.
|
|
289
|
-
|
|
301
|
+
5. Commit and sync state files: after all work is merged and issues are closed, commit any outstanding state changes so runtime state is not left uncommitted when the coordinator goes idle:
|
|
302
|
+
```bash
|
|
303
|
+
{{TRACKER_CLI}} sync
|
|
304
|
+
git add .overstory/ .mulch/
|
|
305
|
+
git diff --cached --quiet || git commit -m "chore: sync runtime state"
|
|
306
|
+
git push
|
|
307
|
+
```
|
|
308
|
+
6. Report to the human operator: summarize what was accomplished, what was merged, any issues encountered.
|
|
309
|
+
7. Check for follow-up work: `{{TRACKER_CLI}} ready` to see if new issues surfaced during the batch.
|
|
310
|
+
|
|
311
|
+
After processing each batch of mail and dispatching work, evaluate whether your exit conditions are met:
|
|
312
|
+
|
|
313
|
+
```bash
|
|
314
|
+
ov coordinator check-complete --json
|
|
315
|
+
```
|
|
316
|
+
|
|
317
|
+
The command evaluates configured `coordinator.exitTriggers` from config.yaml:
|
|
318
|
+
- **allAgentsDone**: all spawned agents in the current run have completed and branches merged
|
|
319
|
+
- **taskTrackerEmpty**: `{{TRACKER_CLI}} ready` returns no unblocked work
|
|
320
|
+
- **onShutdownSignal**: a shutdown message was received via mail
|
|
321
|
+
|
|
322
|
+
When ALL enabled triggers are met (`complete: true` in the JSON output):
|
|
323
|
+
|
|
324
|
+
1. Commit and sync state files so runtime state is not left uncommitted:
|
|
325
|
+
```bash
|
|
326
|
+
{{TRACKER_CLI}} sync
|
|
327
|
+
git add .overstory/ .mulch/
|
|
328
|
+
git diff --cached --quiet || git commit -m "chore: sync runtime state"
|
|
329
|
+
git push
|
|
330
|
+
```
|
|
331
|
+
2. Run `ov run complete` to mark the current run as finished.
|
|
332
|
+
3. Send a final status mail to the operator:
|
|
333
|
+
```bash
|
|
334
|
+
ov mail send --to operator --subject "Run complete" \
|
|
335
|
+
--body "All exit triggers met. Run completed." --type status
|
|
336
|
+
```
|
|
337
|
+
4. Stop processing. Do not spawn additional agents or process further mail.
|
|
290
338
|
|
|
291
|
-
|
|
339
|
+
If no exit triggers are configured (all false), the coordinator runs indefinitely until manually stopped. This is the default behavior for backward compatibility.
|
|
292
340
|
|
|
293
341
|
## persistence-and-context-recovery
|
|
294
342
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@os-eco/overstory-cli",
|
|
3
|
-
"version": "0.8.5",
|
|
3
|
+
"version": "0.8.7",
|
|
4
4
|
"description": "Multi-agent orchestration for AI coding agents — spawn workers in git worktrees via tmux, coordinate through SQLite mail, merge with tiered conflict resolution. Pluggable runtime adapters for Claude Code, Pi, and more.",
|
|
5
5
|
"author": "Jaymin West",
|
|
6
6
|
"license": "MIT",
|
package/src/agents/hooks-deployer.test.ts
CHANGED
|
@@ -8,6 +8,7 @@ import {
|
|
|
8
8
|
buildBashFileGuardScript,
|
|
9
9
|
buildBashPathBoundaryScript,
|
|
10
10
|
buildPathBoundaryGuardScript,
|
|
11
|
+
buildTrackerCloseGuardScript,
|
|
11
12
|
deployHooks,
|
|
12
13
|
escapeForSingleQuotedShell,
|
|
13
14
|
extractQualityGatePrefixes,
|
|
@@ -15,6 +16,7 @@ import {
|
|
|
15
16
|
getCapabilityGuards,
|
|
16
17
|
getDangerGuards,
|
|
17
18
|
getPathBoundaryGuards,
|
|
19
|
+
getTrackerCloseGuards,
|
|
18
20
|
isOverstoryHookEntry,
|
|
19
21
|
PATH_PREFIX,
|
|
20
22
|
} from "./hooks-deployer.ts";
|
|
@@ -468,9 +470,9 @@ describe("deployHooks", () => {
|
|
|
468
470
|
expect(writeBlockGuard).toBeDefined();
|
|
469
471
|
expect(writeBlockGuard.hooks[0].command).toContain('"decision":"block"');
|
|
470
472
|
|
|
471
|
-
// Should have multiple Bash guards: danger guard + file guard + universal push guard
|
|
473
|
+
// Should have multiple Bash guards: danger guard + file guard + tracker close guard + universal push guard
|
|
472
474
|
const bashGuards = preToolUse.filter((h: { matcher: string }) => h.matcher === "Bash");
|
|
473
|
-
expect(bashGuards.length).toBe(3);
|
|
475
|
+
expect(bashGuards.length).toBe(4); // danger guard + file guard + tracker close guard + universal push guard
|
|
474
476
|
});
|
|
475
477
|
|
|
476
478
|
test("reviewer capability adds same guards as scout", async () => {
|
|
@@ -512,9 +514,9 @@ describe("deployHooks", () => {
|
|
|
512
514
|
expect(guardMatchers).toContain("NotebookEdit");
|
|
513
515
|
expect(guardMatchers).toContain("Bash");
|
|
514
516
|
|
|
515
|
-
// Should have 3 Bash guards: danger guard + file guard + universal push guard
|
|
517
|
+
// Should have 4 Bash guards: danger guard + file guard + tracker close guard + universal push guard
|
|
516
518
|
const bashGuards = preToolUse.filter((h: { matcher: string }) => h.matcher === "Bash");
|
|
517
|
-
expect(bashGuards.length).toBe(3);
|
|
519
|
+
expect(bashGuards.length).toBe(4);
|
|
518
520
|
});
|
|
519
521
|
|
|
520
522
|
test("builder capability gets path boundary + Bash danger + Bash path boundary guards + native team tool blocks", async () => {
|
|
@@ -544,9 +546,9 @@ describe("deployHooks", () => {
|
|
|
544
546
|
expect(writeGuards[0].hooks[0].command).toContain("OVERSTORY_WORKTREE_PATH");
|
|
545
547
|
expect(writeGuards[0].hooks[0].command).not.toContain("cannot modify files");
|
|
546
548
|
|
|
547
|
-
// Builder should have 3 Bash guards: danger guard + path boundary guard + universal push guard
|
|
549
|
+
// Builder should have 4 Bash guards: danger guard + path boundary guard + tracker close guard + universal push guard
|
|
548
550
|
const bashGuards = preToolUse.filter((h: { matcher: string }) => h.matcher === "Bash");
|
|
549
|
-
expect(bashGuards.length).toBe(3);
|
|
551
|
+
expect(bashGuards.length).toBe(4);
|
|
550
552
|
// One should be the danger guard (checks git push)
|
|
551
553
|
const dangerGuard = bashGuards.find(
|
|
552
554
|
(h: { hooks: Array<{ command: string }> }) =>
|
|
@@ -1607,7 +1609,7 @@ describe("structural enforcement integration", () => {
|
|
|
1607
1609
|
|
|
1608
1610
|
// Find the bash file guard (the second Bash entry, after the danger guard)
|
|
1609
1611
|
const bashGuards = preToolUse.filter((h: { matcher: string }) => h.matcher === "Bash");
|
|
1610
|
-
expect(bashGuards.length).toBe(3);
|
|
1612
|
+
expect(bashGuards.length).toBe(4);
|
|
1611
1613
|
|
|
1612
1614
|
// The file guard (second Bash guard) should whitelist git add/commit
|
|
1613
1615
|
const fileGuard = bashGuards[1];
|
|
@@ -2070,8 +2072,8 @@ describe("bash path boundary integration", () => {
|
|
|
2070
2072
|
const preToolUse = parsed.hooks.PreToolUse;
|
|
2071
2073
|
|
|
2072
2074
|
const bashGuards = preToolUse.filter((h: { matcher: string }) => h.matcher === "Bash");
|
|
2073
|
-
// Should have 3 Bash guards: danger guard + path boundary guard + universal push guard
|
|
2074
|
-
expect(bashGuards.length).toBe(3);
|
|
2075
|
+
// Should have 4 Bash guards: danger guard + path boundary guard + tracker close guard + universal push guard
|
|
2076
|
+
expect(bashGuards.length).toBe(4);
|
|
2075
2077
|
|
|
2076
2078
|
// Find the path boundary guard
|
|
2077
2079
|
const pathGuard = bashGuards.find((h: { hooks: Array<{ command: string }> }) =>
|
|
@@ -2092,7 +2094,7 @@ describe("bash path boundary integration", () => {
|
|
|
2092
2094
|
const preToolUse = parsed.hooks.PreToolUse;
|
|
2093
2095
|
|
|
2094
2096
|
const bashGuards = preToolUse.filter((h: { matcher: string }) => h.matcher === "Bash");
|
|
2095
|
-
expect(bashGuards.length).toBe(3);
|
|
2097
|
+
expect(bashGuards.length).toBe(4);
|
|
2096
2098
|
|
|
2097
2099
|
const pathGuard = bashGuards.find((h: { hooks: Array<{ command: string }> }) =>
|
|
2098
2100
|
h.hooks[0]?.command?.includes("Bash path boundary violation"),
|
|
@@ -2110,9 +2112,9 @@ describe("bash path boundary integration", () => {
|
|
|
2110
2112
|
const parsed = JSON.parse(content);
|
|
2111
2113
|
const preToolUse = parsed.hooks.PreToolUse;
|
|
2112
2114
|
|
|
2113
|
-
// Scout gets danger guard + file guard + universal push guard (3 Bash guards), but NOT path boundary
|
|
2115
|
+
// Scout gets danger guard + file guard + tracker close guard + universal push guard (4 Bash guards), but NOT path boundary
|
|
2114
2116
|
const bashGuards = preToolUse.filter((h: { matcher: string }) => h.matcher === "Bash");
|
|
2115
|
-
expect(bashGuards.length).toBe(3);
|
|
2117
|
+
expect(bashGuards.length).toBe(4);
|
|
2116
2118
|
|
|
2117
2119
|
const pathGuard = bashGuards.find((h: { hooks: Array<{ command: string }> }) =>
|
|
2118
2120
|
h.hooks[0]?.command?.includes("Bash path boundary violation"),
|
|
@@ -2401,6 +2403,177 @@ describe("PATH prefix in deployed hooks", () => {
|
|
|
2401
2403
|
});
|
|
2402
2404
|
});
|
|
2403
2405
|
|
|
2406
|
+
describe("buildTrackerCloseGuardScript", () => {
|
|
2407
|
+
test("returns a string containing key patterns", () => {
|
|
2408
|
+
const script = buildTrackerCloseGuardScript();
|
|
2409
|
+
expect(typeof script).toBe("string");
|
|
2410
|
+
expect(script.length).toBeGreaterThan(0);
|
|
2411
|
+
expect(script).toContain("sd");
|
|
2412
|
+
expect(script).toContain("bd");
|
|
2413
|
+
expect(script).toContain("close");
|
|
2414
|
+
expect(script).toContain("update");
|
|
2415
|
+
});
|
|
2416
|
+
|
|
2417
|
+
test("contains ENV_GUARD prefix", () => {
|
|
2418
|
+
const script = buildTrackerCloseGuardScript();
|
|
2419
|
+
expect(script).toContain('[ -z "$OVERSTORY_AGENT_NAME" ] && exit 0;');
|
|
2420
|
+
});
|
|
2421
|
+
|
|
2422
|
+
test("contains OVERSTORY_TASK_ID early-exit check", () => {
|
|
2423
|
+
const script = buildTrackerCloseGuardScript();
|
|
2424
|
+
expect(script).toContain('[ -z "$OVERSTORY_TASK_ID" ] && exit 0;');
|
|
2425
|
+
});
|
|
2426
|
+
|
|
2427
|
+
test("blocks sd close with wrong ID", async () => {
|
|
2428
|
+
const script = buildTrackerCloseGuardScript();
|
|
2429
|
+
const input = JSON.stringify({ command: "sd close other-task" });
|
|
2430
|
+
const proc = Bun.spawn(["sh", "-c", script], {
|
|
2431
|
+
stdin: new TextEncoder().encode(input),
|
|
2432
|
+
stdout: "pipe",
|
|
2433
|
+
stderr: "pipe",
|
|
2434
|
+
env: { ...process.env, OVERSTORY_AGENT_NAME: "test-agent", OVERSTORY_TASK_ID: "my-task" },
|
|
2435
|
+
});
|
|
2436
|
+
const output = await new Response(proc.stdout).text();
|
|
2437
|
+
await proc.exited;
|
|
2438
|
+
const parsed = JSON.parse(output.trim());
|
|
2439
|
+
expect(parsed.decision).toBe("block");
|
|
2440
|
+
expect(parsed.reason).toContain("other-task");
|
|
2441
|
+
expect(parsed.reason).toContain("my-task");
|
|
2442
|
+
});
|
|
2443
|
+
|
|
2444
|
+
test("allows sd close with matching ID", async () => {
|
|
2445
|
+
const script = buildTrackerCloseGuardScript();
|
|
2446
|
+
const input = JSON.stringify({ command: "sd close my-task" });
|
|
2447
|
+
const proc = Bun.spawn(["sh", "-c", script], {
|
|
2448
|
+
stdin: new TextEncoder().encode(input),
|
|
2449
|
+
stdout: "pipe",
|
|
2450
|
+
stderr: "pipe",
|
|
2451
|
+
env: { ...process.env, OVERSTORY_AGENT_NAME: "test-agent", OVERSTORY_TASK_ID: "my-task" },
|
|
2452
|
+
});
|
|
2453
|
+
const output = await new Response(proc.stdout).text();
|
|
2454
|
+
await proc.exited;
|
|
2455
|
+
expect(output.trim()).toBe("");
|
|
2456
|
+
});
|
|
2457
|
+
|
|
2458
|
+
test("blocks bd close with wrong ID", async () => {
|
|
2459
|
+
const script = buildTrackerCloseGuardScript();
|
|
2460
|
+
const input = JSON.stringify({ command: "bd close other-task" });
|
|
2461
|
+
const proc = Bun.spawn(["sh", "-c", script], {
|
|
2462
|
+
stdin: new TextEncoder().encode(input),
|
|
2463
|
+
stdout: "pipe",
|
|
2464
|
+
stderr: "pipe",
|
|
2465
|
+
env: { ...process.env, OVERSTORY_AGENT_NAME: "test-agent", OVERSTORY_TASK_ID: "my-task" },
|
|
2466
|
+
});
|
|
2467
|
+
const output = await new Response(proc.stdout).text();
|
|
2468
|
+
await proc.exited;
|
|
2469
|
+
const parsed = JSON.parse(output.trim());
|
|
2470
|
+
expect(parsed.decision).toBe("block");
|
|
2471
|
+
expect(parsed.reason).toContain("other-task");
|
|
2472
|
+
});
|
|
2473
|
+
|
|
2474
|
+
test("blocks sd update --status with wrong ID", async () => {
|
|
2475
|
+
const script = buildTrackerCloseGuardScript();
|
|
2476
|
+
const input = JSON.stringify({ command: "sd update other-task --status in_progress" });
|
|
2477
|
+
const proc = Bun.spawn(["sh", "-c", script], {
|
|
2478
|
+
stdin: new TextEncoder().encode(input),
|
|
2479
|
+
stdout: "pipe",
|
|
2480
|
+
stderr: "pipe",
|
|
2481
|
+
env: { ...process.env, OVERSTORY_AGENT_NAME: "test-agent", OVERSTORY_TASK_ID: "my-task" },
|
|
2482
|
+
});
|
|
2483
|
+
const output = await new Response(proc.stdout).text();
|
|
2484
|
+
await proc.exited;
|
|
2485
|
+
const parsed = JSON.parse(output.trim());
|
|
2486
|
+
expect(parsed.decision).toBe("block");
|
|
2487
|
+
expect(parsed.reason).toContain("other-task");
|
|
2488
|
+
});
|
|
2489
|
+
|
|
2490
|
+
test("exits early when OVERSTORY_TASK_ID is empty (coordinator/monitor)", async () => {
|
|
2491
|
+
const script = buildTrackerCloseGuardScript();
|
|
2492
|
+
const input = JSON.stringify({ command: "sd close coordinator-task" });
|
|
2493
|
+
const proc = Bun.spawn(["sh", "-c", script], {
|
|
2494
|
+
stdin: new TextEncoder().encode(input),
|
|
2495
|
+
stdout: "pipe",
|
|
2496
|
+
stderr: "pipe",
|
|
2497
|
+
env: { ...process.env, OVERSTORY_AGENT_NAME: "coordinator", OVERSTORY_TASK_ID: "" },
|
|
2498
|
+
});
|
|
2499
|
+
const output = await new Response(proc.stdout).text();
|
|
2500
|
+
await proc.exited;
|
|
2501
|
+
expect(output.trim()).toBe("");
|
|
2502
|
+
});
|
|
2503
|
+
});
|
|
2504
|
+
|
|
2505
|
+
describe("getTrackerCloseGuards", () => {
|
|
2506
|
+
test("returns exactly 1 Bash guard entry", () => {
|
|
2507
|
+
const guards = getTrackerCloseGuards();
|
|
2508
|
+
expect(guards).toHaveLength(1);
|
|
2509
|
+
expect(guards[0]?.matcher).toBe("Bash");
|
|
2510
|
+
});
|
|
2511
|
+
|
|
2512
|
+
test("guard hook type is command", () => {
|
|
2513
|
+
const guards = getTrackerCloseGuards();
|
|
2514
|
+
expect(guards[0]?.hooks[0]?.type).toBe("command");
|
|
2515
|
+
});
|
|
2516
|
+
|
|
2517
|
+
test("guard command contains OVERSTORY_TASK_ID check", () => {
|
|
2518
|
+
const guards = getTrackerCloseGuards();
|
|
2519
|
+
const command = guards[0]?.hooks[0]?.command ?? "";
|
|
2520
|
+
expect(command).toContain("OVERSTORY_TASK_ID");
|
|
2521
|
+
});
|
|
2522
|
+
|
|
2523
|
+
test("guard command includes ENV_GUARD prefix", () => {
|
|
2524
|
+
const guards = getTrackerCloseGuards();
|
|
2525
|
+
const command = guards[0]?.hooks[0]?.command ?? "";
|
|
2526
|
+
expect(command).toContain('[ -z "$OVERSTORY_AGENT_NAME" ] && exit 0;');
|
|
2527
|
+
});
|
|
2528
|
+
});
|
|
2529
|
+
|
|
2530
|
+
describe("deployHooks tracker close guard integration", () => {
|
|
2531
|
+
let tempDir: string;
|
|
2532
|
+
|
|
2533
|
+
beforeEach(async () => {
|
|
2534
|
+
tempDir = await mkdtemp(join(tmpdir(), "overstory-tracker-close-test-"));
|
|
2535
|
+
});
|
|
2536
|
+
|
|
2537
|
+
afterEach(async () => {
|
|
2538
|
+
await cleanupTempDir(tempDir);
|
|
2539
|
+
});
|
|
2540
|
+
|
|
2541
|
+
test("deployHooks includes tracker close guard in PreToolUse for builder", async () => {
|
|
2542
|
+
const worktreePath = join(tempDir, "builder-tc-wt");
|
|
2543
|
+
await deployHooks(worktreePath, "builder-tc", "builder");
|
|
2544
|
+
|
|
2545
|
+
const content = await Bun.file(join(worktreePath, ".claude", "settings.local.json")).text();
|
|
2546
|
+
const parsed = JSON.parse(content);
|
|
2547
|
+
const preToolUse = parsed.hooks.PreToolUse;
|
|
2548
|
+
|
|
2549
|
+
const trackerGuard = preToolUse.find(
|
|
2550
|
+
(h: { matcher: string; hooks: Array<{ command: string }> }) =>
|
|
2551
|
+
h.matcher === "Bash" && h.hooks[0]?.command?.includes("OVERSTORY_TASK_ID"),
|
|
2552
|
+
);
|
|
2553
|
+
expect(trackerGuard).toBeDefined();
|
|
2554
|
+
expect(trackerGuard.hooks[0].command).toContain("OVERSTORY_TASK_ID");
|
|
2555
|
+
});
|
|
2556
|
+
|
|
2557
|
+
test("deployHooks includes tracker close guard in PreToolUse for all capabilities", async () => {
|
|
2558
|
+
const capabilities = ["builder", "scout", "reviewer", "lead", "merger", "coordinator"];
|
|
2559
|
+
|
|
2560
|
+
for (const cap of capabilities) {
|
|
2561
|
+
const wt = join(tempDir, `${cap}-tc-wt`);
|
|
2562
|
+
await deployHooks(wt, `${cap}-tc`, cap);
|
|
2563
|
+
|
|
2564
|
+
const content = await Bun.file(join(wt, ".claude", "settings.local.json")).text();
|
|
2565
|
+
const parsed = JSON.parse(content);
|
|
2566
|
+
const preToolUse = parsed.hooks.PreToolUse;
|
|
2567
|
+
|
|
2568
|
+
const trackerGuard = preToolUse.find(
|
|
2569
|
+
(h: { matcher: string; hooks: Array<{ command: string }> }) =>
|
|
2570
|
+
h.matcher === "Bash" && h.hooks[0]?.command?.includes("OVERSTORY_TASK_ID"),
|
|
2571
|
+
);
|
|
2572
|
+
expect(trackerGuard).toBeDefined();
|
|
2573
|
+
}
|
|
2574
|
+
});
|
|
2575
|
+
});
|
|
2576
|
+
|
|
2404
2577
|
describe("escapeForSingleQuotedShell", () => {
|
|
2405
2578
|
test("no single quotes: string passes through unchanged", () => {
|
|
2406
2579
|
expect(escapeForSingleQuotedShell("hello world")).toBe("hello world");
|
package/src/agents/hooks-deployer.ts
CHANGED
|
@@ -283,6 +283,61 @@ export function buildBashFileGuardScript(
|
|
|
283
283
|
return script;
|
|
284
284
|
}
|
|
285
285
|
|
|
286
|
+
/**
|
|
287
|
+
* Build a PreToolUse guard script that prevents agents from closing or updating
|
|
288
|
+
* issues they don't own.
|
|
289
|
+
*
|
|
290
|
+
* Guards against two patterns:
|
|
291
|
+
* - `sd/bd close <id>` — blocks if <id> != $OVERSTORY_TASK_ID
|
|
292
|
+
* - `sd/bd update <id> --status` — blocks if <id> != $OVERSTORY_TASK_ID
|
|
293
|
+
*
|
|
294
|
+
* Agents without OVERSTORY_TASK_ID (coordinator, monitor) exit early and are unaffected.
|
|
295
|
+
*/
|
|
296
|
+
export function buildTrackerCloseGuardScript(): string {
|
|
297
|
+
const script = [
|
|
298
|
+
// Only enforce for overstory agent sessions
|
|
299
|
+
ENV_GUARD,
|
|
300
|
+
// Skip if task ID is not set (coordinator/monitor have no task)
|
|
301
|
+
'[ -z "$OVERSTORY_TASK_ID" ] && exit 0;',
|
|
302
|
+
"read -r INPUT;",
|
|
303
|
+
// Extract command value from JSON
|
|
304
|
+
'CMD=$(echo "$INPUT" | sed \'s/.*"command": *"\\([^"]*\\)".*/\\1/\');',
|
|
305
|
+
// Check for sd/bd close <id>
|
|
306
|
+
"if echo \"$CMD\" | grep -qE '^\\s*(sd|bd)\\s+close\\s'; then",
|
|
307
|
+
" ISSUE_ID=$(echo \"$CMD\" | sed -E 's/^[[:space:]]*(sd|bd)[[:space:]]+close[[:space:]]+([^ ]+).*/\\2/');",
|
|
308
|
+
' if [ "$ISSUE_ID" != "$OVERSTORY_TASK_ID" ]; then',
|
|
309
|
+
' echo "{\\"decision\\":\\"block\\",\\"reason\\":\\"Cannot close issue $ISSUE_ID — agents may only close their own task ($OVERSTORY_TASK_ID). Report completion via worker_done mail to your parent instead.\\"}";',
|
|
310
|
+
" exit 0;",
|
|
311
|
+
" fi;",
|
|
312
|
+
"fi;",
|
|
313
|
+
// Check for sd/bd update <id> --status
|
|
314
|
+
"if echo \"$CMD\" | grep -qE '^\\s*(sd|bd)\\s+update\\s.*--status'; then",
|
|
315
|
+
" ISSUE_ID=$(echo \"$CMD\" | sed -E 's/^[[:space:]]*(sd|bd)[[:space:]]+update[[:space:]]+([^ ]+).*/\\2/');",
|
|
316
|
+
' if [ "$ISSUE_ID" != "$OVERSTORY_TASK_ID" ]; then',
|
|
317
|
+
' echo "{\\"decision\\":\\"block\\",\\"reason\\":\\"Cannot update issue $ISSUE_ID — agents may only update their own task ($OVERSTORY_TASK_ID).\\"}";',
|
|
318
|
+
" exit 0;",
|
|
319
|
+
" fi;",
|
|
320
|
+
"fi;",
|
|
321
|
+
].join(" ");
|
|
322
|
+
return script;
|
|
323
|
+
}
|
|
324
|
+
|
|
325
|
+
/**
|
|
326
|
+
* Generate a PreToolUse guard that blocks tracker close/update for foreign issues.
|
|
327
|
+
*
|
|
328
|
+
* Returns a single Bash matcher entry. Applied to ALL agent capabilities
|
|
329
|
+
* so that no agent can accidentally close the coordinator's dispatch issue.
|
|
330
|
+
* Agents without OVERSTORY_TASK_ID (coordinator, monitor) are unaffected.
|
|
331
|
+
*/
|
|
332
|
+
export function getTrackerCloseGuards(): HookEntry[] {
|
|
333
|
+
return [
|
|
334
|
+
{
|
|
335
|
+
matcher: "Bash",
|
|
336
|
+
hooks: [{ type: "command", command: buildTrackerCloseGuardScript() }],
|
|
337
|
+
},
|
|
338
|
+
];
|
|
339
|
+
}
|
|
340
|
+
|
|
286
341
|
/**
|
|
287
342
|
* Capabilities that are allowed to modify files via Bash commands.
|
|
288
343
|
* These get the Bash path boundary guard instead of a blanket file-modification block.
|
|
@@ -539,7 +594,8 @@ export async function deployHooks(
|
|
|
539
594
|
const pathGuards = getPathBoundaryGuards();
|
|
540
595
|
const dangerGuards = getDangerGuards(agentName);
|
|
541
596
|
const capabilityGuards = getCapabilityGuards(capability, qualityGates);
|
|
542
|
-
const allGuards = [...pathGuards, ...dangerGuards, ...capabilityGuards];
|
|
597
|
+
const trackerCloseGuards = getTrackerCloseGuards();
|
|
598
|
+
const allGuards = [...pathGuards, ...dangerGuards, ...capabilityGuards, ...trackerCloseGuards];
|
|
543
599
|
|
|
544
600
|
if (allGuards.length > 0) {
|
|
545
601
|
const preToolUse = config.hooks.PreToolUse ?? [];
|
package/src/commands/clean.test.ts
CHANGED
|
@@ -77,6 +77,12 @@ describe("validation", () => {
|
|
|
77
77
|
test("no flags throws ValidationError", async () => {
|
|
78
78
|
await expect(cleanCommand({})).rejects.toThrow("No cleanup targets specified");
|
|
79
79
|
});
|
|
80
|
+
|
|
81
|
+
test("--agent and --all throws ValidationError", async () => {
|
|
82
|
+
await expect(cleanCommand({ agent: "my-builder", all: true })).rejects.toThrow(
|
|
83
|
+
"--agent and --all are mutually exclusive",
|
|
84
|
+
);
|
|
85
|
+
});
|
|
80
86
|
});
|
|
81
87
|
|
|
82
88
|
// === --all ===
|
|
@@ -656,3 +662,133 @@ describe("mulch health checks", () => {
|
|
|
656
662
|
expect(stdoutOutput).toBeDefined();
|
|
657
663
|
});
|
|
658
664
|
});
|
|
665
|
+
|
|
666
|
+
// === --agent ===
|
|
667
|
+
|
|
668
|
+
describe("--agent", () => {
	/**
	 * Build an AgentSession fixture for the "test-builder" agent.
	 * Any field supplied in `overrides` replaces the corresponding default.
	 */
	function makeSession(overrides: Partial<AgentSession> = {}): AgentSession {
		return {
			id: "s1",
			agentName: "test-builder",
			capability: "builder",
			worktreePath: join(tempDir, ".overstory", "worktrees", "test-builder"),
			branchName: "overstory/test-builder/task-1",
			taskId: "task-1",
			tmuxSession: "overstory-test-project-test-builder",
			state: "working",
			pid: 99999,
			parentAgent: null,
			depth: 1,
			runId: "run-123",
			startedAt: new Date().toISOString(),
			lastActivity: new Date().toISOString(),
			escalationLevel: 0,
			stalledSince: null,
			transcriptPath: null,
			...overrides,
		};
	}

	/** Upsert `session` into the session store under overstoryDir, always closing the store. */
	function saveSession(session: AgentSession): void {
		const { store } = openSessionStore(overstoryDir);
		try {
			store.upsert(session);
		} finally {
			store.close();
		}
	}

	/** Look up the stored session for `agentName`; the store is closed even if the lookup throws. */
	function loadSession(agentName: string) {
		const { store } = openSessionStore(overstoryDir);
		try {
			return store.getByName(agentName);
		} finally {
			store.close();
		}
	}

	/**
	 * Return the "session_end" events recorded for `agentName` in the events DB at
	 * `eventsDbPath`; the event store is closed even if the query throws.
	 */
	function readSessionEndEvents(eventsDbPath: string, agentName: string) {
		const eventStore = createEventStore(eventsDbPath);
		try {
			return eventStore.getByAgent(agentName).filter((e) => e.eventType === "session_end");
		} finally {
			eventStore.close();
		}
	}

	test("throws AgentError when agent not found", async () => {
		await expect(cleanCommand({ agent: "nonexistent" })).rejects.toThrow("not found");
	});

	test("clears agent and logs directories", async () => {
		saveSession(makeSession());

		// Create agent and logs dirs with content
		const agentDir = join(overstoryDir, "agents", "test-builder");
		const logsDir = join(overstoryDir, "logs", "test-builder");
		await mkdir(agentDir, { recursive: true });
		await mkdir(logsDir, { recursive: true });
		await writeFile(join(agentDir, "identity.yaml"), "name: test-builder");
		await writeFile(join(logsDir, "session.log"), "log data");

		await cleanCommand({ agent: "test-builder" });

		// Dirs should be cleared (but still exist)
		const agentEntries = await readdir(agentDir);
		const logEntries = await readdir(logsDir);
		expect(agentEntries).toHaveLength(0);
		expect(logEntries).toHaveLength(0);

		expect(stdoutOutput).toContain("Agent cleaned");
		expect(stdoutOutput).toContain("test-builder");
	});

	test("marks agent session as completed", async () => {
		saveSession(makeSession({ state: "working" }));

		await cleanCommand({ agent: "test-builder" });

		const updated = loadSession("test-builder");
		expect(updated?.state).toBe("completed");
	});

	test("logs synthetic session-end event for non-completed agent", async () => {
		saveSession(makeSession({ state: "working" }));

		await cleanCommand({ agent: "test-builder" });

		const eventsDbPath = join(overstoryDir, "events.db");
		const sessionEndEvents = readSessionEndEvents(eventsDbPath, "test-builder");
		expect(sessionEndEvents).toHaveLength(1);
		const data = JSON.parse(sessionEndEvents[0]?.data ?? "{}");
		expect(data.reason).toContain("clean --agent");
	});

	test("does not log session-end event for already-completed agent", async () => {
		saveSession(makeSession({ state: "completed" }));

		await cleanCommand({ agent: "test-builder" });

		// A missing events DB means nothing was logged, so it is only inspected when present.
		const eventsDbPath = join(overstoryDir, "events.db");
		if (existsSync(eventsDbPath)) {
			const sessionEndEvents = readSessionEndEvents(eventsDbPath, "test-builder");
			expect(sessionEndEvents).toHaveLength(0);
		}
	});

	test("--agent + --json returns JSON with agent result", async () => {
		saveSession(makeSession({ state: "working" }));

		await cleanCommand({ agent: "test-builder", json: true });

		const result = JSON.parse(stdoutOutput);
		expect(result).toHaveProperty("agent");
		expect(result.agent).toHaveProperty("agentName", "test-builder");
		expect(result.agent).toHaveProperty("markedCompleted");
	});

	test("handles missing agent/logs directories gracefully", async () => {
		saveSession(makeSession({ state: "completed" }));

		// No agent or logs dirs — should not error
		await cleanCommand({ agent: "test-builder" });
		expect(stdoutOutput).toContain("Agent cleaned");
	});
});
|