@os-eco/overstory-cli 0.8.6 → 0.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +18 -11
- package/agents/ov-co-creation.md +90 -0
- package/package.json +1 -1
- package/src/agents/hooks-deployer.test.ts +185 -12
- package/src/agents/hooks-deployer.ts +57 -1
- package/src/agents/overlay.ts +13 -0
- package/src/canopy/client.test.ts +107 -0
- package/src/canopy/client.ts +179 -0
- package/src/commands/coordinator.test.ts +74 -5
- package/src/commands/coordinator.ts +105 -25
- package/src/commands/dashboard.ts +85 -19
- package/src/commands/discover.test.ts +288 -0
- package/src/commands/discover.ts +202 -0
- package/src/commands/ecosystem.test.ts +101 -0
- package/src/commands/init.test.ts +76 -1
- package/src/commands/init.ts +36 -14
- package/src/commands/sling.test.ts +33 -0
- package/src/commands/sling.ts +126 -38
- package/src/commands/supervisor.ts +2 -0
- package/src/commands/update.test.ts +1 -0
- package/src/commands/watch.ts +9 -9
- package/src/e2e/init-sling-lifecycle.test.ts +2 -1
- package/src/index.ts +6 -1
- package/src/mail/store.ts +2 -1
- package/src/merge/resolver.test.ts +141 -7
- package/src/merge/resolver.ts +61 -8
- package/src/runtimes/claude.test.ts +32 -7
- package/src/runtimes/claude.ts +19 -4
- package/src/runtimes/codex.test.ts +13 -0
- package/src/runtimes/codex.ts +18 -2
- package/src/runtimes/copilot.ts +3 -0
- package/src/runtimes/cursor.test.ts +497 -0
- package/src/runtimes/cursor.ts +205 -0
- package/src/runtimes/gemini.ts +3 -0
- package/src/runtimes/opencode.ts +3 -0
- package/src/runtimes/pi.test.ts +1 -1
- package/src/runtimes/pi.ts +3 -0
- package/src/runtimes/registry.test.ts +21 -1
- package/src/runtimes/registry.ts +3 -0
- package/src/runtimes/sapling.ts +3 -0
- package/src/runtimes/types.ts +5 -0
- package/src/schema-consistency.test.ts +2 -0
- package/src/sessions/store.test.ts +178 -0
- package/src/sessions/store.ts +69 -12
- package/src/types.ts +69 -2
- package/src/watchdog/daemon.ts +41 -0
- package/src/worktree/tmux.test.ts +150 -0
- package/src/worktree/tmux.ts +126 -23
- package/templates/overlay.md.tmpl +2 -0
package/README.md
CHANGED
|
@@ -19,6 +19,7 @@ Requires [Bun](https://bun.sh) v1.0+, git, and tmux. At least one supported agen
|
|
|
19
19
|
- [GitHub Copilot](https://github.com/features/copilot) (`copilot` CLI)
|
|
20
20
|
- [Codex](https://github.com/openai/codex) (`codex` CLI)
|
|
21
21
|
- [Gemini CLI](https://github.com/google-gemini/gemini-cli) (`gemini` CLI)
|
|
22
|
+
- [Cursor CLI](https://cursor.com/docs/cli/overview) (`agent` CLI)
|
|
22
23
|
- [Sapling](https://github.com/jayminwest/sapling) (`sp` CLI)
|
|
23
24
|
- [OpenCode](https://opencode.ai) (`opencode` CLI)
|
|
24
25
|
|
|
@@ -83,17 +84,18 @@ Every command supports `--json` where noted. Global flags: `-q`/`--quiet`, `--ti
|
|
|
83
84
|
| Command | Description |
|
|
84
85
|
|---------|-------------|
|
|
85
86
|
| `ov init` | Initialize `.overstory/` and bootstrap os-eco tools (`--yes`, `--name`, `--tools`, `--skip-mulch`, `--skip-seeds`, `--skip-canopy`, `--skip-onboard`, `--json`) |
|
|
86
|
-
| `ov sling <task-id>` | Spawn a worker agent (`--capability`, `--name`, `--spec`, `--files`, `--parent`, `--depth`, `--skip-scout`, `--skip-review`, `--max-agents`, `--dispatch-max-agents`, `--skip-task-check`, `--no-scout-check`, `--runtime`, `--base-branch`, `--json`) |
|
|
87
|
+
| `ov sling <task-id>` | Spawn a worker agent (`--capability`, `--name`, `--spec`, `--files`, `--parent`, `--depth`, `--skip-scout`, `--skip-review`, `--max-agents`, `--dispatch-max-agents`, `--skip-task-check`, `--no-scout-check`, `--runtime`, `--base-branch`, `--profile`, `--json`) |
|
|
87
88
|
| `ov stop <agent-name>` | Terminate a running agent (`--clean-worktree`, `--json`) |
|
|
88
89
|
| `ov prime` | Load context for orchestrator/agent (`--agent`, `--compact`) |
|
|
89
90
|
| `ov spec write <task-id>` | Write a task specification (`--body`) |
|
|
91
|
+
| `ov discover` | Discover a brownfield codebase via coordinator-driven scout swarm (`--skip`, `--name`, `--attach`, `--watchdog`, `--json`) |
|
|
90
92
|
| `ov update` | Refresh `.overstory/` managed files from installed package (`--agents`, `--manifest`, `--hooks`, `--dry-run`, `--json`) |
|
|
91
93
|
|
|
92
94
|
### Coordination
|
|
93
95
|
|
|
94
96
|
| Command | Description |
|
|
95
97
|
|---------|-------------|
|
|
96
|
-
| `ov coordinator start` | Start persistent coordinator agent (`--attach`/`--no-attach`, `--watchdog`, `--monitor`) |
|
|
98
|
+
| `ov coordinator start` | Start persistent coordinator agent (`--attach`/`--no-attach`, `--watchdog`, `--monitor`, `--profile`) |
|
|
97
99
|
| `ov coordinator stop` | Stop coordinator |
|
|
98
100
|
| `ov coordinator status` | Show coordinator state |
|
|
99
101
|
| `ov coordinator send` | Fire-and-forget message to coordinator (`--subject`) |
|
|
@@ -177,14 +179,16 @@ Overstory uses instruction overlays and tool-call guards to turn agent sessions
|
|
|
177
179
|
|
|
178
180
|
Overstory is runtime-agnostic. The `AgentRuntime` interface (`src/runtimes/types.ts`) defines the contract — each adapter handles spawning, config deployment, guard enforcement, readiness detection, and transcript parsing for its runtime. Set the default in `config.yaml` or override per-agent with `ov sling --runtime <name>`.
|
|
179
181
|
|
|
180
|
-
| Runtime | CLI | Guard Mechanism |
|
|
181
|
-
|
|
182
|
+
| Runtime | CLI | Guard Mechanism | Stability |
|
|
183
|
+
|---------|-----|-----------------|-----------|
|
|
182
184
|
| Claude Code | `claude` | `settings.local.json` hooks | Stable |
|
|
183
|
-
|
|
|
184
|
-
|
|
|
185
|
-
|
|
|
186
|
-
|
|
|
187
|
-
|
|
|
185
|
+
| Sapling | `sp` | `.sapling/guards.json` | Stable |
|
|
186
|
+
| Pi | `pi` | `.pi/extensions/` guard extension | Experimental |
|
|
187
|
+
| Copilot | `copilot` | (none — `--allow-all-tools`) | Experimental |
|
|
188
|
+
| Cursor | `agent` | (none — `--yolo`) | Experimental |
|
|
189
|
+
| Codex | `codex` | OS-level sandbox (Seatbelt/Landlock) | Experimental |
|
|
190
|
+
| Gemini | `gemini` | `--sandbox` flag | Experimental |
|
|
191
|
+
| OpenCode | `opencode` | (none) | Experimental |
|
|
188
192
|
|
|
189
193
|
## How It Works
|
|
190
194
|
|
|
@@ -233,7 +237,7 @@ overstory/
|
|
|
233
237
|
config.ts Config loader + validation
|
|
234
238
|
errors.ts Custom error types
|
|
235
239
|
json.ts Standardized JSON envelope helpers
|
|
236
|
-
commands/ One file per CLI subcommand (
|
|
240
|
+
commands/ One file per CLI subcommand (36 commands)
|
|
237
241
|
agents.ts Agent discovery and querying
|
|
238
242
|
coordinator.ts Persistent orchestrator lifecycle
|
|
239
243
|
supervisor.ts Team lead management [DEPRECATED]
|
|
@@ -267,7 +271,10 @@ overstory/
|
|
|
267
271
|
ecosystem.ts os-eco tool dashboard
|
|
268
272
|
update.ts Refresh managed files
|
|
269
273
|
upgrade.ts npm version upgrades
|
|
274
|
+
discover.ts Brownfield codebase discovery via coordinator-driven scout swarm
|
|
270
275
|
completions.ts Shell completion generation (bash/zsh/fish)
|
|
276
|
+
canopy/
|
|
277
|
+
client.ts Canopy client (prompt rendering, listing, emission)
|
|
271
278
|
agents/ Agent lifecycle management
|
|
272
279
|
manifest.ts Agent registry (load + query)
|
|
273
280
|
overlay.ts Dynamic CLAUDE.md overlay generator
|
|
@@ -284,7 +291,7 @@ overstory/
|
|
|
284
291
|
metrics/ SQLite metrics + pricing + transcript parsing
|
|
285
292
|
doctor/ Health check modules (11 checks)
|
|
286
293
|
insights/ Session insight analyzer for auto-expertise
|
|
287
|
-
runtimes/ AgentRuntime abstraction (registry + adapters: Claude, Pi, Copilot, Codex, Gemini, Sapling, OpenCode)
|
|
294
|
+
runtimes/ AgentRuntime abstraction (registry + adapters: Claude, Pi, Copilot, Codex, Gemini, Sapling, OpenCode, Cursor)
|
|
288
295
|
tracker/ Pluggable task tracker (beads + seeds backends)
|
|
289
296
|
mulch/ mulch client (programmatic API + CLI wrapper)
|
|
290
297
|
e2e/ End-to-end lifecycle tests
|
|
package/agents/ov-co-creation.md
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: ov-co-creation
|
|
3
|
+
description: Co-creation workflow profile — human-in-the-loop at explicit decision gates
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
## propulsion-principle
|
|
7
|
+
|
|
8
|
+
Read your assignment. For implementation work within an approved plan, execute immediately — no confirmation needed for routine decisions (naming, file organization, test strategy, implementation details within spec).
|
|
9
|
+
|
|
10
|
+
PAUSE at decision gates. When you encounter an architectural choice, design fork, scope boundary, or tool selection, stop and do not proceed. Instead:
|
|
11
|
+
|
|
12
|
+
1. Write a structured decision document (context, options, tradeoffs, recommendation).
|
|
13
|
+
2. Send it as a decision_gate mail to the coordinator.
|
|
14
|
+
3. Wait for a response before proceeding past the gate.
|
|
15
|
+
|
|
16
|
+
Hesitation is the default at gates; action is the default within approved plans.
|
|
17
|
+
|
|
18
|
+
## escalation-policy
|
|
19
|
+
|
|
20
|
+
At decision points, present options rather than choosing. When you encounter a meaningful decision:
|
|
21
|
+
|
|
22
|
+
1. Write a structured decision document: context, 2+ options with tradeoffs, and your recommendation.
|
|
23
|
+
2. Send it as a decision_gate mail to the coordinator and wait.
|
|
24
|
+
3. Do not proceed until you receive a reply selecting an option.
|
|
25
|
+
|
|
26
|
+
Routine implementation decisions within an already-approved plan remain autonomous. Do not send decision gates for: variable names, file organization within spec, test strategy, or minor implementation choices that do not affect overall direction.
|
|
27
|
+
|
|
28
|
+
Escalate immediately (not as a decision gate) when you discover: risks that could cause data loss, security issues, or breaking changes beyond scope; blocked dependencies outside your control.
|
|
29
|
+
|
|
30
|
+
## artifact-expectations
|
|
31
|
+
|
|
32
|
+
Decision artifacts come before code. Deliverables in order:
|
|
33
|
+
|
|
34
|
+
1. **Option memos**: For any decision with multiple viable approaches, write a structured memo with options, tradeoffs, and a recommendation. Send as a decision_gate mail and await approval.
|
|
35
|
+
2. **ADRs (Architecture Decision Records)**: For architectural choices, create a lightweight ADR capturing context, decision, and consequences.
|
|
36
|
+
3. **Tradeoff matrices**: When comparing approaches across multiple dimensions, present a structured comparison.
|
|
37
|
+
4. **Code and tests**: Implementation proceeds after decision artifacts are approved. Code must be clean, follow project conventions, and include automated tests.
|
|
38
|
+
5. **Quality gates**: All lints, type checks, and tests must pass before reporting completion.
|
|
39
|
+
|
|
40
|
+
Do not write implementation code before decisions are resolved. The human reviews and approves decision documents; implementation follows approval.
|
|
41
|
+
|
|
42
|
+
## completion-criteria
|
|
43
|
+
|
|
44
|
+
Work is complete when all of the following are true:
|
|
45
|
+
|
|
46
|
+
- All quality gates pass: tests green, linting clean, type checking passes.
|
|
47
|
+
- Changes are committed to the appropriate branch.
|
|
48
|
+
- Any issues tracked in the task system are updated or closed.
|
|
49
|
+
- A completion signal has been sent to the appropriate recipient (parent agent, coordinator, or human).
|
|
50
|
+
|
|
51
|
+
Do not declare completion prematurely. Run the quality gates yourself — do not assume they pass. If a gate fails, fix the issue before reporting done.
|
|
52
|
+
|
|
53
|
+
## human-role
|
|
54
|
+
|
|
55
|
+
The human is an active co-creator at explicit decision gates — not a hands-off supervisor.
|
|
56
|
+
|
|
57
|
+
- **Active at gates.** The human reviews decision documents and selects options via mail reply. The agent waits for this input before proceeding.
|
|
58
|
+
- **Autonomous between gates.** Once a direction is approved, the agent executes without further check-ins. Implementation details within an approved plan are delegated.
|
|
59
|
+
- **Milestone reviews.** The human reviews work at defined checkpoints (planning, prototype, final). These are collaborative reviews with explicit proceed signals.
|
|
60
|
+
- **Minimal interruption between gates.** Do not ask questions that could be answered by reading the codebase or attempting something. Reserve interruptions for genuinely ambiguous requirements.
|
|
61
|
+
|
|
62
|
+
## decision-gates
|
|
63
|
+
|
|
64
|
+
When you reach a decision point (architectural choice, scope boundary, design fork, tool selection), follow this protocol:
|
|
65
|
+
|
|
66
|
+
1. **Write a structured decision document** containing:
|
|
67
|
+
- **Context**: What problem are you solving? What constraints apply?
|
|
68
|
+
- **Options**: At least 2 viable approaches, each with: description, tradeoffs (pros/cons), and implementation implications.
|
|
69
|
+
- **Recommendation**: Which option you recommend and why.
|
|
70
|
+
|
|
71
|
+
2. **Send a decision_gate mail** to the coordinator with the decision document in the body. Include a payload with the options array and brief context. Use --type decision_gate.
|
|
72
|
+
|
|
73
|
+
3. **BLOCK and wait** for a reply. Do not continue past the gate without a response. Poll your inbox periodically while waiting.
|
|
74
|
+
|
|
75
|
+
Decision gates are NOT for: variable names, file organization within spec, test strategy, or minor implementation choices within an approved design. They are for choices that meaningfully affect the direction of work.
|
|
76
|
+
|
|
77
|
+
## milestone-reviews
|
|
78
|
+
|
|
79
|
+
Send checkpoint reviews at three milestones:
|
|
80
|
+
|
|
81
|
+
**After planning** (before any implementation begins):
|
|
82
|
+
Send a status mail with: scope summary (what will be built), approach (high-level design with all decisions resolved via gates), file list (which files will be affected), and any open questions requiring confirmation before starting.
|
|
83
|
+
|
|
84
|
+
**After prototyping** (when a working prototype exists):
|
|
85
|
+
Send a status mail with: what works and what is rough, remaining decisions (if any), revised scope if it changed during prototyping, and an explicit request to proceed before final implementation.
|
|
86
|
+
|
|
87
|
+
**Before final implementation** (after all gates resolved and prototype reviewed):
|
|
88
|
+
Send a status mail summarizing: complete plan with all decisions incorporated, any deviations from original scope, and a confirmation request before beginning the final commit sequence.
|
|
89
|
+
|
|
90
|
+
Each milestone review uses mail type status and clearly labels the milestone in the subject line.
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@os-eco/overstory-cli",
|
|
3
|
-
"version": "0.8.6",
|
|
3
|
+
"version": "0.9.1",
|
|
4
4
|
"description": "Multi-agent orchestration for AI coding agents — spawn workers in git worktrees via tmux, coordinate through SQLite mail, merge with tiered conflict resolution. Pluggable runtime adapters for Claude Code, Pi, and more.",
|
|
5
5
|
"author": "Jaymin West",
|
|
6
6
|
"license": "MIT",
|
|
package/src/agents/hooks-deployer.test.ts
CHANGED
|
@@ -8,6 +8,7 @@ import {
|
|
|
8
8
|
buildBashFileGuardScript,
|
|
9
9
|
buildBashPathBoundaryScript,
|
|
10
10
|
buildPathBoundaryGuardScript,
|
|
11
|
+
buildTrackerCloseGuardScript,
|
|
11
12
|
deployHooks,
|
|
12
13
|
escapeForSingleQuotedShell,
|
|
13
14
|
extractQualityGatePrefixes,
|
|
@@ -15,6 +16,7 @@ import {
|
|
|
15
16
|
getCapabilityGuards,
|
|
16
17
|
getDangerGuards,
|
|
17
18
|
getPathBoundaryGuards,
|
|
19
|
+
getTrackerCloseGuards,
|
|
18
20
|
isOverstoryHookEntry,
|
|
19
21
|
PATH_PREFIX,
|
|
20
22
|
} from "./hooks-deployer.ts";
|
|
@@ -468,9 +470,9 @@ describe("deployHooks", () => {
|
|
|
468
470
|
expect(writeBlockGuard).toBeDefined();
|
|
469
471
|
expect(writeBlockGuard.hooks[0].command).toContain('"decision":"block"');
|
|
470
472
|
|
|
471
|
-
// Should have multiple Bash guards: danger guard + file guard + universal push guard
|
|
473
|
+
// Should have multiple Bash guards: danger guard + file guard + tracker close guard + universal push guard
|
|
472
474
|
const bashGuards = preToolUse.filter((h: { matcher: string }) => h.matcher === "Bash");
|
|
473
|
-
expect(bashGuards.length).toBe(3);
|
|
475
|
+
expect(bashGuards.length).toBe(4); // danger guard + file guard + tracker close guard + universal push guard
|
|
474
476
|
});
|
|
475
477
|
|
|
476
478
|
test("reviewer capability adds same guards as scout", async () => {
|
|
@@ -512,9 +514,9 @@ describe("deployHooks", () => {
|
|
|
512
514
|
expect(guardMatchers).toContain("NotebookEdit");
|
|
513
515
|
expect(guardMatchers).toContain("Bash");
|
|
514
516
|
|
|
515
|
-
// Should have 3 Bash guards: danger guard + file guard + universal push guard
|
|
517
|
+
// Should have 4 Bash guards: danger guard + file guard + tracker close guard + universal push guard
|
|
516
518
|
const bashGuards = preToolUse.filter((h: { matcher: string }) => h.matcher === "Bash");
|
|
517
|
-
expect(bashGuards.length).toBe(3);
|
|
519
|
+
expect(bashGuards.length).toBe(4);
|
|
518
520
|
});
|
|
519
521
|
|
|
520
522
|
test("builder capability gets path boundary + Bash danger + Bash path boundary guards + native team tool blocks", async () => {
|
|
@@ -544,9 +546,9 @@ describe("deployHooks", () => {
|
|
|
544
546
|
expect(writeGuards[0].hooks[0].command).toContain("OVERSTORY_WORKTREE_PATH");
|
|
545
547
|
expect(writeGuards[0].hooks[0].command).not.toContain("cannot modify files");
|
|
546
548
|
|
|
547
|
-
// Builder should have 3 Bash guards: danger guard + path boundary guard + universal push guard
|
|
549
|
+
// Builder should have 4 Bash guards: danger guard + path boundary guard + tracker close guard + universal push guard
|
|
548
550
|
const bashGuards = preToolUse.filter((h: { matcher: string }) => h.matcher === "Bash");
|
|
549
|
-
expect(bashGuards.length).toBe(3);
|
|
551
|
+
expect(bashGuards.length).toBe(4);
|
|
550
552
|
// One should be the danger guard (checks git push)
|
|
551
553
|
const dangerGuard = bashGuards.find(
|
|
552
554
|
(h: { hooks: Array<{ command: string }> }) =>
|
|
@@ -1607,7 +1609,7 @@ describe("structural enforcement integration", () => {
|
|
|
1607
1609
|
|
|
1608
1610
|
// Find the bash file guard (the second Bash entry, after the danger guard)
|
|
1609
1611
|
const bashGuards = preToolUse.filter((h: { matcher: string }) => h.matcher === "Bash");
|
|
1610
|
-
expect(bashGuards.length).toBe(3);
|
|
1612
|
+
expect(bashGuards.length).toBe(4);
|
|
1611
1613
|
|
|
1612
1614
|
// The file guard (second Bash guard) should whitelist git add/commit
|
|
1613
1615
|
const fileGuard = bashGuards[1];
|
|
@@ -2070,8 +2072,8 @@ describe("bash path boundary integration", () => {
|
|
|
2070
2072
|
const preToolUse = parsed.hooks.PreToolUse;
|
|
2071
2073
|
|
|
2072
2074
|
const bashGuards = preToolUse.filter((h: { matcher: string }) => h.matcher === "Bash");
|
|
2073
|
-
// Should have 3 Bash guards: danger guard + path boundary guard + universal push guard
|
|
2074
|
-
expect(bashGuards.length).toBe(3);
|
|
2075
|
+
// Should have 4 Bash guards: danger guard + path boundary guard + tracker close guard + universal push guard
|
|
2076
|
+
expect(bashGuards.length).toBe(4);
|
|
2075
2077
|
|
|
2076
2078
|
// Find the path boundary guard
|
|
2077
2079
|
const pathGuard = bashGuards.find((h: { hooks: Array<{ command: string }> }) =>
|
|
@@ -2092,7 +2094,7 @@ describe("bash path boundary integration", () => {
|
|
|
2092
2094
|
const preToolUse = parsed.hooks.PreToolUse;
|
|
2093
2095
|
|
|
2094
2096
|
const bashGuards = preToolUse.filter((h: { matcher: string }) => h.matcher === "Bash");
|
|
2095
|
-
expect(bashGuards.length).toBe(3);
|
|
2097
|
+
expect(bashGuards.length).toBe(4);
|
|
2096
2098
|
|
|
2097
2099
|
const pathGuard = bashGuards.find((h: { hooks: Array<{ command: string }> }) =>
|
|
2098
2100
|
h.hooks[0]?.command?.includes("Bash path boundary violation"),
|
|
@@ -2110,9 +2112,9 @@ describe("bash path boundary integration", () => {
|
|
|
2110
2112
|
const parsed = JSON.parse(content);
|
|
2111
2113
|
const preToolUse = parsed.hooks.PreToolUse;
|
|
2112
2114
|
|
|
2113
|
-
// Scout gets danger guard + file guard + universal push guard (3 Bash guards), but NOT path boundary
|
|
2115
|
+
// Scout gets danger guard + file guard + tracker close guard + universal push guard (4 Bash guards), but NOT path boundary
|
|
2114
2116
|
const bashGuards = preToolUse.filter((h: { matcher: string }) => h.matcher === "Bash");
|
|
2115
|
-
expect(bashGuards.length).toBe(3);
|
|
2117
|
+
expect(bashGuards.length).toBe(4);
|
|
2116
2118
|
|
|
2117
2119
|
const pathGuard = bashGuards.find((h: { hooks: Array<{ command: string }> }) =>
|
|
2118
2120
|
h.hooks[0]?.command?.includes("Bash path boundary violation"),
|
|
@@ -2401,6 +2403,177 @@ describe("PATH prefix in deployed hooks", () => {
|
|
|
2401
2403
|
});
|
|
2402
2404
|
});
|
|
2403
2405
|
|
|
2406
|
+
describe("buildTrackerCloseGuardScript", () => {
|
|
2407
|
+
test("returns a string containing key patterns", () => {
|
|
2408
|
+
const script = buildTrackerCloseGuardScript();
|
|
2409
|
+
expect(typeof script).toBe("string");
|
|
2410
|
+
expect(script.length).toBeGreaterThan(0);
|
|
2411
|
+
expect(script).toContain("sd");
|
|
2412
|
+
expect(script).toContain("bd");
|
|
2413
|
+
expect(script).toContain("close");
|
|
2414
|
+
expect(script).toContain("update");
|
|
2415
|
+
});
|
|
2416
|
+
|
|
2417
|
+
test("contains ENV_GUARD prefix", () => {
|
|
2418
|
+
const script = buildTrackerCloseGuardScript();
|
|
2419
|
+
expect(script).toContain('[ -z "$OVERSTORY_AGENT_NAME" ] && exit 0;');
|
|
2420
|
+
});
|
|
2421
|
+
|
|
2422
|
+
test("contains OVERSTORY_TASK_ID early-exit check", () => {
|
|
2423
|
+
const script = buildTrackerCloseGuardScript();
|
|
2424
|
+
expect(script).toContain('[ -z "$OVERSTORY_TASK_ID" ] && exit 0;');
|
|
2425
|
+
});
|
|
2426
|
+
|
|
2427
|
+
test("blocks sd close with wrong ID", async () => {
|
|
2428
|
+
const script = buildTrackerCloseGuardScript();
|
|
2429
|
+
const input = JSON.stringify({ command: "sd close other-task" });
|
|
2430
|
+
const proc = Bun.spawn(["sh", "-c", script], {
|
|
2431
|
+
stdin: new TextEncoder().encode(input),
|
|
2432
|
+
stdout: "pipe",
|
|
2433
|
+
stderr: "pipe",
|
|
2434
|
+
env: { ...process.env, OVERSTORY_AGENT_NAME: "test-agent", OVERSTORY_TASK_ID: "my-task" },
|
|
2435
|
+
});
|
|
2436
|
+
const output = await new Response(proc.stdout).text();
|
|
2437
|
+
await proc.exited;
|
|
2438
|
+
const parsed = JSON.parse(output.trim());
|
|
2439
|
+
expect(parsed.decision).toBe("block");
|
|
2440
|
+
expect(parsed.reason).toContain("other-task");
|
|
2441
|
+
expect(parsed.reason).toContain("my-task");
|
|
2442
|
+
});
|
|
2443
|
+
|
|
2444
|
+
test("allows sd close with matching ID", async () => {
|
|
2445
|
+
const script = buildTrackerCloseGuardScript();
|
|
2446
|
+
const input = JSON.stringify({ command: "sd close my-task" });
|
|
2447
|
+
const proc = Bun.spawn(["sh", "-c", script], {
|
|
2448
|
+
stdin: new TextEncoder().encode(input),
|
|
2449
|
+
stdout: "pipe",
|
|
2450
|
+
stderr: "pipe",
|
|
2451
|
+
env: { ...process.env, OVERSTORY_AGENT_NAME: "test-agent", OVERSTORY_TASK_ID: "my-task" },
|
|
2452
|
+
});
|
|
2453
|
+
const output = await new Response(proc.stdout).text();
|
|
2454
|
+
await proc.exited;
|
|
2455
|
+
expect(output.trim()).toBe("");
|
|
2456
|
+
});
|
|
2457
|
+
|
|
2458
|
+
test("blocks bd close with wrong ID", async () => {
|
|
2459
|
+
const script = buildTrackerCloseGuardScript();
|
|
2460
|
+
const input = JSON.stringify({ command: "bd close other-task" });
|
|
2461
|
+
const proc = Bun.spawn(["sh", "-c", script], {
|
|
2462
|
+
stdin: new TextEncoder().encode(input),
|
|
2463
|
+
stdout: "pipe",
|
|
2464
|
+
stderr: "pipe",
|
|
2465
|
+
env: { ...process.env, OVERSTORY_AGENT_NAME: "test-agent", OVERSTORY_TASK_ID: "my-task" },
|
|
2466
|
+
});
|
|
2467
|
+
const output = await new Response(proc.stdout).text();
|
|
2468
|
+
await proc.exited;
|
|
2469
|
+
const parsed = JSON.parse(output.trim());
|
|
2470
|
+
expect(parsed.decision).toBe("block");
|
|
2471
|
+
expect(parsed.reason).toContain("other-task");
|
|
2472
|
+
});
|
|
2473
|
+
|
|
2474
|
+
test("blocks sd update --status with wrong ID", async () => {
|
|
2475
|
+
const script = buildTrackerCloseGuardScript();
|
|
2476
|
+
const input = JSON.stringify({ command: "sd update other-task --status in_progress" });
|
|
2477
|
+
const proc = Bun.spawn(["sh", "-c", script], {
|
|
2478
|
+
stdin: new TextEncoder().encode(input),
|
|
2479
|
+
stdout: "pipe",
|
|
2480
|
+
stderr: "pipe",
|
|
2481
|
+
env: { ...process.env, OVERSTORY_AGENT_NAME: "test-agent", OVERSTORY_TASK_ID: "my-task" },
|
|
2482
|
+
});
|
|
2483
|
+
const output = await new Response(proc.stdout).text();
|
|
2484
|
+
await proc.exited;
|
|
2485
|
+
const parsed = JSON.parse(output.trim());
|
|
2486
|
+
expect(parsed.decision).toBe("block");
|
|
2487
|
+
expect(parsed.reason).toContain("other-task");
|
|
2488
|
+
});
|
|
2489
|
+
|
|
2490
|
+
test("exits early when OVERSTORY_TASK_ID is empty (coordinator/monitor)", async () => {
|
|
2491
|
+
const script = buildTrackerCloseGuardScript();
|
|
2492
|
+
const input = JSON.stringify({ command: "sd close coordinator-task" });
|
|
2493
|
+
const proc = Bun.spawn(["sh", "-c", script], {
|
|
2494
|
+
stdin: new TextEncoder().encode(input),
|
|
2495
|
+
stdout: "pipe",
|
|
2496
|
+
stderr: "pipe",
|
|
2497
|
+
env: { ...process.env, OVERSTORY_AGENT_NAME: "coordinator", OVERSTORY_TASK_ID: "" },
|
|
2498
|
+
});
|
|
2499
|
+
const output = await new Response(proc.stdout).text();
|
|
2500
|
+
await proc.exited;
|
|
2501
|
+
expect(output.trim()).toBe("");
|
|
2502
|
+
});
|
|
2503
|
+
});
|
|
2504
|
+
|
|
2505
|
+
describe("getTrackerCloseGuards", () => {
|
|
2506
|
+
test("returns exactly 1 Bash guard entry", () => {
|
|
2507
|
+
const guards = getTrackerCloseGuards();
|
|
2508
|
+
expect(guards).toHaveLength(1);
|
|
2509
|
+
expect(guards[0]?.matcher).toBe("Bash");
|
|
2510
|
+
});
|
|
2511
|
+
|
|
2512
|
+
test("guard hook type is command", () => {
|
|
2513
|
+
const guards = getTrackerCloseGuards();
|
|
2514
|
+
expect(guards[0]?.hooks[0]?.type).toBe("command");
|
|
2515
|
+
});
|
|
2516
|
+
|
|
2517
|
+
test("guard command contains OVERSTORY_TASK_ID check", () => {
|
|
2518
|
+
const guards = getTrackerCloseGuards();
|
|
2519
|
+
const command = guards[0]?.hooks[0]?.command ?? "";
|
|
2520
|
+
expect(command).toContain("OVERSTORY_TASK_ID");
|
|
2521
|
+
});
|
|
2522
|
+
|
|
2523
|
+
test("guard command includes ENV_GUARD prefix", () => {
|
|
2524
|
+
const guards = getTrackerCloseGuards();
|
|
2525
|
+
const command = guards[0]?.hooks[0]?.command ?? "";
|
|
2526
|
+
expect(command).toContain('[ -z "$OVERSTORY_AGENT_NAME" ] && exit 0;');
|
|
2527
|
+
});
|
|
2528
|
+
});
|
|
2529
|
+
|
|
2530
|
+
describe("deployHooks tracker close guard integration", () => {
|
|
2531
|
+
let tempDir: string;
|
|
2532
|
+
|
|
2533
|
+
beforeEach(async () => {
|
|
2534
|
+
tempDir = await mkdtemp(join(tmpdir(), "overstory-tracker-close-test-"));
|
|
2535
|
+
});
|
|
2536
|
+
|
|
2537
|
+
afterEach(async () => {
|
|
2538
|
+
await cleanupTempDir(tempDir);
|
|
2539
|
+
});
|
|
2540
|
+
|
|
2541
|
+
test("deployHooks includes tracker close guard in PreToolUse for builder", async () => {
|
|
2542
|
+
const worktreePath = join(tempDir, "builder-tc-wt");
|
|
2543
|
+
await deployHooks(worktreePath, "builder-tc", "builder");
|
|
2544
|
+
|
|
2545
|
+
const content = await Bun.file(join(worktreePath, ".claude", "settings.local.json")).text();
|
|
2546
|
+
const parsed = JSON.parse(content);
|
|
2547
|
+
const preToolUse = parsed.hooks.PreToolUse;
|
|
2548
|
+
|
|
2549
|
+
const trackerGuard = preToolUse.find(
|
|
2550
|
+
(h: { matcher: string; hooks: Array<{ command: string }> }) =>
|
|
2551
|
+
h.matcher === "Bash" && h.hooks[0]?.command?.includes("OVERSTORY_TASK_ID"),
|
|
2552
|
+
);
|
|
2553
|
+
expect(trackerGuard).toBeDefined();
|
|
2554
|
+
expect(trackerGuard.hooks[0].command).toContain("OVERSTORY_TASK_ID");
|
|
2555
|
+
});
|
|
2556
|
+
|
|
2557
|
+
test("deployHooks includes tracker close guard in PreToolUse for all capabilities", async () => {
|
|
2558
|
+
const capabilities = ["builder", "scout", "reviewer", "lead", "merger", "coordinator"];
|
|
2559
|
+
|
|
2560
|
+
for (const cap of capabilities) {
|
|
2561
|
+
const wt = join(tempDir, `${cap}-tc-wt`);
|
|
2562
|
+
await deployHooks(wt, `${cap}-tc`, cap);
|
|
2563
|
+
|
|
2564
|
+
const content = await Bun.file(join(wt, ".claude", "settings.local.json")).text();
|
|
2565
|
+
const parsed = JSON.parse(content);
|
|
2566
|
+
const preToolUse = parsed.hooks.PreToolUse;
|
|
2567
|
+
|
|
2568
|
+
const trackerGuard = preToolUse.find(
|
|
2569
|
+
(h: { matcher: string; hooks: Array<{ command: string }> }) =>
|
|
2570
|
+
h.matcher === "Bash" && h.hooks[0]?.command?.includes("OVERSTORY_TASK_ID"),
|
|
2571
|
+
);
|
|
2572
|
+
expect(trackerGuard).toBeDefined();
|
|
2573
|
+
}
|
|
2574
|
+
});
|
|
2575
|
+
});
|
|
2576
|
+
|
|
2404
2577
|
describe("escapeForSingleQuotedShell", () => {
|
|
2405
2578
|
test("no single quotes: string passes through unchanged", () => {
|
|
2406
2579
|
expect(escapeForSingleQuotedShell("hello world")).toBe("hello world");
|
|
package/src/agents/hooks-deployer.ts
CHANGED
|
@@ -283,6 +283,61 @@ export function buildBashFileGuardScript(
|
|
|
283
283
|
return script;
|
|
284
284
|
}
|
|
285
285
|
|
|
286
|
+
/**
|
|
287
|
+
* Build a PreToolUse guard script that prevents agents from closing or updating
|
|
288
|
+
* issues they don't own.
|
|
289
|
+
*
|
|
290
|
+
* Guards against two patterns:
|
|
291
|
+
* - `sd/bd close <id>` — blocks if <id> != $OVERSTORY_TASK_ID
|
|
292
|
+
* - `sd/bd update <id> --status` — blocks if <id> != $OVERSTORY_TASK_ID
|
|
293
|
+
*
|
|
294
|
+
* Agents without OVERSTORY_TASK_ID (coordinator, monitor) exit early and are unaffected.
|
|
295
|
+
*/
|
|
296
|
+
export function buildTrackerCloseGuardScript(): string {
|
|
297
|
+
const script = [
|
|
298
|
+
// Only enforce for overstory agent sessions
|
|
299
|
+
ENV_GUARD,
|
|
300
|
+
// Skip if task ID is not set (coordinator/monitor have no task)
|
|
301
|
+
'[ -z "$OVERSTORY_TASK_ID" ] && exit 0;',
|
|
302
|
+
"read -r INPUT;",
|
|
303
|
+
// Extract command value from JSON
|
|
304
|
+
'CMD=$(echo "$INPUT" | sed \'s/.*"command": *"\\([^"]*\\)".*/\\1/\');',
|
|
305
|
+
// Check for sd/bd close <id>
|
|
306
|
+
"if echo \"$CMD\" | grep -qE '^\\s*(sd|bd)\\s+close\\s'; then",
|
|
307
|
+
" ISSUE_ID=$(echo \"$CMD\" | sed -E 's/^[[:space:]]*(sd|bd)[[:space:]]+close[[:space:]]+([^ ]+).*/\\2/');",
|
|
308
|
+
' if [ "$ISSUE_ID" != "$OVERSTORY_TASK_ID" ]; then',
|
|
309
|
+
' echo "{\\"decision\\":\\"block\\",\\"reason\\":\\"Cannot close issue $ISSUE_ID — agents may only close their own task ($OVERSTORY_TASK_ID). Report completion via worker_done mail to your parent instead.\\"}";',
|
|
310
|
+
" exit 0;",
|
|
311
|
+
" fi;",
|
|
312
|
+
"fi;",
|
|
313
|
+
// Check for sd/bd update <id> --status
|
|
314
|
+
"if echo \"$CMD\" | grep -qE '^\\s*(sd|bd)\\s+update\\s.*--status'; then",
|
|
315
|
+
" ISSUE_ID=$(echo \"$CMD\" | sed -E 's/^[[:space:]]*(sd|bd)[[:space:]]+update[[:space:]]+([^ ]+).*/\\2/');",
|
|
316
|
+
' if [ "$ISSUE_ID" != "$OVERSTORY_TASK_ID" ]; then',
|
|
317
|
+
' echo "{\\"decision\\":\\"block\\",\\"reason\\":\\"Cannot update issue $ISSUE_ID — agents may only update their own task ($OVERSTORY_TASK_ID).\\"}";',
|
|
318
|
+
" exit 0;",
|
|
319
|
+
" fi;",
|
|
320
|
+
"fi;",
|
|
321
|
+
].join(" ");
|
|
322
|
+
return script;
|
|
323
|
+
}
|
|
324
|
+
|
|
325
|
+
/**
 * Build the PreToolUse guard that stops agents from closing or updating
 * tracker issues other than their own.
 *
 * The result is a one-element list holding a single "Bash" matcher entry whose
 * command hook runs the script produced by buildTrackerCloseGuardScript().
 * It is applied to ALL agent capabilities so that no agent can accidentally
 * close the coordinator's dispatch issue. Agents without OVERSTORY_TASK_ID
 * (coordinator, monitor) are unaffected.
 *
 * @returns The hook entries making up the tracker close/update guard.
 */
export function getTrackerCloseGuards(): HookEntry[] {
	const trackerCloseGuard: HookEntry = {
		matcher: "Bash",
		hooks: [{ type: "command", command: buildTrackerCloseGuardScript() }],
	};
	return [trackerCloseGuard];
}
|
|
340
|
+
|
|
286
341
|
/**
|
|
287
342
|
* Capabilities that are allowed to modify files via Bash commands.
|
|
288
343
|
* These get the Bash path boundary guard instead of a blanket file-modification block.
|
|
@@ -539,7 +594,8 @@ export async function deployHooks(
|
|
|
539
594
|
const pathGuards = getPathBoundaryGuards();
|
|
540
595
|
const dangerGuards = getDangerGuards(agentName);
|
|
541
596
|
const capabilityGuards = getCapabilityGuards(capability, qualityGates);
|
|
542
|
-
const
|
|
597
|
+
const trackerCloseGuards = getTrackerCloseGuards();
|
|
598
|
+
const allGuards = [...pathGuards, ...dangerGuards, ...capabilityGuards, ...trackerCloseGuards];
|
|
543
599
|
|
|
544
600
|
if (allGuards.length > 0) {
|
|
545
601
|
const preToolUse = config.hooks.PreToolUse ?? [];
|
package/src/agents/overlay.ts
CHANGED
|
@@ -35,6 +35,18 @@ function formatMulchDomains(domains: readonly string[]): string {
|
|
|
35
35
|
return `\`\`\`bash\nml prime ${domains.join(" ")}\n\`\`\``;
|
|
36
36
|
}
|
|
37
37
|
|
|
38
|
+
/**
 * Prepare profile content (Layer 2: deployment-specific WHAT KIND) for the overlay.
 *
 * A missing, empty, or whitespace-only profile yields an empty string, which
 * omits the section entirely. Any other profile is returned untouched —
 * the caller (canopy) owns the formatting.
 *
 * @param profileContent - Profile text, or undefined when none was supplied.
 * @returns The profile text as given, or "" when there is nothing to embed.
 */
function formatProfile(profileContent: string | undefined): string {
	if (!profileContent) {
		return "";
	}
	// Whitespace-only content counts as "no profile"; real content is not trimmed.
	return profileContent.trim().length > 0 ? profileContent : "";
}
|
|
49
|
+
|
|
38
50
|
/**
|
|
39
51
|
* Format pre-fetched mulch expertise for embedding in the overlay.
|
|
40
52
|
* Returns empty string if no expertise was provided (omits the section entirely).
|
|
@@ -314,6 +326,7 @@ export async function generateOverlay(config: OverlayConfig): Promise<string> {
|
|
|
314
326
|
"{{SKIP_SCOUT}}": config.skipScout ? SKIP_SCOUT_SECTION : "",
|
|
315
327
|
"{{DISPATCH_OVERRIDES}}": formatDispatchOverrides(config),
|
|
316
328
|
"{{BASE_DEFINITION}}": config.baseDefinition,
|
|
329
|
+
"{{PROFILE_INSTRUCTIONS}}": formatProfile(config.profileContent),
|
|
317
330
|
"{{QUALITY_GATE_INLINE}}": formatQualityGatesInline(config.qualityGates),
|
|
318
331
|
"{{QUALITY_GATE_STEPS}}": formatQualityGatesSteps(config.qualityGates),
|
|
319
332
|
"{{QUALITY_GATE_BASH}}": formatQualityGatesBash(config.qualityGates),
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tests for the Canopy CLI client.
|
|
3
|
+
*
|
|
4
|
+
* Uses real `cn` CLI calls against the actual .canopy/ directory.
|
|
5
|
+
* We do not mock the CLI — the project root has real prompts to test against.
|
|
6
|
+
* Tests are skipped if the `cn` CLI is not installed (e.g. in CI).
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import { describe, expect, test } from "bun:test";
|
|
10
|
+
import { AgentError } from "../errors.ts";
|
|
11
|
+
import { createCanopyClient } from "./client.ts";
|
|
12
|
+
|
|
13
|
+
// Check if canopy CLI is available.
// Bun.which() resolves the binary on PATH in-process and returns null when it is
// missing, so detection does not depend on an external `which` executable
// (which is not present on every platform) and needs no spawn/try-catch.
const hasCanopy = Bun.which("cn") !== null;

// The worktree root has its own .canopy/ symlinked/shared from the canonical root.
// Use process.cwd() which is set to the worktree root in bun test.
const cwd = process.cwd();
const client = createCanopyClient(cwd);
|
|
27
|
+
|
|
28
|
+
// list(): a successful call must report at least one prompt with the expected shape.
describe("CanopyClient.list()", () => {
	test.skipIf(!hasCanopy)("returns prompts array with at least one entry", async () => {
		const { success, prompts } = await client.list();

		expect(success).toBe(true);
		expect(Array.isArray(prompts)).toBe(true);
		expect(prompts.length).toBeGreaterThan(0);

		// Spot-check the field types of the first listed prompt.
		const firstPrompt = prompts[0];
		expect(firstPrompt).toBeDefined();
		expect(typeof firstPrompt?.name).toBe("string");
		expect(typeof firstPrompt?.version).toBe("number");
		expect(Array.isArray(firstPrompt?.sections)).toBe(true);
	});
});
|
|
41
|
+
|
|
42
|
+
// render(): checks the rendered result shape for a known prompt and the
// rejection raised for an unknown one.
describe("CanopyClient.render()", () => {
	test.skipIf(!hasCanopy)(
		"returns CanopyRenderResult with name, version, sections for 'builder' prompt",
		async () => {
			const { success, name, version, sections } = await client.render("builder");

			expect(success).toBe(true);
			expect(name).toBe("builder");
			expect(typeof version).toBe("number");
			expect(version).toBeGreaterThan(0);
			expect(Array.isArray(sections)).toBe(true);
			expect(sections.length).toBeGreaterThan(0);

			// Spot-check the field types of the first rendered section.
			const firstSection = sections[0];
			expect(firstSection).toBeDefined();
			expect(typeof firstSection?.name).toBe("string");
			expect(typeof firstSection?.body).toBe("string");
		},
	);

	test.skipIf(!hasCanopy)("throws AgentError on non-existent prompt", async () => {
		const pendingRender = client.render("nonexistent-prompt-xyz-404");
		await expect(pendingRender).rejects.toThrow(AgentError);
	});
});
|
|
64
|
+
|
|
65
|
+
// show(): checks the returned prompt object for a known prompt and the
// rejection raised for an unknown one.
describe("CanopyClient.show()", () => {
	test.skipIf(!hasCanopy)("returns prompt object for 'builder'", async () => {
		const { success, prompt } = await client.show("builder");

		expect(success).toBe(true);
		expect(prompt).toBeDefined();
		expect(prompt.name).toBe("builder");
		expect(typeof prompt.version).toBe("number");
		expect(typeof prompt.id).toBe("string");
		expect(Array.isArray(prompt.sections)).toBe(true);
	});

	test.skipIf(!hasCanopy)("throws AgentError on non-existent prompt", async () => {
		const pendingShow = client.show("nonexistent-prompt-xyz-404");
		await expect(pendingShow).rejects.toThrow(AgentError);
	});
});
|
|
80
|
+
|
|
81
|
+
// validate(): these tests pin the result structure ({ success, errors }) rather
// than a specific verdict, since the verdict depends on the prompts on disk.
describe("CanopyClient.validate()", () => {
	test.skipIf(!hasCanopy)("returns {success, errors} for a named prompt", async () => {
		const { success, errors } = await client.validate("scout");

		expect(typeof success).toBe("boolean");
		expect(Array.isArray(errors)).toBe(true);
		// A passing validation must not carry any errors.
		if (success) {
			expect(errors.length).toBe(0);
		}
	});

	test.skipIf(!hasCanopy)("returns success=false with errors for an invalid prompt", async () => {
		// 'builder' is known to fail schema validation (missing test gate)
		const { success, errors } = await client.validate("builder");

		expect(typeof success).toBe("boolean");
		expect(Array.isArray(errors)).toBe(true);
		// Either valid or invalid — only the failing case is required to list errors.
		if (!success) {
			expect(errors.length).toBeGreaterThan(0);
		}
	});

	test.skipIf(!hasCanopy)("validate --all returns result with success boolean", async () => {
		const { success, errors } = await client.validate(undefined, { all: true });

		expect(typeof success).toBe("boolean");
		expect(Array.isArray(errors)).toBe(true);
	});
});
|