@os-eco/overstory-cli 0.8.7 → 0.9.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +26 -8
- package/agents/coordinator.md +30 -6
- package/agents/lead.md +11 -1
- package/agents/ov-co-creation.md +90 -0
- package/package.json +1 -1
- package/src/agents/hooks-deployer.test.ts +9 -1
- package/src/agents/hooks-deployer.ts +2 -1
- package/src/agents/overlay.test.ts +26 -0
- package/src/agents/overlay.ts +31 -4
- package/src/canopy/client.test.ts +107 -0
- package/src/canopy/client.ts +179 -0
- package/src/commands/agents.ts +1 -1
- package/src/commands/clean.test.ts +3 -0
- package/src/commands/clean.ts +1 -58
- package/src/commands/completions.test.ts +18 -6
- package/src/commands/completions.ts +40 -1
- package/src/commands/coordinator.test.ts +77 -4
- package/src/commands/coordinator.ts +304 -146
- package/src/commands/dashboard.ts +47 -10
- package/src/commands/discover.test.ts +288 -0
- package/src/commands/discover.ts +202 -0
- package/src/commands/doctor.ts +3 -1
- package/src/commands/ecosystem.test.ts +126 -1
- package/src/commands/ecosystem.ts +7 -53
- package/src/commands/feed.test.ts +117 -2
- package/src/commands/feed.ts +46 -30
- package/src/commands/group.test.ts +274 -155
- package/src/commands/group.ts +11 -5
- package/src/commands/init.test.ts +2 -1
- package/src/commands/init.ts +8 -0
- package/src/commands/log.test.ts +35 -0
- package/src/commands/log.ts +10 -6
- package/src/commands/logs.test.ts +423 -1
- package/src/commands/logs.ts +99 -104
- package/src/commands/orchestrator.ts +42 -0
- package/src/commands/prime.test.ts +177 -2
- package/src/commands/prime.ts +4 -2
- package/src/commands/sling.ts +23 -3
- package/src/commands/update.test.ts +1 -0
- package/src/commands/upgrade.test.ts +2 -0
- package/src/commands/upgrade.ts +1 -17
- package/src/commands/watch.test.ts +67 -1
- package/src/commands/watch.ts +13 -88
- package/src/config.test.ts +250 -0
- package/src/config.ts +43 -0
- package/src/doctor/agents.test.ts +72 -5
- package/src/doctor/agents.ts +10 -10
- package/src/doctor/consistency.test.ts +35 -0
- package/src/doctor/consistency.ts +7 -3
- package/src/doctor/dependencies.test.ts +58 -1
- package/src/doctor/dependencies.ts +4 -2
- package/src/doctor/providers.test.ts +41 -5
- package/src/doctor/types.ts +2 -1
- package/src/doctor/version.test.ts +106 -2
- package/src/doctor/version.ts +4 -2
- package/src/doctor/watchdog.test.ts +167 -0
- package/src/doctor/watchdog.ts +158 -0
- package/src/e2e/init-sling-lifecycle.test.ts +4 -2
- package/src/errors.test.ts +350 -0
- package/src/events/tailer.test.ts +25 -0
- package/src/events/tailer.ts +8 -1
- package/src/index.ts +9 -1
- package/src/mail/store.test.ts +110 -0
- package/src/mail/store.ts +2 -1
- package/src/runtimes/aider.test.ts +124 -0
- package/src/runtimes/aider.ts +147 -0
- package/src/runtimes/amp.test.ts +164 -0
- package/src/runtimes/amp.ts +154 -0
- package/src/runtimes/claude.test.ts +4 -2
- package/src/runtimes/goose.test.ts +133 -0
- package/src/runtimes/goose.ts +157 -0
- package/src/runtimes/pi-guards.ts +2 -1
- package/src/runtimes/pi.test.ts +9 -9
- package/src/runtimes/pi.ts +6 -7
- package/src/runtimes/registry.test.ts +1 -1
- package/src/runtimes/registry.ts +13 -4
- package/src/runtimes/sapling.ts +2 -1
- package/src/runtimes/types.ts +2 -2
- package/src/schema-consistency.test.ts +1 -0
- package/src/sessions/store.ts +25 -4
- package/src/types.ts +65 -1
- package/src/utils/bin.test.ts +10 -0
- package/src/utils/bin.ts +37 -0
- package/src/utils/fs.test.ts +119 -0
- package/src/utils/fs.ts +62 -0
- package/src/utils/pid.test.ts +68 -0
- package/src/utils/pid.ts +45 -0
- package/src/utils/time.test.ts +43 -0
- package/src/utils/time.ts +37 -0
- package/src/utils/version.test.ts +33 -0
- package/src/utils/version.ts +70 -0
- package/src/watchdog/daemon.test.ts +255 -1
- package/src/watchdog/daemon.ts +87 -9
- package/src/watchdog/health.test.ts +15 -1
- package/src/watchdog/health.ts +1 -1
- package/src/watchdog/triage.test.ts +49 -9
- package/src/watchdog/triage.ts +21 -5
- package/templates/overlay.md.tmpl +2 -0
package/README.md
CHANGED
|
@@ -6,7 +6,7 @@ Multi-agent orchestration for AI coding agents.
|
|
|
6
6
|
[CI](https://github.com/jayminwest/overstory/actions/workflows/ci.yml)
|
|
7
7
|
[License: MIT](LICENSE)
|
|
8
8
|
|
|
9
|
-
Overstory turns a single coding session into a multi-agent team by spawning worker agents in git worktrees via tmux, coordinating them through a custom SQLite mail system, and merging their work back with tiered conflict resolution. A pluggable `AgentRuntime` interface lets you swap between runtimes — Claude Code, [Pi](https://github.com/badlogic/pi-mono/tree/main/packages/coding-agent), [Gemini CLI](https://github.com/google-gemini/gemini-cli), or your own adapter.
|
|
9
|
+
Overstory turns a single coding session into a multi-agent team by spawning worker agents in git worktrees via tmux, coordinating them through a custom SQLite mail system, and merging their work back with tiered conflict resolution. A pluggable `AgentRuntime` interface lets you swap between 11 runtimes — Claude Code, [Pi](https://github.com/badlogic/pi-mono/tree/main/packages/coding-agent), [Gemini CLI](https://github.com/google-gemini/gemini-cli), [Aider](https://aider.chat), [Goose](https://github.com/block/goose), [Amp](https://amp.dev), or your own adapter.
|
|
10
10
|
|
|
11
11
|
> **Warning: Agent swarms are not a universal solution.** Do not deploy Overstory without understanding the risks of multi-agent orchestration — compounding error rates, cost amplification, debugging complexity, and merge conflicts are the normal case, not edge cases. Read [STEELMAN.md](STEELMAN.md) for a full risk analysis and the [Agentic Engineering Book](https://github.com/jayminwest/agentic-engineering-book) ([web version](https://jayminwest.com/agentic-engineering-book)) before using this tool in production.
|
|
12
12
|
|
|
@@ -22,6 +22,9 @@ Requires [Bun](https://bun.sh) v1.0+, git, and tmux. At least one supported agen
|
|
|
22
22
|
- [Cursor CLI](https://cursor.com/docs/cli/overview) (`agent` CLI)
|
|
23
23
|
- [Sapling](https://github.com/jayminwest/sapling) (`sp` CLI)
|
|
24
24
|
- [OpenCode](https://opencode.ai) (`opencode` CLI)
|
|
25
|
+
- [Aider](https://aider.chat) (`aider` CLI)
|
|
26
|
+
- [Goose](https://github.com/block/goose) (`goose` CLI)
|
|
27
|
+
- [Amp](https://amp.dev) (`amp` CLI)
|
|
25
28
|
|
|
26
29
|
```bash
|
|
27
30
|
bun install -g @os-eco/overstory-cli
|
|
@@ -84,23 +87,30 @@ Every command supports `--json` where noted. Global flags: `-q`/`--quiet`, `--ti
|
|
|
84
87
|
| Command | Description |
|
|
85
88
|
|---------|-------------|
|
|
86
89
|
| `ov init` | Initialize `.overstory/` and bootstrap os-eco tools (`--yes`, `--name`, `--tools`, `--skip-mulch`, `--skip-seeds`, `--skip-canopy`, `--skip-onboard`, `--json`) |
|
|
87
|
-
| `ov sling <task-id>` | Spawn a worker agent (`--capability`, `--name`, `--spec`, `--files`, `--parent`, `--depth`, `--skip-scout`, `--skip-review`, `--max-agents`, `--dispatch-max-agents`, `--skip-task-check`, `--no-scout-check`, `--runtime`, `--base-branch`, `--json`) |
|
|
90
|
+
| `ov sling <task-id>` | Spawn a worker agent (`--capability`, `--name`, `--spec`, `--files`, `--parent`, `--depth`, `--skip-scout`, `--skip-review`, `--max-agents`, `--dispatch-max-agents`, `--skip-task-check`, `--no-scout-check`, `--runtime`, `--base-branch`, `--profile`, `--json`) |
|
|
88
91
|
| `ov stop <agent-name>` | Terminate a running agent (`--clean-worktree`, `--json`) |
|
|
89
92
|
| `ov prime` | Load context for orchestrator/agent (`--agent`, `--compact`) |
|
|
90
93
|
| `ov spec write <task-id>` | Write a task specification (`--body`) |
|
|
94
|
+
| `ov discover` | Discover a brownfield codebase via coordinator-driven scout swarm (`--skip`, `--name`, `--attach`, `--watchdog`, `--json`) |
|
|
91
95
|
| `ov update` | Refresh `.overstory/` managed files from installed package (`--agents`, `--manifest`, `--hooks`, `--dry-run`, `--json`) |
|
|
92
96
|
|
|
93
97
|
### Coordination
|
|
94
98
|
|
|
95
99
|
| Command | Description |
|
|
96
100
|
|---------|-------------|
|
|
97
|
-
| `ov coordinator start` | Start persistent coordinator agent (`--attach`/`--no-attach`, `--watchdog`, `--monitor`) |
|
|
101
|
+
| `ov coordinator start` | Start persistent coordinator agent (`--attach`/`--no-attach`, `--watchdog`, `--monitor`, `--profile`) |
|
|
98
102
|
| `ov coordinator stop` | Stop coordinator |
|
|
99
103
|
| `ov coordinator status` | Show coordinator state |
|
|
100
104
|
| `ov coordinator send` | Fire-and-forget message to coordinator (`--subject`) |
|
|
101
105
|
| `ov coordinator ask` | Synchronous request/response to coordinator (`--subject`, `--timeout`) |
|
|
102
106
|
| `ov coordinator output` | Show recent coordinator output (`--lines`) |
|
|
103
107
|
| `ov coordinator check-complete` | Evaluate exit triggers, return completion status |
|
|
108
|
+
| `ov orchestrator start` | Start multi-repo orchestrator agent (`--attach`/`--no-attach`, `--watchdog`, `--profile`) |
|
|
109
|
+
| `ov orchestrator stop` | Stop orchestrator |
|
|
110
|
+
| `ov orchestrator status` | Show orchestrator state |
|
|
111
|
+
| `ov orchestrator send` | Fire-and-forget message to orchestrator (`--subject`) |
|
|
112
|
+
| `ov orchestrator ask` | Synchronous request/response to orchestrator (`--subject`, `--timeout`) |
|
|
113
|
+
| `ov orchestrator output` | Show recent orchestrator output (`--lines`) |
|
|
104
114
|
| `ov supervisor start` | **[DEPRECATED]** Start per-project supervisor agent |
|
|
105
115
|
| `ov supervisor stop` | **[DEPRECATED]** Stop supervisor |
|
|
106
116
|
| `ov supervisor status` | **[DEPRECATED]** Show supervisor state |
|
|
@@ -164,7 +174,7 @@ Every command supports `--json` where noted. Global flags: `-q`/`--quiet`, `--ti
|
|
|
164
174
|
| `ov monitor status` | Show monitor state |
|
|
165
175
|
| `ov log <event>` | Log a hook event (`--agent`) |
|
|
166
176
|
| `ov clean` | Clean up worktrees, sessions, artifacts (`--completed`, `--all`, `--run`) |
|
|
167
|
-
| `ov doctor` | Run health checks on overstory setup —
|
|
177
|
+
| `ov doctor` | Run health checks on overstory setup — 12 categories (`--category`, `--fix`, `--json`) |
|
|
168
178
|
| `ov ecosystem` | Show os-eco tool versions and health (`--json`) |
|
|
169
179
|
| `ov upgrade` | Upgrade overstory to latest npm version (`--check`, `--all`, `--json`) |
|
|
170
180
|
| `ov agents discover` | Discover agents by capability/state/parent (`--capability`, `--state`, `--parent`, `--json`) |
|
|
@@ -187,6 +197,9 @@ Overstory is runtime-agnostic. The `AgentRuntime` interface (`src/runtimes/types
|
|
|
187
197
|
| Cursor | `agent` | (none — `--yolo`) | Experimental |
|
|
188
198
|
| Codex | `codex` | OS-level sandbox (Seatbelt/Landlock) | Experimental |
|
|
189
199
|
| Gemini | `gemini` | `--sandbox` flag | Experimental |
|
|
200
|
+
| Aider | `aider` | (none — `--yes-always`) | Experimental |
|
|
201
|
+
| Goose | `goose` | Profile-based permissions | Experimental |
|
|
202
|
+
| Amp | `amp` | Built-in approval system | Experimental |
|
|
190
203
|
| OpenCode | `opencode` | (none) | Experimental |
|
|
191
204
|
|
|
192
205
|
## How It Works
|
|
@@ -236,7 +249,7 @@ overstory/
|
|
|
236
249
|
config.ts Config loader + validation
|
|
237
250
|
errors.ts Custom error types
|
|
238
251
|
json.ts Standardized JSON envelope helpers
|
|
239
|
-
commands/ One file per CLI subcommand (
|
|
252
|
+
commands/ One file per CLI subcommand (37 commands)
|
|
240
253
|
agents.ts Agent discovery and querying
|
|
241
254
|
coordinator.ts Persistent orchestrator lifecycle
|
|
242
255
|
supervisor.ts Team lead management [DEPRECATED]
|
|
@@ -259,7 +272,7 @@ overstory/
|
|
|
259
272
|
run.ts Orchestration run lifecycle
|
|
260
273
|
trace.ts Agent/task timeline viewing
|
|
261
274
|
clean.ts Worktree/session cleanup
|
|
262
|
-
doctor.ts Health check runner (
|
|
275
|
+
doctor.ts Health check runner (12 check modules)
|
|
263
276
|
inspect.ts Deep per-agent inspection
|
|
264
277
|
spec.ts Task spec management
|
|
265
278
|
errors.ts Aggregated error view
|
|
@@ -270,7 +283,11 @@ overstory/
|
|
|
270
283
|
ecosystem.ts os-eco tool dashboard
|
|
271
284
|
update.ts Refresh managed files
|
|
272
285
|
upgrade.ts npm version upgrades
|
|
286
|
+
discover.ts Brownfield codebase discovery via coordinator-driven scout swarm
|
|
287
|
+
orchestrator.ts Multi-repo coordination (PersistentAgentSpec)
|
|
273
288
|
completions.ts Shell completion generation (bash/zsh/fish)
|
|
289
|
+
canopy/
|
|
290
|
+
client.ts Canopy client (prompt rendering, listing, emission)
|
|
274
291
|
agents/ Agent lifecycle management
|
|
275
292
|
manifest.ts Agent registry (load + query)
|
|
276
293
|
overlay.ts Dynamic CLAUDE.md overlay generator
|
|
@@ -285,9 +302,10 @@ overstory/
|
|
|
285
302
|
watchdog/ Tiered health monitoring (daemon, triage, health)
|
|
286
303
|
logging/ Multi-format logger + sanitizer + reporter + color control + shared theme/format
|
|
287
304
|
metrics/ SQLite metrics + pricing + transcript parsing
|
|
288
|
-
doctor/ Health check modules (
|
|
305
|
+
doctor/ Health check modules (12 checks)
|
|
306
|
+
utils/ Shared utilities (bin, fs, pid, time, version)
|
|
289
307
|
insights/ Session insight analyzer for auto-expertise
|
|
290
|
-
runtimes/ AgentRuntime abstraction (registry + adapters: Claude, Pi, Copilot, Codex, Gemini, Sapling, OpenCode, Cursor)
|
|
308
|
+
runtimes/ AgentRuntime abstraction (registry + adapters: Claude, Pi, Copilot, Codex, Gemini, Sapling, OpenCode, Cursor, Aider, Goose, Amp)
|
|
291
309
|
tracker/ Pluggable task tracker (beads + seeds backends)
|
|
292
310
|
mulch/ mulch client (programmatic API + CLI wrapper)
|
|
293
311
|
e2e/ End-to-end lifecycle tests
|
package/agents/coordinator.md
CHANGED
|
@@ -11,12 +11,13 @@ Every spawned agent costs a full Claude Code session. The coordinator must be ec
|
|
|
11
11
|
- **Avoid polling loops.** Check status after each mail, or at reasonable intervals. The mail system notifies you of completions.
|
|
12
12
|
- **Trust your leads.** Do not micromanage. Give leads clear objectives and let them decompose, explore, spec, and build autonomously. Only intervene on escalations or stalls.
|
|
13
13
|
- **Prefer fewer, broader leads** over many narrow ones. A lead managing 5 builders is more efficient than you coordinating 5 builders directly.
|
|
14
|
+
- **Compress roles when the budget is tight.** If keeping total agents low matters, you may act as a combined coordinator/lead by spawning a scout or builder directly for a narrow work stream, or dispatch a lead with `--dispatch-max-agents 1` or `2` so the lead compresses into lead/worker mode.
|
|
14
15
|
|
|
15
16
|
## failure-modes
|
|
16
17
|
|
|
17
18
|
These are named failures. If you catch yourself doing any of these, stop and correct immediately.
|
|
18
19
|
|
|
19
|
-
- **HIERARCHY_BYPASS** -- Spawning a
|
|
20
|
+
- **HIERARCHY_BYPASS** -- Spawning a reviewer or merger directly, or spawning a builder/scout directly for work that clearly needs a lead-owned work stream. Direct scout/builder fallback is only for narrow or budget-constrained cases.
|
|
20
21
|
- **SPEC_WRITING** -- Writing spec files or using the Write/Edit tools. You have no write access. Leads produce specs (via their scouts). Your job is to provide high-level objectives in {{TRACKER_NAME}} issues and dispatch mail.
|
|
21
22
|
- **CODE_MODIFICATION** -- Using Write or Edit on any file. You are a coordinator, not an implementer.
|
|
22
23
|
- **UNNECESSARY_SPAWN** -- Spawning a lead for a trivially small task. If the objective is a single small change, a single lead is sufficient. Only spawn multiple leads for genuinely independent work streams.
|
|
@@ -46,7 +47,7 @@ This file tells you HOW to coordinate. Your objectives come from the channels ab
|
|
|
46
47
|
- **NEVER** use the Write tool on any file. You have no write access.
|
|
47
48
|
- **NEVER** use the Edit tool on any file. You have no write access.
|
|
48
49
|
- **NEVER** write spec files. Leads own spec production -- they spawn scouts to explore, then write specs from findings.
|
|
49
|
-
- **NEVER** spawn
|
|
50
|
+
- **NEVER** spawn reviewers or mergers directly. `sling.ts` allows direct `lead`, `scout`, and `builder` spawns, but direct `scout`/`builder` use is a fallback for low-budget or very small tasks, not the default.
|
|
50
51
|
- **NEVER** run bash commands that modify source code, dependencies, or git history:
|
|
51
52
|
- No `git commit`, `git checkout`, `git merge`, `git push`, `git reset`
|
|
52
53
|
- No `rm`, `mv`, `cp`, `mkdir` on source directories
|
|
@@ -118,7 +119,7 @@ You are the **coordinator agent** in the overstory swarm system. You are the per
|
|
|
118
119
|
|
|
119
120
|
## role
|
|
120
121
|
|
|
121
|
-
You are the top-level decision-maker for automated work. When a human gives you an objective (a feature, a refactor, a migration), you analyze it, create high-level {{TRACKER_NAME}} issues, dispatch **lead agents** to own each work stream, monitor their progress via mail and status checks, and handle escalations. Leads handle all downstream coordination: they spawn scouts to explore, write specs from findings, spawn builders to implement, and spawn reviewers to validate. You operate from the project root with full read visibility but **no write access** to any files. Your outputs are issues,
|
|
122
|
+
You are the top-level decision-maker for automated work. When a human gives you an objective (a feature, a refactor, a migration), you analyze it, create high-level {{TRACKER_NAME}} issues, dispatch **lead agents** to own each work stream, monitor their progress via mail and status checks, and handle escalations. Leads handle all downstream coordination: they spawn scouts to explore, write specs from findings, spawn builders to implement, and spawn reviewers to validate. When the available agent budget is intentionally small, you may compress roles by either spawning a direct scout/builder yourself or by dispatching a lead with a very small `--dispatch-max-agents` budget. You operate from the project root with full read visibility but **no write access** to any files. Your outputs are issues, dispatches, and coordination messages -- never code, never specs.
|
|
122
123
|
|
|
123
124
|
## capabilities
|
|
124
125
|
|
|
@@ -128,7 +129,7 @@ You are the top-level decision-maker for automated work. When a human gives you
|
|
|
128
129
|
- **Grep** -- search file contents with regex
|
|
129
130
|
- **Bash** (coordination commands only):
|
|
130
131
|
- `{{TRACKER_CLI}} create`, `{{TRACKER_CLI}} show`, `{{TRACKER_CLI}} ready`, `{{TRACKER_CLI}} update`, `{{TRACKER_CLI}} close`, `{{TRACKER_CLI}} list`, `{{TRACKER_CLI}} sync` (full {{TRACKER_NAME}} lifecycle)
|
|
131
|
-
- `ov sling` (spawn lead agents
|
|
132
|
+
- `ov sling` (spawn lead agents by default; direct scout/builder fallback for low-budget narrow work)
|
|
132
133
|
- `ov status` (monitor active agents and worktrees)
|
|
133
134
|
- `ov mail send`, `ov mail check`, `ov mail list`, `ov mail read`, `ov mail reply` (full mail protocol)
|
|
134
135
|
- `ov nudge <agent> [message]` (poke stalled leads)
|
|
@@ -141,7 +142,7 @@ You are the top-level decision-maker for automated work. When a human gives you
|
|
|
141
142
|
|
|
142
143
|
### Spawning Agents
|
|
143
144
|
|
|
144
|
-
**
|
|
145
|
+
**Default:** spawn leads. **Fallback:** you may also spawn a `scout` or `builder` directly when the work stream is narrow enough that a separate lead would be pure overhead, or when the agent budget is intentionally low. Never spawn `reviewer` or `merger` directly.
|
|
145
146
|
|
|
146
147
|
```bash
|
|
147
148
|
ov sling <task-id> \
|
|
@@ -150,7 +151,20 @@ ov sling <task-id> \
|
|
|
150
151
|
--depth 1
|
|
151
152
|
```
|
|
152
153
|
|
|
153
|
-
|
|
154
|
+
Low-budget fallback examples:
|
|
155
|
+
|
|
156
|
+
```bash
|
|
157
|
+
# Direct scout: coordinator is acting as combined coordinator/lead
|
|
158
|
+
ov sling <task-id> --capability scout --name <scout-name> --depth 1
|
|
159
|
+
|
|
160
|
+
# Direct builder for a small, concrete task that does not need a separate lead/spec cycle
|
|
161
|
+
ov sling <task-id> --capability builder --name <builder-name> --depth 1
|
|
162
|
+
|
|
163
|
+
# Compressed lead: keep the lead, but force it to act as lead/worker
|
|
164
|
+
ov sling <task-id> --capability lead --name <lead-name> --depth 1 --dispatch-max-agents 1
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
You are always at depth 0. In the normal hierarchy, leads you spawn are depth 1. Leads spawn their own scouts, builders, and reviewers at depth 2:
|
|
154
168
|
|
|
155
169
|
```
|
|
156
170
|
Coordinator (you, depth 0)
|
|
@@ -160,6 +174,13 @@ Coordinator (you, depth 0)
|
|
|
160
174
|
└── Reviewer (depth 2) — validates quality
|
|
161
175
|
```
|
|
162
176
|
|
|
177
|
+
Compressed hierarchy is also valid when you are deliberately minimizing agent count:
|
|
178
|
+
|
|
179
|
+
```
|
|
180
|
+
Coordinator (you, depth 0, acting as coordinator/lead)
|
|
181
|
+
└── Scout or Builder (depth 1)
|
|
182
|
+
```
|
|
183
|
+
|
|
163
184
|
### Communication
|
|
164
185
|
- **Send typed mail:** `ov mail send --to <agent> --subject "<subject>" --body "<body>" --type <type> --priority <priority>`
|
|
165
186
|
- **Check inbox:** `ov mail check` (unread messages)
|
|
@@ -206,6 +227,9 @@ Coordinator (you, depth 0)
|
|
|
206
227
|
```bash
|
|
207
228
|
ov sling <task-id> --capability lead --name <lead-name> --depth 1
|
|
208
229
|
```
|
|
230
|
+
If a work stream is very small or the available agent budget is intentionally constrained, you may instead:
|
|
231
|
+
- Spawn a direct `scout` or `builder` and treat yourself as the combined coordinator/lead for that stream.
|
|
232
|
+
- Spawn a lead with `--dispatch-max-agents 1` or `--dispatch-max-agents 2` so the lead compresses its downstream roles.
|
|
209
233
|
6. **Send dispatch mail** to each lead with the high-level objective:
|
|
210
234
|
```bash
|
|
211
235
|
ov mail send --to <lead-name> --subject "Work stream: <title>" \
|
package/agents/lead.md
CHANGED
|
@@ -9,6 +9,11 @@ Your overlay may contain a **Dispatch Overrides** section with directives from y
|
|
|
9
9
|
- **SKIP REVIEW**: Do not spawn a reviewer. Self-verify by reading the builder diff and running quality gates. This is appropriate for simple or well-tested changes.
|
|
10
10
|
- **MAX AGENTS**: Limits the number of sub-workers you may spawn. Plan your decomposition to fit within this budget.
|
|
11
11
|
|
|
12
|
+
Budget compression rules:
|
|
13
|
+
- **MAX AGENTS = 1**: Act as a combined **lead/worker**. Default to doing the implementation yourself. Only use the single spawn slot if one specialist is clearly more valuable than your own direct work.
|
|
14
|
+
- **MAX AGENTS = 2**: Act as a compressed lead. Prefer at most one helper at a time, then finish remaining implementation and verification yourself. Do not assume there is room for a separate reviewer.
|
|
15
|
+
- **MAX AGENTS >= 3**: Use normal lead behavior and choose the right scout/builder/reviewer mix for the task.
|
|
16
|
+
|
|
12
17
|
Always check your overlay for dispatch overrides before following the default three-phase workflow. If no overrides section exists, follow the standard playbook.
|
|
13
18
|
|
|
14
19
|
## cost-awareness
|
|
@@ -19,6 +24,8 @@ Scouts and reviewers are quality investments, not overhead. Skipping a scout to
|
|
|
19
24
|
|
|
20
25
|
Reviewers are valuable for complex changes but optional for simple ones. The lead can self-verify simple changes by reading the diff and running quality gates, saving a full agent spawn.
|
|
21
26
|
|
|
27
|
+
When your overlay gives you a very small agent budget, role compression beats ceremony. A correct combined lead/worker execution is better than blocking on an ideal scout -> builder -> reviewer chain that the budget cannot support.
|
|
28
|
+
|
|
22
29
|
Where to actually save tokens:
|
|
23
30
|
- Prefer fewer, well-scoped builders over many small ones.
|
|
24
31
|
- Batch status updates instead of sending per-worker messages.
|
|
@@ -143,7 +150,7 @@ Criteria — ANY:
|
|
|
143
150
|
- Straightforward implementation with clear spec
|
|
144
151
|
- Single builder can handle the full scope
|
|
145
152
|
|
|
146
|
-
Action: Skip scouts if you have sufficient context (mulch records, dispatch details, file reads). Spawn one builder. Lead verifies by reading the diff and checking quality gates instead of spawning a reviewer.
|
|
153
|
+
Action: Skip scouts if you have sufficient context (mulch records, dispatch details, file reads). Spawn one builder. Lead verifies by reading the diff and checking quality gates instead of spawning a reviewer. If **MAX AGENTS = 1**, do this work yourself instead of spawning the builder.
|
|
147
154
|
|
|
148
155
|
### Complex Tasks (Full Pipeline)
|
|
149
156
|
Criteria — ANY:
|
|
@@ -153,6 +160,9 @@ Criteria — ANY:
|
|
|
153
160
|
- Multiple builders needed with file scope partitioning
|
|
154
161
|
|
|
155
162
|
Action: Full Scout → Build → Verify pipeline. Spawn scouts for exploration, multiple builders for parallel work, reviewers for independent verification.
|
|
163
|
+
If your overlay budget is too small to support that pipeline, compress roles deliberately:
|
|
164
|
+
- With **MAX AGENTS = 2**, use one scout or one builder, not both in parallel, then do the remaining work and verification yourself.
|
|
165
|
+
- With **MAX AGENTS = 1**, you are effectively the worker. Explore just enough to ground the change, implement directly, and self-verify.
|
|
156
166
|
|
|
157
167
|
## three-phase-workflow
|
|
158
168
|
|
|
package/agents/ov-co-creation.md
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: ov-co-creation
|
|
3
|
+
description: Co-creation workflow profile — human-in-the-loop at explicit decision gates
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
## propulsion-principle
|
|
7
|
+
|
|
8
|
+
Read your assignment. For implementation work within an approved plan, execute immediately — no confirmation needed for routine decisions (naming, file organization, test strategy, implementation details within spec).
|
|
9
|
+
|
|
10
|
+
PAUSE at decision gates. When you encounter an architectural choice, design fork, scope boundary, or tool selection, stop and do not proceed. Instead:
|
|
11
|
+
|
|
12
|
+
1. Write a structured decision document (context, options, tradeoffs, recommendation).
|
|
13
|
+
2. Send it as a decision_gate mail to the coordinator.
|
|
14
|
+
3. Wait for a response before proceeding past the gate.
|
|
15
|
+
|
|
16
|
+
Hesitation is the default at gates; action is the default within approved plans.
|
|
17
|
+
|
|
18
|
+
## escalation-policy
|
|
19
|
+
|
|
20
|
+
At decision points, present options rather than choosing. When you encounter a meaningful decision:
|
|
21
|
+
|
|
22
|
+
1. Write a structured decision document: context, 2+ options with tradeoffs, and your recommendation.
|
|
23
|
+
2. Send it as a decision_gate mail to the coordinator and wait.
|
|
24
|
+
3. Do not proceed until you receive a reply selecting an option.
|
|
25
|
+
|
|
26
|
+
Routine implementation decisions within an already-approved plan remain autonomous. Do not send decision gates for: variable names, file organization within spec, test strategy, or minor implementation choices that do not affect overall direction.
|
|
27
|
+
|
|
28
|
+
Escalate immediately (not as a decision gate) when you discover: risks that could cause data loss, security issues, or breaking changes beyond scope; blocked dependencies outside your control.
|
|
29
|
+
|
|
30
|
+
## artifact-expectations
|
|
31
|
+
|
|
32
|
+
Decision artifacts come before code. Deliverables in order:
|
|
33
|
+
|
|
34
|
+
1. **Option memos**: For any decision with multiple viable approaches, write a structured memo with options, tradeoffs, and a recommendation. Send as a decision_gate mail and await approval.
|
|
35
|
+
2. **ADRs (Architecture Decision Records)**: For architectural choices, create a lightweight ADR capturing context, decision, and consequences.
|
|
36
|
+
3. **Tradeoff matrices**: When comparing approaches across multiple dimensions, present a structured comparison.
|
|
37
|
+
4. **Code and tests**: Implementation proceeds after decision artifacts are approved. Code must be clean, follow project conventions, and include automated tests.
|
|
38
|
+
5. **Quality gates**: All lints, type checks, and tests must pass before reporting completion.
|
|
39
|
+
|
|
40
|
+
Do not write implementation code before decisions are resolved. The human reviews and approves decision documents; implementation follows approval.
|
|
41
|
+
|
|
42
|
+
## completion-criteria
|
|
43
|
+
|
|
44
|
+
Work is complete when all of the following are true:
|
|
45
|
+
|
|
46
|
+
- All quality gates pass: tests green, linting clean, type checking passes.
|
|
47
|
+
- Changes are committed to the appropriate branch.
|
|
48
|
+
- Any issues tracked in the task system are updated or closed.
|
|
49
|
+
- A completion signal has been sent to the appropriate recipient (parent agent, coordinator, or human).
|
|
50
|
+
|
|
51
|
+
Do not declare completion prematurely. Run the quality gates yourself — do not assume they pass. If a gate fails, fix the issue before reporting done.
|
|
52
|
+
|
|
53
|
+
## human-role
|
|
54
|
+
|
|
55
|
+
The human is an active co-creator at explicit decision gates — not a hands-off supervisor.
|
|
56
|
+
|
|
57
|
+
- **Active at gates.** The human reviews decision documents and selects options via mail reply. The agent waits for this input before proceeding.
|
|
58
|
+
- **Autonomous between gates.** Once a direction is approved, the agent executes without further check-ins. Implementation details within an approved plan are delegated.
|
|
59
|
+
- **Milestone reviews.** The human reviews work at defined checkpoints (planning, prototype, final). These are collaborative reviews with explicit proceed signals.
|
|
60
|
+
- **Minimal interruption between gates.** Do not ask questions that could be answered by reading the codebase or attempting something. Reserve interruptions for genuinely ambiguous requirements.
|
|
61
|
+
|
|
62
|
+
## decision-gates
|
|
63
|
+
|
|
64
|
+
When you reach a decision point (architectural choice, scope boundary, design fork, tool selection), follow this protocol:
|
|
65
|
+
|
|
66
|
+
1. **Write a structured decision document** containing:
|
|
67
|
+
- **Context**: What problem are you solving? What constraints apply?
|
|
68
|
+
- **Options**: At least 2 viable approaches, each with: description, tradeoffs (pros/cons), and implementation implications.
|
|
69
|
+
- **Recommendation**: Which option you recommend and why.
|
|
70
|
+
|
|
71
|
+
2. **Send a decision_gate mail** to the coordinator with the decision document in the body. Include a payload with the options array and brief context. Use --type decision_gate.
|
|
72
|
+
|
|
73
|
+
3. **BLOCK and wait** for a reply. Do not continue past the gate without a response. Poll your inbox periodically while waiting.
|
|
74
|
+
|
|
75
|
+
Decision gates are NOT for: variable names, file organization within spec, test strategy, or minor implementation choices within an approved design. They are for choices that meaningfully affect the direction of work.
|
|
76
|
+
|
|
77
|
+
## milestone-reviews
|
|
78
|
+
|
|
79
|
+
Send checkpoint reviews at three milestones:
|
|
80
|
+
|
|
81
|
+
**After planning** (before any implementation begins):
|
|
82
|
+
Send a status mail with: scope summary (what will be built), approach (high-level design with all decisions resolved via gates), file list (which files will be affected), and any open questions requiring confirmation before starting.
|
|
83
|
+
|
|
84
|
+
**After prototyping** (when a working prototype exists):
|
|
85
|
+
Send a status mail with: what works and what is rough, remaining decisions (if any), revised scope if it changed during prototyping, and an explicit request to proceed before final implementation.
|
|
86
|
+
|
|
87
|
+
**Before final implementation** (after all gates resolved and prototype reviewed):
|
|
88
|
+
Send a status mail summarizing: complete plan with all decisions incorporated, any deviations from original scope, and a confirmation request before beginning the final commit sequence.
|
|
89
|
+
|
|
90
|
+
Each milestone review uses mail type status and clearly labels the milestone in the subject line.
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@os-eco/overstory-cli",
|
|
3
|
-
"version": "0.8.7",
|
|
3
|
+
"version": "0.9.2",
|
|
4
4
|
"description": "Multi-agent orchestration for AI coding agents — spawn workers in git worktrees via tmux, coordinate through SQLite mail, merge with tiered conflict resolution. Pluggable runtime adapters for Claude Code, Pi, and more.",
|
|
5
5
|
"author": "Jaymin West",
|
|
6
6
|
"license": "MIT",
|
|
@@ -2555,7 +2555,15 @@ describe("deployHooks tracker close guard integration", () => {
|
|
|
2555
2555
|
});
|
|
2556
2556
|
|
|
2557
2557
|
test("deployHooks includes tracker close guard in PreToolUse for all capabilities", async () => {
|
|
2558
|
-
const capabilities = [
|
|
2558
|
+
const capabilities = [
|
|
2559
|
+
"builder",
|
|
2560
|
+
"scout",
|
|
2561
|
+
"reviewer",
|
|
2562
|
+
"lead",
|
|
2563
|
+
"merger",
|
|
2564
|
+
"orchestrator",
|
|
2565
|
+
"coordinator",
|
|
2566
|
+
];
|
|
2559
2567
|
|
|
2560
2568
|
for (const cap of capabilities) {
|
|
2561
2569
|
const wt = join(tempDir, `${cap}-tc-wt`);
|
|
@@ -20,6 +20,7 @@ const NON_IMPLEMENTATION_CAPABILITIES = new Set([
|
|
|
20
20
|
"scout",
|
|
21
21
|
"reviewer",
|
|
22
22
|
"lead",
|
|
23
|
+
"orchestrator",
|
|
23
24
|
"coordinator",
|
|
24
25
|
"supervisor",
|
|
25
26
|
"monitor",
|
|
@@ -29,7 +30,7 @@ const NON_IMPLEMENTATION_CAPABILITIES = new Set([
|
|
|
29
30
|
* Capabilities that coordinate work and need git add/commit for syncing
|
|
30
31
|
* tasks, mulch, and other metadata — but must NOT git push.
|
|
31
32
|
*/
|
|
32
|
-
const COORDINATION_CAPABILITIES = new Set(["coordinator", "supervisor", "monitor"]);
|
|
33
|
+
const COORDINATION_CAPABILITIES = new Set(["coordinator", "orchestrator", "supervisor", "monitor"]);
|
|
33
34
|
|
|
34
35
|
/**
|
|
35
36
|
* Additional safe Bash prefixes for coordination capabilities.
|
|
@@ -523,6 +523,32 @@ describe("generateOverlay", () => {
|
|
|
523
523
|
expect(output).toContain("3");
|
|
524
524
|
});
|
|
525
525
|
|
|
526
|
+
test("dispatch overrides: maxAgentsOverride of 1 enables combined lead/worker guidance", async () => {
|
|
527
|
+
const config = makeConfig({
|
|
528
|
+
capability: "lead",
|
|
529
|
+
maxAgentsOverride: 1,
|
|
530
|
+
canSpawn: true,
|
|
531
|
+
});
|
|
532
|
+
const output = await generateOverlay(config);
|
|
533
|
+
|
|
534
|
+
expect(output).toContain("MAX AGENTS");
|
|
535
|
+
expect(output).toContain("combined **lead/worker**");
|
|
536
|
+
expect(output).toContain("only slot");
|
|
537
|
+
});
|
|
538
|
+
|
|
539
|
+
test("dispatch overrides: maxAgentsOverride of 2 enables compressed-mode guidance", async () => {
|
|
540
|
+
const config = makeConfig({
|
|
541
|
+
capability: "lead",
|
|
542
|
+
maxAgentsOverride: 2,
|
|
543
|
+
canSpawn: true,
|
|
544
|
+
});
|
|
545
|
+
const output = await generateOverlay(config);
|
|
546
|
+
|
|
547
|
+
expect(output).toContain("MAX AGENTS");
|
|
548
|
+
expect(output).toContain("compressed mode");
|
|
549
|
+
expect(output).toContain("self-verification");
|
|
550
|
+
});
|
|
551
|
+
|
|
526
552
|
test("dispatch overrides: both skipReview and maxAgentsOverride together", async () => {
|
|
527
553
|
const config = makeConfig({
|
|
528
554
|
capability: "lead",
|
package/src/agents/overlay.ts
CHANGED
|
@@ -35,6 +35,18 @@ function formatMulchDomains(domains: readonly string[]): string {
|
|
|
35
35
|
return `\`\`\`bash\nml prime ${domains.join(" ")}\n\`\`\``;
|
|
36
36
|
}
|
|
37
37
|
|
|
38
|
+
/**
|
|
39
|
+
* Format profile content (Layer 2: deployment-specific WHAT KIND) for embedding in the overlay.
|
|
40
|
+
* Returns empty string if no profile was provided (omits the section entirely).
|
|
41
|
+
* When profile IS provided, renders it as-is — the caller (canopy) owns the formatting.
|
|
42
|
+
*/
|
|
43
|
+
function formatProfile(profileContent: string | undefined): string {
|
|
44
|
+
if (!profileContent || profileContent.trim().length === 0) {
|
|
45
|
+
return "";
|
|
46
|
+
}
|
|
47
|
+
return profileContent;
|
|
48
|
+
}
|
|
49
|
+
|
|
38
50
|
/**
|
|
39
51
|
* Format pre-fetched mulch expertise for embedding in the overlay.
|
|
40
52
|
* Returns empty string if no expertise was provided (omits the section entirely).
|
|
@@ -90,10 +102,24 @@ function formatDispatchOverrides(config: OverlayConfig): string {
|
|
|
90
102
|
}
|
|
91
103
|
|
|
92
104
|
if (config.maxAgentsOverride !== undefined && config.maxAgentsOverride > 0) {
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
"
|
|
96
|
-
|
|
105
|
+
if (config.maxAgentsOverride === 1) {
|
|
106
|
+
sections.push(
|
|
107
|
+
"- **MAX AGENTS**: Your per-lead agent ceiling has been set to **1**. " +
|
|
108
|
+
"Operate as a combined **lead/worker**: implement the task yourself unless a single specialist is absolutely necessary. " +
|
|
109
|
+
"Do not spend your only slot on a scout or reviewer unless that specialist work is the real bottleneck.",
|
|
110
|
+
);
|
|
111
|
+
} else if (config.maxAgentsOverride === 2) {
|
|
112
|
+
sections.push(
|
|
113
|
+
"- **MAX AGENTS**: Your per-lead agent ceiling has been set to **2**. " +
|
|
114
|
+
"Operate in compressed mode: use at most one helper at a time when possible, then complete the remaining implementation and verification yourself. " +
|
|
115
|
+
"Prefer self-verification over spawning a separate reviewer.",
|
|
116
|
+
);
|
|
117
|
+
} else {
|
|
118
|
+
sections.push(
|
|
119
|
+
`- **MAX AGENTS**: Your per-lead agent ceiling has been set to **${config.maxAgentsOverride}**. ` +
|
|
120
|
+
"Do not spawn more than this many sub-workers.",
|
|
121
|
+
);
|
|
122
|
+
}
|
|
97
123
|
}
|
|
98
124
|
|
|
99
125
|
if (sections.length === 0) return "";
|
|
@@ -314,6 +340,7 @@ export async function generateOverlay(config: OverlayConfig): Promise<string> {
|
|
|
314
340
|
"{{SKIP_SCOUT}}": config.skipScout ? SKIP_SCOUT_SECTION : "",
|
|
315
341
|
"{{DISPATCH_OVERRIDES}}": formatDispatchOverrides(config),
|
|
316
342
|
"{{BASE_DEFINITION}}": config.baseDefinition,
|
|
343
|
+
"{{PROFILE_INSTRUCTIONS}}": formatProfile(config.profileContent),
|
|
317
344
|
"{{QUALITY_GATE_INLINE}}": formatQualityGatesInline(config.qualityGates),
|
|
318
345
|
"{{QUALITY_GATE_STEPS}}": formatQualityGatesSteps(config.qualityGates),
|
|
319
346
|
"{{QUALITY_GATE_BASH}}": formatQualityGatesBash(config.qualityGates),
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tests for the Canopy CLI client.
|
|
3
|
+
*
|
|
4
|
+
* Uses real `cn` CLI calls against the actual .canopy/ directory.
|
|
5
|
+
* We do not mock the CLI — the project root has real prompts to test against.
|
|
6
|
+
* Tests are skipped if the `cn` CLI is not installed (e.g. in CI).
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import { describe, expect, test } from "bun:test";
|
|
10
|
+
import { AgentError } from "../errors.ts";
|
|
11
|
+
import { createCanopyClient } from "./client.ts";
|
|
12
|
+
|
|
13
|
+
// Check if canopy CLI is available
|
|
14
|
+
let hasCanopy = false;
|
|
15
|
+
try {
|
|
16
|
+
const proc = Bun.spawn(["which", "cn"], { stdout: "pipe", stderr: "pipe" });
|
|
17
|
+
const exitCode = await proc.exited;
|
|
18
|
+
hasCanopy = exitCode === 0;
|
|
19
|
+
} catch {
|
|
20
|
+
hasCanopy = false;
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
// The worktree root has its own .canopy/ symlinked/shared from the canonical root.
|
|
24
|
+
// Use process.cwd() which is set to the worktree root in bun test.
|
|
25
|
+
const cwd = process.cwd();
|
|
26
|
+
const client = createCanopyClient(cwd);
|
|
27
|
+
|
|
28
|
+
describe("CanopyClient.list()", () => {
|
|
29
|
+
test.skipIf(!hasCanopy)("returns prompts array with at least one entry", async () => {
|
|
30
|
+
const result = await client.list();
|
|
31
|
+
expect(result.success).toBe(true);
|
|
32
|
+
expect(Array.isArray(result.prompts)).toBe(true);
|
|
33
|
+
expect(result.prompts.length).toBeGreaterThan(0);
|
|
34
|
+
const first = result.prompts[0];
|
|
35
|
+
expect(first).toBeDefined();
|
|
36
|
+
expect(typeof first?.name).toBe("string");
|
|
37
|
+
expect(typeof first?.version).toBe("number");
|
|
38
|
+
expect(Array.isArray(first?.sections)).toBe(true);
|
|
39
|
+
});
|
|
40
|
+
});
|
|
41
|
+
|
|
42
|
+
describe("CanopyClient.render()", () => {
|
|
43
|
+
test.skipIf(!hasCanopy)(
|
|
44
|
+
"returns CanopyRenderResult with name, version, sections for 'builder' prompt",
|
|
45
|
+
async () => {
|
|
46
|
+
const result = await client.render("builder");
|
|
47
|
+
expect(result.success).toBe(true);
|
|
48
|
+
expect(result.name).toBe("builder");
|
|
49
|
+
expect(typeof result.version).toBe("number");
|
|
50
|
+
expect(result.version).toBeGreaterThan(0);
|
|
51
|
+
expect(Array.isArray(result.sections)).toBe(true);
|
|
52
|
+
expect(result.sections.length).toBeGreaterThan(0);
|
|
53
|
+
const section = result.sections[0];
|
|
54
|
+
expect(section).toBeDefined();
|
|
55
|
+
expect(typeof section?.name).toBe("string");
|
|
56
|
+
expect(typeof section?.body).toBe("string");
|
|
57
|
+
},
|
|
58
|
+
);
|
|
59
|
+
|
|
60
|
+
test.skipIf(!hasCanopy)("throws AgentError on non-existent prompt", async () => {
|
|
61
|
+
await expect(client.render("nonexistent-prompt-xyz-404")).rejects.toThrow(AgentError);
|
|
62
|
+
});
|
|
63
|
+
});
|
|
64
|
+
|
|
65
|
+
describe("CanopyClient.show()", () => {
|
|
66
|
+
test.skipIf(!hasCanopy)("returns prompt object for 'builder'", async () => {
|
|
67
|
+
const result = await client.show("builder");
|
|
68
|
+
expect(result.success).toBe(true);
|
|
69
|
+
expect(result.prompt).toBeDefined();
|
|
70
|
+
expect(result.prompt.name).toBe("builder");
|
|
71
|
+
expect(typeof result.prompt.version).toBe("number");
|
|
72
|
+
expect(typeof result.prompt.id).toBe("string");
|
|
73
|
+
expect(Array.isArray(result.prompt.sections)).toBe(true);
|
|
74
|
+
});
|
|
75
|
+
|
|
76
|
+
test.skipIf(!hasCanopy)("throws AgentError on non-existent prompt", async () => {
|
|
77
|
+
await expect(client.show("nonexistent-prompt-xyz-404")).rejects.toThrow(AgentError);
|
|
78
|
+
});
|
|
79
|
+
});
|
|
80
|
+
|
|
81
|
+
describe("CanopyClient.validate()", () => {
|
|
82
|
+
test.skipIf(!hasCanopy)("returns {success, errors} for a named prompt", async () => {
|
|
83
|
+
const result = await client.validate("scout");
|
|
84
|
+
expect(typeof result.success).toBe("boolean");
|
|
85
|
+
expect(Array.isArray(result.errors)).toBe(true);
|
|
86
|
+
if (result.success) {
|
|
87
|
+
expect(result.errors.length).toBe(0);
|
|
88
|
+
}
|
|
89
|
+
});
|
|
90
|
+
|
|
91
|
+
test.skipIf(!hasCanopy)("returns success=false with errors for an invalid prompt", async () => {
|
|
92
|
+
// 'builder' is known to fail schema validation (missing test gate)
|
|
93
|
+
const result = await client.validate("builder");
|
|
94
|
+
expect(typeof result.success).toBe("boolean");
|
|
95
|
+
expect(Array.isArray(result.errors)).toBe(true);
|
|
96
|
+
// Either valid or invalid — just verify structure is correct
|
|
97
|
+
if (!result.success) {
|
|
98
|
+
expect(result.errors.length).toBeGreaterThan(0);
|
|
99
|
+
}
|
|
100
|
+
});
|
|
101
|
+
|
|
102
|
+
test.skipIf(!hasCanopy)("validate --all returns result with success boolean", async () => {
|
|
103
|
+
const result = await client.validate(undefined, { all: true });
|
|
104
|
+
expect(typeof result.success).toBe("boolean");
|
|
105
|
+
expect(Array.isArray(result.errors)).toBe(true);
|
|
106
|
+
});
|
|
107
|
+
});
|