@os-eco/overstory-cli 0.9.1 → 0.9.2
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- package/README.md +20 -6
- package/agents/coordinator.md +30 -6
- package/agents/lead.md +11 -1
- package/package.json +1 -1
- package/src/agents/hooks-deployer.test.ts +9 -1
- package/src/agents/hooks-deployer.ts +2 -1
- package/src/agents/overlay.test.ts +26 -0
- package/src/agents/overlay.ts +18 -4
- package/src/commands/agents.ts +1 -1
- package/src/commands/clean.test.ts +3 -0
- package/src/commands/clean.ts +1 -58
- package/src/commands/completions.test.ts +18 -6
- package/src/commands/completions.ts +40 -1
- package/src/commands/coordinator.test.ts +77 -4
- package/src/commands/coordinator.ts +226 -124
- package/src/commands/dashboard.ts +46 -9
- package/src/commands/doctor.ts +3 -1
- package/src/commands/ecosystem.test.ts +126 -1
- package/src/commands/ecosystem.ts +7 -53
- package/src/commands/feed.test.ts +117 -2
- package/src/commands/feed.ts +46 -30
- package/src/commands/group.test.ts +274 -155
- package/src/commands/group.ts +11 -5
- package/src/commands/init.ts +8 -0
- package/src/commands/log.test.ts +35 -0
- package/src/commands/log.ts +10 -6
- package/src/commands/logs.test.ts +423 -1
- package/src/commands/logs.ts +99 -104
- package/src/commands/orchestrator.ts +42 -0
- package/src/commands/prime.test.ts +177 -2
- package/src/commands/prime.ts +4 -2
- package/src/commands/sling.ts +3 -3
- package/src/commands/upgrade.test.ts +2 -0
- package/src/commands/upgrade.ts +1 -17
- package/src/commands/watch.test.ts +67 -1
- package/src/commands/watch.ts +4 -79
- package/src/config.test.ts +250 -0
- package/src/config.ts +43 -0
- package/src/doctor/agents.test.ts +72 -5
- package/src/doctor/agents.ts +10 -10
- package/src/doctor/consistency.test.ts +35 -0
- package/src/doctor/consistency.ts +7 -3
- package/src/doctor/dependencies.test.ts +58 -1
- package/src/doctor/dependencies.ts +4 -2
- package/src/doctor/providers.test.ts +41 -5
- package/src/doctor/types.ts +2 -1
- package/src/doctor/version.test.ts +106 -2
- package/src/doctor/version.ts +4 -2
- package/src/doctor/watchdog.test.ts +167 -0
- package/src/doctor/watchdog.ts +158 -0
- package/src/e2e/init-sling-lifecycle.test.ts +2 -1
- package/src/errors.test.ts +350 -0
- package/src/events/tailer.test.ts +25 -0
- package/src/events/tailer.ts +8 -1
- package/src/index.ts +4 -1
- package/src/mail/store.test.ts +110 -0
- package/src/runtimes/aider.test.ts +124 -0
- package/src/runtimes/aider.ts +147 -0
- package/src/runtimes/amp.test.ts +164 -0
- package/src/runtimes/amp.ts +154 -0
- package/src/runtimes/claude.test.ts +4 -2
- package/src/runtimes/goose.test.ts +133 -0
- package/src/runtimes/goose.ts +157 -0
- package/src/runtimes/pi-guards.ts +2 -1
- package/src/runtimes/pi.test.ts +9 -9
- package/src/runtimes/pi.ts +6 -7
- package/src/runtimes/registry.test.ts +1 -1
- package/src/runtimes/registry.ts +13 -4
- package/src/runtimes/sapling.ts +2 -1
- package/src/runtimes/types.ts +2 -2
- package/src/types.ts +4 -0
- package/src/utils/bin.test.ts +10 -0
- package/src/utils/bin.ts +37 -0
- package/src/utils/fs.test.ts +119 -0
- package/src/utils/fs.ts +62 -0
- package/src/utils/pid.test.ts +68 -0
- package/src/utils/pid.ts +45 -0
- package/src/utils/time.test.ts +43 -0
- package/src/utils/time.ts +37 -0
- package/src/utils/version.test.ts +33 -0
- package/src/utils/version.ts +70 -0
- package/src/watchdog/daemon.test.ts +255 -1
- package/src/watchdog/daemon.ts +46 -9
- package/src/watchdog/health.test.ts +15 -1
- package/src/watchdog/health.ts +1 -1
- package/src/watchdog/triage.test.ts +49 -9
- package/src/watchdog/triage.ts +21 -5
package/README.md
CHANGED
|
@@ -6,7 +6,7 @@ Multi-agent orchestration for AI coding agents.
|
|
|
6
6
|
[CI](https://github.com/jayminwest/overstory/actions/workflows/ci.yml)
|
|
7
7
|
[License: MIT](LICENSE)
|
|
8
8
|
|
|
9
|
-
Overstory turns a single coding session into a multi-agent team by spawning worker agents in git worktrees via tmux, coordinating them through a custom SQLite mail system, and merging their work back with tiered conflict resolution. A pluggable `AgentRuntime` interface lets you swap between runtimes — Claude Code, [Pi](https://github.com/badlogic/pi-mono/tree/main/packages/coding-agent), [Gemini CLI](https://github.com/google-gemini/gemini-cli), or your own adapter.
|
|
9
|
+
Overstory turns a single coding session into a multi-agent team by spawning worker agents in git worktrees via tmux, coordinating them through a custom SQLite mail system, and merging their work back with tiered conflict resolution. A pluggable `AgentRuntime` interface lets you swap between 11 runtimes — Claude Code, [Pi](https://github.com/badlogic/pi-mono/tree/main/packages/coding-agent), [Gemini CLI](https://github.com/google-gemini/gemini-cli), [Aider](https://aider.chat), [Goose](https://github.com/block/goose), [Amp](https://amp.dev), or your own adapter.
|
|
10
10
|
|
|
11
11
|
> **Warning: Agent swarms are not a universal solution.** Do not deploy Overstory without understanding the risks of multi-agent orchestration — compounding error rates, cost amplification, debugging complexity, and merge conflicts are the normal case, not edge cases. Read [STEELMAN.md](STEELMAN.md) for a full risk analysis and the [Agentic Engineering Book](https://github.com/jayminwest/agentic-engineering-book) ([web version](https://jayminwest.com/agentic-engineering-book)) before using this tool in production.
|
|
12
12
|
|
|
@@ -22,6 +22,9 @@ Requires [Bun](https://bun.sh) v1.0+, git, and tmux. At least one supported agen
|
|
|
22
22
|
- [Cursor CLI](https://cursor.com/docs/cli/overview) (`agent` CLI)
|
|
23
23
|
- [Sapling](https://github.com/jayminwest/sapling) (`sp` CLI)
|
|
24
24
|
- [OpenCode](https://opencode.ai) (`opencode` CLI)
|
|
25
|
+
- [Aider](https://aider.chat) (`aider` CLI)
|
|
26
|
+
- [Goose](https://github.com/block/goose) (`goose` CLI)
|
|
27
|
+
- [Amp](https://amp.dev) (`amp` CLI)
|
|
25
28
|
|
|
26
29
|
```bash
|
|
27
30
|
bun install -g @os-eco/overstory-cli
|
|
@@ -102,6 +105,12 @@ Every command supports `--json` where noted. Global flags: `-q`/`--quiet`, `--ti
|
|
|
102
105
|
| `ov coordinator ask` | Synchronous request/response to coordinator (`--subject`, `--timeout`) |
|
|
103
106
|
| `ov coordinator output` | Show recent coordinator output (`--lines`) |
|
|
104
107
|
| `ov coordinator check-complete` | Evaluate exit triggers, return completion status |
|
|
108
|
+
| `ov orchestrator start` | Start multi-repo orchestrator agent (`--attach`/`--no-attach`, `--watchdog`, `--profile`) |
|
|
109
|
+
| `ov orchestrator stop` | Stop orchestrator |
|
|
110
|
+
| `ov orchestrator status` | Show orchestrator state |
|
|
111
|
+
| `ov orchestrator send` | Fire-and-forget message to orchestrator (`--subject`) |
|
|
112
|
+
| `ov orchestrator ask` | Synchronous request/response to orchestrator (`--subject`, `--timeout`) |
|
|
113
|
+
| `ov orchestrator output` | Show recent orchestrator output (`--lines`) |
|
|
105
114
|
| `ov supervisor start` | **[DEPRECATED]** Start per-project supervisor agent |
|
|
106
115
|
| `ov supervisor stop` | **[DEPRECATED]** Stop supervisor |
|
|
107
116
|
| `ov supervisor status` | **[DEPRECATED]** Show supervisor state |
|
|
@@ -165,7 +174,7 @@ Every command supports `--json` where noted. Global flags: `-q`/`--quiet`, `--ti
|
|
|
165
174
|
| `ov monitor status` | Show monitor state |
|
|
166
175
|
| `ov log <event>` | Log a hook event (`--agent`) |
|
|
167
176
|
| `ov clean` | Clean up worktrees, sessions, artifacts (`--completed`, `--all`, `--run`) |
|
|
168
|
-
| `ov doctor` | Run health checks on overstory setup —
|
|
177
|
+
| `ov doctor` | Run health checks on overstory setup — 12 categories (`--category`, `--fix`, `--json`) |
|
|
169
178
|
| `ov ecosystem` | Show os-eco tool versions and health (`--json`) |
|
|
170
179
|
| `ov upgrade` | Upgrade overstory to latest npm version (`--check`, `--all`, `--json`) |
|
|
171
180
|
| `ov agents discover` | Discover agents by capability/state/parent (`--capability`, `--state`, `--parent`, `--json`) |
|
|
@@ -188,6 +197,9 @@ Overstory is runtime-agnostic. The `AgentRuntime` interface (`src/runtimes/types
|
|
|
188
197
|
| Cursor | `agent` | (none — `--yolo`) | Experimental |
|
|
189
198
|
| Codex | `codex` | OS-level sandbox (Seatbelt/Landlock) | Experimental |
|
|
190
199
|
| Gemini | `gemini` | `--sandbox` flag | Experimental |
|
|
200
|
+
| Aider | `aider` | (none — `--yes-always`) | Experimental |
|
|
201
|
+
| Goose | `goose` | Profile-based permissions | Experimental |
|
|
202
|
+
| Amp | `amp` | Built-in approval system | Experimental |
|
|
191
203
|
| OpenCode | `opencode` | (none) | Experimental |
|
|
192
204
|
|
|
193
205
|
## How It Works
|
|
@@ -237,7 +249,7 @@ overstory/
|
|
|
237
249
|
config.ts Config loader + validation
|
|
238
250
|
errors.ts Custom error types
|
|
239
251
|
json.ts Standardized JSON envelope helpers
|
|
240
|
-
commands/ One file per CLI subcommand (
|
|
252
|
+
commands/ One file per CLI subcommand (37 commands)
|
|
241
253
|
agents.ts Agent discovery and querying
|
|
242
254
|
coordinator.ts Persistent orchestrator lifecycle
|
|
243
255
|
supervisor.ts Team lead management [DEPRECATED]
|
|
@@ -260,7 +272,7 @@ overstory/
|
|
|
260
272
|
run.ts Orchestration run lifecycle
|
|
261
273
|
trace.ts Agent/task timeline viewing
|
|
262
274
|
clean.ts Worktree/session cleanup
|
|
263
|
-
doctor.ts Health check runner (
|
|
275
|
+
doctor.ts Health check runner (12 check modules)
|
|
264
276
|
inspect.ts Deep per-agent inspection
|
|
265
277
|
spec.ts Task spec management
|
|
266
278
|
errors.ts Aggregated error view
|
|
@@ -272,6 +284,7 @@ overstory/
|
|
|
272
284
|
update.ts Refresh managed files
|
|
273
285
|
upgrade.ts npm version upgrades
|
|
274
286
|
discover.ts Brownfield codebase discovery via coordinator-driven scout swarm
|
|
287
|
+
orchestrator.ts Multi-repo coordination (PersistentAgentSpec)
|
|
275
288
|
completions.ts Shell completion generation (bash/zsh/fish)
|
|
276
289
|
canopy/
|
|
277
290
|
client.ts Canopy client (prompt rendering, listing, emission)
|
|
@@ -289,9 +302,10 @@ overstory/
|
|
|
289
302
|
watchdog/ Tiered health monitoring (daemon, triage, health)
|
|
290
303
|
logging/ Multi-format logger + sanitizer + reporter + color control + shared theme/format
|
|
291
304
|
metrics/ SQLite metrics + pricing + transcript parsing
|
|
292
|
-
doctor/ Health check modules (
|
|
305
|
+
doctor/ Health check modules (12 checks)
|
|
306
|
+
utils/ Shared utilities (bin, fs, pid, time, version)
|
|
293
307
|
insights/ Session insight analyzer for auto-expertise
|
|
294
|
-
runtimes/ AgentRuntime abstraction (registry + adapters: Claude, Pi, Copilot, Codex, Gemini, Sapling, OpenCode, Cursor)
|
|
308
|
+
runtimes/ AgentRuntime abstraction (registry + adapters: Claude, Pi, Copilot, Codex, Gemini, Sapling, OpenCode, Cursor, Aider, Goose, Amp)
|
|
295
309
|
tracker/ Pluggable task tracker (beads + seeds backends)
|
|
296
310
|
mulch/ mulch client (programmatic API + CLI wrapper)
|
|
297
311
|
e2e/ End-to-end lifecycle tests
|
package/agents/coordinator.md
CHANGED
|
@@ -11,12 +11,13 @@ Every spawned agent costs a full Claude Code session. The coordinator must be ec
|
|
|
11
11
|
- **Avoid polling loops.** Check status after each mail, or at reasonable intervals. The mail system notifies you of completions.
|
|
12
12
|
- **Trust your leads.** Do not micromanage. Give leads clear objectives and let them decompose, explore, spec, and build autonomously. Only intervene on escalations or stalls.
|
|
13
13
|
- **Prefer fewer, broader leads** over many narrow ones. A lead managing 5 builders is more efficient than you coordinating 5 builders directly.
|
|
14
|
+
- **Compress roles when the budget is tight.** If keeping total agents low matters, you may act as a combined coordinator/lead by spawning a scout or builder directly for a narrow work stream, or dispatch a lead with `--dispatch-max-agents 1` or `2` so the lead compresses into lead/worker mode.
|
|
14
15
|
|
|
15
16
|
## failure-modes
|
|
16
17
|
|
|
17
18
|
These are named failures. If you catch yourself doing any of these, stop and correct immediately.
|
|
18
19
|
|
|
19
|
-
- **HIERARCHY_BYPASS** -- Spawning a
|
|
20
|
+
- **HIERARCHY_BYPASS** -- Spawning a reviewer or merger directly, or spawning a builder/scout directly for work that clearly needs a lead-owned work stream. Direct scout/builder fallback is only for narrow or budget-constrained cases.
|
|
20
21
|
- **SPEC_WRITING** -- Writing spec files or using the Write/Edit tools. You have no write access. Leads produce specs (via their scouts). Your job is to provide high-level objectives in {{TRACKER_NAME}} issues and dispatch mail.
|
|
21
22
|
- **CODE_MODIFICATION** -- Using Write or Edit on any file. You are a coordinator, not an implementer.
|
|
22
23
|
- **UNNECESSARY_SPAWN** -- Spawning a lead for a trivially small task. If the objective is a single small change, a single lead is sufficient. Only spawn multiple leads for genuinely independent work streams.
|
|
@@ -46,7 +47,7 @@ This file tells you HOW to coordinate. Your objectives come from the channels ab
|
|
|
46
47
|
- **NEVER** use the Write tool on any file. You have no write access.
|
|
47
48
|
- **NEVER** use the Edit tool on any file. You have no write access.
|
|
48
49
|
- **NEVER** write spec files. Leads own spec production -- they spawn scouts to explore, then write specs from findings.
|
|
49
|
-
- **NEVER** spawn
|
|
50
|
+
- **NEVER** spawn reviewers or mergers directly. `sling.ts` allows direct `lead`, `scout`, and `builder` spawns, but direct `scout`/`builder` use is a fallback for low-budget or very small tasks, not the default.
|
|
50
51
|
- **NEVER** run bash commands that modify source code, dependencies, or git history:
|
|
51
52
|
- No `git commit`, `git checkout`, `git merge`, `git push`, `git reset`
|
|
52
53
|
- No `rm`, `mv`, `cp`, `mkdir` on source directories
|
|
@@ -118,7 +119,7 @@ You are the **coordinator agent** in the overstory swarm system. You are the per
|
|
|
118
119
|
|
|
119
120
|
## role
|
|
120
121
|
|
|
121
|
-
You are the top-level decision-maker for automated work. When a human gives you an objective (a feature, a refactor, a migration), you analyze it, create high-level {{TRACKER_NAME}} issues, dispatch **lead agents** to own each work stream, monitor their progress via mail and status checks, and handle escalations. Leads handle all downstream coordination: they spawn scouts to explore, write specs from findings, spawn builders to implement, and spawn reviewers to validate. You operate from the project root with full read visibility but **no write access** to any files. Your outputs are issues,
|
|
122
|
+
You are the top-level decision-maker for automated work. When a human gives you an objective (a feature, a refactor, a migration), you analyze it, create high-level {{TRACKER_NAME}} issues, dispatch **lead agents** to own each work stream, monitor their progress via mail and status checks, and handle escalations. Leads handle all downstream coordination: they spawn scouts to explore, write specs from findings, spawn builders to implement, and spawn reviewers to validate. When the available agent budget is intentionally small, you may compress roles by either spawning a direct scout/builder yourself or by dispatching a lead with a very small `--dispatch-max-agents` budget. You operate from the project root with full read visibility but **no write access** to any files. Your outputs are issues, dispatches, and coordination messages -- never code, never specs.
|
|
122
123
|
|
|
123
124
|
## capabilities
|
|
124
125
|
|
|
@@ -128,7 +129,7 @@ You are the top-level decision-maker for automated work. When a human gives you
|
|
|
128
129
|
- **Grep** -- search file contents with regex
|
|
129
130
|
- **Bash** (coordination commands only):
|
|
130
131
|
- `{{TRACKER_CLI}} create`, `{{TRACKER_CLI}} show`, `{{TRACKER_CLI}} ready`, `{{TRACKER_CLI}} update`, `{{TRACKER_CLI}} close`, `{{TRACKER_CLI}} list`, `{{TRACKER_CLI}} sync` (full {{TRACKER_NAME}} lifecycle)
|
|
131
|
-
- `ov sling` (spawn lead agents
|
|
132
|
+
- `ov sling` (spawn lead agents by default; direct scout/builder fallback for low-budget narrow work)
|
|
132
133
|
- `ov status` (monitor active agents and worktrees)
|
|
133
134
|
- `ov mail send`, `ov mail check`, `ov mail list`, `ov mail read`, `ov mail reply` (full mail protocol)
|
|
134
135
|
- `ov nudge <agent> [message]` (poke stalled leads)
|
|
@@ -141,7 +142,7 @@ You are the top-level decision-maker for automated work. When a human gives you
|
|
|
141
142
|
|
|
142
143
|
### Spawning Agents
|
|
143
144
|
|
|
144
|
-
**
|
|
145
|
+
**Default:** spawn leads. **Fallback:** you may also spawn a `scout` or `builder` directly when the work stream is narrow enough that a separate lead would be pure overhead, or when the agent budget is intentionally low. Never spawn `reviewer` or `merger` directly.
|
|
145
146
|
|
|
146
147
|
```bash
|
|
147
148
|
ov sling <task-id> \
|
|
@@ -150,7 +151,20 @@ ov sling <task-id> \
|
|
|
150
151
|
--depth 1
|
|
151
152
|
```
|
|
152
153
|
|
|
153
|
-
|
|
154
|
+
Low-budget fallback examples:
|
|
155
|
+
|
|
156
|
+
```bash
|
|
157
|
+
# Direct scout: coordinator is acting as combined coordinator/lead
|
|
158
|
+
ov sling <task-id> --capability scout --name <scout-name> --depth 1
|
|
159
|
+
|
|
160
|
+
# Direct builder for a small, concrete task that does not need a separate lead/spec cycle
|
|
161
|
+
ov sling <task-id> --capability builder --name <builder-name> --depth 1
|
|
162
|
+
|
|
163
|
+
# Compressed lead: keep the lead, but force it to act as lead/worker
|
|
164
|
+
ov sling <task-id> --capability lead --name <lead-name> --depth 1 --dispatch-max-agents 1
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
You are always at depth 0. In the normal hierarchy, leads you spawn are depth 1. Leads spawn their own scouts, builders, and reviewers at depth 2:
|
|
154
168
|
|
|
155
169
|
```
|
|
156
170
|
Coordinator (you, depth 0)
|
|
@@ -160,6 +174,13 @@ Coordinator (you, depth 0)
|
|
|
160
174
|
└── Reviewer (depth 2) — validates quality
|
|
161
175
|
```
|
|
162
176
|
|
|
177
|
+
Compressed hierarchy is also valid when you are deliberately minimizing agent count:
|
|
178
|
+
|
|
179
|
+
```
|
|
180
|
+
Coordinator (you, depth 0, acting as coordinator/lead)
|
|
181
|
+
└── Scout or Builder (depth 1)
|
|
182
|
+
```
|
|
183
|
+
|
|
163
184
|
### Communication
|
|
164
185
|
- **Send typed mail:** `ov mail send --to <agent> --subject "<subject>" --body "<body>" --type <type> --priority <priority>`
|
|
165
186
|
- **Check inbox:** `ov mail check` (unread messages)
|
|
@@ -206,6 +227,9 @@ Coordinator (you, depth 0)
|
|
|
206
227
|
```bash
|
|
207
228
|
ov sling <task-id> --capability lead --name <lead-name> --depth 1
|
|
208
229
|
```
|
|
230
|
+
If a work stream is very small or the available agent budget is intentionally constrained, you may instead:
|
|
231
|
+
- Spawn a direct `scout` or `builder` and treat yourself as the combined coordinator/lead for that stream.
|
|
232
|
+
- Spawn a lead with `--dispatch-max-agents 1` or `--dispatch-max-agents 2` so the lead compresses its downstream roles.
|
|
209
233
|
6. **Send dispatch mail** to each lead with the high-level objective:
|
|
210
234
|
```bash
|
|
211
235
|
ov mail send --to <lead-name> --subject "Work stream: <title>" \
|
package/agents/lead.md
CHANGED
|
@@ -9,6 +9,11 @@ Your overlay may contain a **Dispatch Overrides** section with directives from y
|
|
|
9
9
|
- **SKIP REVIEW**: Do not spawn a reviewer. Self-verify by reading the builder diff and running quality gates. This is appropriate for simple or well-tested changes.
|
|
10
10
|
- **MAX AGENTS**: Limits the number of sub-workers you may spawn. Plan your decomposition to fit within this budget.
|
|
11
11
|
|
|
12
|
+
Budget compression rules:
|
|
13
|
+
- **MAX AGENTS = 1**: Act as a combined **lead/worker**. Default to doing the implementation yourself. Only use the single spawn slot if one specialist is clearly more valuable than your own direct work.
|
|
14
|
+
- **MAX AGENTS = 2**: Act as a compressed lead. Prefer at most one helper at a time, then finish remaining implementation and verification yourself. Do not assume there is room for a separate reviewer.
|
|
15
|
+
- **MAX AGENTS >= 3**: Use normal lead behavior and choose the right scout/builder/reviewer mix for the task.
|
|
16
|
+
|
|
12
17
|
Always check your overlay for dispatch overrides before following the default three-phase workflow. If no overrides section exists, follow the standard playbook.
|
|
13
18
|
|
|
14
19
|
## cost-awareness
|
|
@@ -19,6 +24,8 @@ Scouts and reviewers are quality investments, not overhead. Skipping a scout to
|
|
|
19
24
|
|
|
20
25
|
Reviewers are valuable for complex changes but optional for simple ones. The lead can self-verify simple changes by reading the diff and running quality gates, saving a full agent spawn.
|
|
21
26
|
|
|
27
|
+
When your overlay gives you a very small agent budget, role compression beats ceremony. A correct combined lead/worker execution is better than blocking on an ideal scout -> builder -> reviewer chain that the budget cannot support.
|
|
28
|
+
|
|
22
29
|
Where to actually save tokens:
|
|
23
30
|
- Prefer fewer, well-scoped builders over many small ones.
|
|
24
31
|
- Batch status updates instead of sending per-worker messages.
|
|
@@ -143,7 +150,7 @@ Criteria — ANY:
|
|
|
143
150
|
- Straightforward implementation with clear spec
|
|
144
151
|
- Single builder can handle the full scope
|
|
145
152
|
|
|
146
|
-
Action: Skip scouts if you have sufficient context (mulch records, dispatch details, file reads). Spawn one builder. Lead verifies by reading the diff and checking quality gates instead of spawning a reviewer.
|
|
153
|
+
Action: Skip scouts if you have sufficient context (mulch records, dispatch details, file reads). Spawn one builder. Lead verifies by reading the diff and checking quality gates instead of spawning a reviewer. If **MAX AGENTS = 1**, do this work yourself instead of spawning the builder.
|
|
147
154
|
|
|
148
155
|
### Complex Tasks (Full Pipeline)
|
|
149
156
|
Criteria — ANY:
|
|
@@ -153,6 +160,9 @@ Criteria — ANY:
|
|
|
153
160
|
- Multiple builders needed with file scope partitioning
|
|
154
161
|
|
|
155
162
|
Action: Full Scout → Build → Verify pipeline. Spawn scouts for exploration, multiple builders for parallel work, reviewers for independent verification.
|
|
163
|
+
If your overlay budget is too small to support that pipeline, compress roles deliberately:
|
|
164
|
+
- With **MAX AGENTS = 2**, use one scout or one builder, not both in parallel, then do the remaining work and verification yourself.
|
|
165
|
+
- With **MAX AGENTS = 1**, you are effectively the worker. Explore just enough to ground the change, implement directly, and self-verify.
|
|
156
166
|
|
|
157
167
|
## three-phase-workflow
|
|
158
168
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@os-eco/overstory-cli",
|
|
3
|
-
"version": "0.9.1",
|
|
3
|
+
"version": "0.9.2",
|
|
4
4
|
"description": "Multi-agent orchestration for AI coding agents — spawn workers in git worktrees via tmux, coordinate through SQLite mail, merge with tiered conflict resolution. Pluggable runtime adapters for Claude Code, Pi, and more.",
|
|
5
5
|
"author": "Jaymin West",
|
|
6
6
|
"license": "MIT",
|
|
@@ -2555,7 +2555,15 @@ describe("deployHooks tracker close guard integration", () => {
|
|
|
2555
2555
|
});
|
|
2556
2556
|
|
|
2557
2557
|
test("deployHooks includes tracker close guard in PreToolUse for all capabilities", async () => {
|
|
2558
|
-
const capabilities = [
|
|
2558
|
+
const capabilities = [
|
|
2559
|
+
"builder",
|
|
2560
|
+
"scout",
|
|
2561
|
+
"reviewer",
|
|
2562
|
+
"lead",
|
|
2563
|
+
"merger",
|
|
2564
|
+
"orchestrator",
|
|
2565
|
+
"coordinator",
|
|
2566
|
+
];
|
|
2559
2567
|
|
|
2560
2568
|
for (const cap of capabilities) {
|
|
2561
2569
|
const wt = join(tempDir, `${cap}-tc-wt`);
|
|
@@ -20,6 +20,7 @@ const NON_IMPLEMENTATION_CAPABILITIES = new Set([
|
|
|
20
20
|
"scout",
|
|
21
21
|
"reviewer",
|
|
22
22
|
"lead",
|
|
23
|
+
"orchestrator",
|
|
23
24
|
"coordinator",
|
|
24
25
|
"supervisor",
|
|
25
26
|
"monitor",
|
|
@@ -29,7 +30,7 @@ const NON_IMPLEMENTATION_CAPABILITIES = new Set([
|
|
|
29
30
|
* Capabilities that coordinate work and need git add/commit for syncing
|
|
30
31
|
* tasks, mulch, and other metadata — but must NOT git push.
|
|
31
32
|
*/
|
|
32
|
-
const COORDINATION_CAPABILITIES = new Set(["coordinator", "supervisor", "monitor"]);
|
|
33
|
+
const COORDINATION_CAPABILITIES = new Set(["coordinator", "orchestrator", "supervisor", "monitor"]);
|
|
33
34
|
|
|
34
35
|
/**
|
|
35
36
|
* Additional safe Bash prefixes for coordination capabilities.
|
|
@@ -523,6 +523,32 @@ describe("generateOverlay", () => {
|
|
|
523
523
|
expect(output).toContain("3");
|
|
524
524
|
});
|
|
525
525
|
|
|
526
|
+
test("dispatch overrides: maxAgentsOverride of 1 enables combined lead/worker guidance", async () => {
|
|
527
|
+
const config = makeConfig({
|
|
528
|
+
capability: "lead",
|
|
529
|
+
maxAgentsOverride: 1,
|
|
530
|
+
canSpawn: true,
|
|
531
|
+
});
|
|
532
|
+
const output = await generateOverlay(config);
|
|
533
|
+
|
|
534
|
+
expect(output).toContain("MAX AGENTS");
|
|
535
|
+
expect(output).toContain("combined **lead/worker**");
|
|
536
|
+
expect(output).toContain("only slot");
|
|
537
|
+
});
|
|
538
|
+
|
|
539
|
+
test("dispatch overrides: maxAgentsOverride of 2 enables compressed-mode guidance", async () => {
|
|
540
|
+
const config = makeConfig({
|
|
541
|
+
capability: "lead",
|
|
542
|
+
maxAgentsOverride: 2,
|
|
543
|
+
canSpawn: true,
|
|
544
|
+
});
|
|
545
|
+
const output = await generateOverlay(config);
|
|
546
|
+
|
|
547
|
+
expect(output).toContain("MAX AGENTS");
|
|
548
|
+
expect(output).toContain("compressed mode");
|
|
549
|
+
expect(output).toContain("self-verification");
|
|
550
|
+
});
|
|
551
|
+
|
|
526
552
|
test("dispatch overrides: both skipReview and maxAgentsOverride together", async () => {
|
|
527
553
|
const config = makeConfig({
|
|
528
554
|
capability: "lead",
|
package/src/agents/overlay.ts
CHANGED
|
@@ -102,10 +102,24 @@ function formatDispatchOverrides(config: OverlayConfig): string {
|
|
|
102
102
|
}
|
|
103
103
|
|
|
104
104
|
if (config.maxAgentsOverride !== undefined && config.maxAgentsOverride > 0) {
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
"
|
|
108
|
-
|
|
105
|
+
if (config.maxAgentsOverride === 1) {
|
|
106
|
+
sections.push(
|
|
107
|
+
"- **MAX AGENTS**: Your per-lead agent ceiling has been set to **1**. " +
|
|
108
|
+
"Operate as a combined **lead/worker**: implement the task yourself unless a single specialist is absolutely necessary. " +
|
|
109
|
+
"Do not spend your only slot on a scout or reviewer unless that specialist work is the real bottleneck.",
|
|
110
|
+
);
|
|
111
|
+
} else if (config.maxAgentsOverride === 2) {
|
|
112
|
+
sections.push(
|
|
113
|
+
"- **MAX AGENTS**: Your per-lead agent ceiling has been set to **2**. " +
|
|
114
|
+
"Operate in compressed mode: use at most one helper at a time when possible, then complete the remaining implementation and verification yourself. " +
|
|
115
|
+
"Prefer self-verification over spawning a separate reviewer.",
|
|
116
|
+
);
|
|
117
|
+
} else {
|
|
118
|
+
sections.push(
|
|
119
|
+
`- **MAX AGENTS**: Your per-lead agent ceiling has been set to **${config.maxAgentsOverride}**. ` +
|
|
120
|
+
"Do not spawn more than this many sub-workers.",
|
|
121
|
+
);
|
|
122
|
+
}
|
|
109
123
|
}
|
|
110
124
|
|
|
111
125
|
if (sections.length === 0) return "";
|
package/src/commands/agents.ts
CHANGED
|
@@ -223,7 +223,7 @@ export function createAgentsCommand(): Command {
|
|
|
223
223
|
.description("Find active agents by capability")
|
|
224
224
|
.option(
|
|
225
225
|
"--capability <type>",
|
|
226
|
-
"Filter by capability (builder, scout, reviewer, lead, merger, coordinator, supervisor)",
|
|
226
|
+
"Filter by capability (builder, scout, reviewer, lead, merger, orchestrator, coordinator, supervisor)",
|
|
227
227
|
)
|
|
228
228
|
.option("--all", "Include completed and zombie agents (default: active only)")
|
|
229
229
|
.option("--json", "Output as JSON")
|
package/src/commands/clean.ts
CHANGED
|
@@ -20,7 +20,6 @@
|
|
|
20
20
|
*/
|
|
21
21
|
|
|
22
22
|
import { existsSync } from "node:fs";
|
|
23
|
-
import { readdir, rm, unlink } from "node:fs/promises";
|
|
24
23
|
import { join } from "node:path";
|
|
25
24
|
import { loadConfig } from "../config.ts";
|
|
26
25
|
import { AgentError, ValidationError } from "../errors.ts";
|
|
@@ -30,6 +29,7 @@ import { printHint, printSuccess } from "../logging/color.ts";
|
|
|
30
29
|
import { createMulchClient } from "../mulch/client.ts";
|
|
31
30
|
import { openSessionStore } from "../sessions/compat.ts";
|
|
32
31
|
import type { AgentSession, MulchDoctorResult, MulchPruneResult, MulchStatus } from "../types.ts";
|
|
32
|
+
import { clearDirectory, deleteFile, resetJsonFile, wipeSqliteDb } from "../utils/fs.ts";
|
|
33
33
|
import { listWorktrees, removeWorktree } from "../worktree/manager.ts";
|
|
34
34
|
import {
|
|
35
35
|
isProcessAlive,
|
|
@@ -274,63 +274,6 @@ async function deleteOrphanedBranches(root: string): Promise<number> {
|
|
|
274
274
|
return deleted;
|
|
275
275
|
}
|
|
276
276
|
|
|
277
|
-
/**
|
|
278
|
-
* Delete a SQLite database file and its WAL/SHM companions.
|
|
279
|
-
*/
|
|
280
|
-
async function wipeSqliteDb(dbPath: string): Promise<boolean> {
|
|
281
|
-
const extensions = ["", "-wal", "-shm"];
|
|
282
|
-
let wiped = false;
|
|
283
|
-
for (const ext of extensions) {
|
|
284
|
-
try {
|
|
285
|
-
await unlink(`${dbPath}${ext}`);
|
|
286
|
-
if (ext === "") wiped = true;
|
|
287
|
-
} catch {
|
|
288
|
-
// File may not exist
|
|
289
|
-
}
|
|
290
|
-
}
|
|
291
|
-
return wiped;
|
|
292
|
-
}
|
|
293
|
-
|
|
294
|
-
/**
|
|
295
|
-
* Reset a JSON file to an empty array.
|
|
296
|
-
*/
|
|
297
|
-
async function resetJsonFile(path: string): Promise<boolean> {
|
|
298
|
-
const file = Bun.file(path);
|
|
299
|
-
if (await file.exists()) {
|
|
300
|
-
await Bun.write(path, "[]\n");
|
|
301
|
-
return true;
|
|
302
|
-
}
|
|
303
|
-
return false;
|
|
304
|
-
}
|
|
305
|
-
|
|
306
|
-
/**
|
|
307
|
-
* Clear all entries inside a directory but keep the directory itself.
|
|
308
|
-
*/
|
|
309
|
-
async function clearDirectory(dirPath: string): Promise<boolean> {
|
|
310
|
-
try {
|
|
311
|
-
const entries = await readdir(dirPath);
|
|
312
|
-
for (const entry of entries) {
|
|
313
|
-
await rm(join(dirPath, entry), { recursive: true, force: true });
|
|
314
|
-
}
|
|
315
|
-
return entries.length > 0;
|
|
316
|
-
} catch {
|
|
317
|
-
// Directory may not exist
|
|
318
|
-
return false;
|
|
319
|
-
}
|
|
320
|
-
}
|
|
321
|
-
|
|
322
|
-
/**
|
|
323
|
-
* Delete a single file if it exists.
|
|
324
|
-
*/
|
|
325
|
-
async function deleteFile(path: string): Promise<boolean> {
|
|
326
|
-
try {
|
|
327
|
-
await unlink(path);
|
|
328
|
-
return true;
|
|
329
|
-
} catch {
|
|
330
|
-
return false;
|
|
331
|
-
}
|
|
332
|
-
}
|
|
333
|
-
|
|
334
277
|
/**
|
|
335
278
|
* Check mulch repository health and return diagnostic information.
|
|
336
279
|
*
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* Tests for shell completion generation.
|
|
3
3
|
*/
|
|
4
4
|
|
|
5
|
-
import { describe, expect, it, mock } from "bun:test";
|
|
5
|
+
import { afterEach, describe, expect, it, mock } from "bun:test";
|
|
6
6
|
import {
|
|
7
7
|
COMMANDS,
|
|
8
8
|
completionsCommand,
|
|
@@ -11,9 +11,13 @@ import {
|
|
|
11
11
|
generateZsh,
|
|
12
12
|
} from "./completions.ts";
|
|
13
13
|
|
|
14
|
+
afterEach(() => {
|
|
15
|
+
process.exitCode = undefined;
|
|
16
|
+
});
|
|
17
|
+
|
|
14
18
|
describe("COMMANDS array", () => {
|
|
15
|
-
it("should have exactly 33 commands", () => {
|
|
16
|
-
expect(COMMANDS).toHaveLength(33);
|
|
19
|
+
it("should have exactly 34 commands", () => {
|
|
20
|
+
expect(COMMANDS).toHaveLength(34);
|
|
17
21
|
});
|
|
18
22
|
|
|
19
23
|
it("should include all expected command names", () => {
|
|
@@ -37,6 +41,7 @@ describe("COMMANDS array", () => {
|
|
|
37
41
|
expect(names).toContain("costs");
|
|
38
42
|
expect(names).toContain("metrics");
|
|
39
43
|
expect(names).toContain("spec");
|
|
44
|
+
expect(names).toContain("orchestrator");
|
|
40
45
|
expect(names).toContain("coordinator");
|
|
41
46
|
expect(names).toContain("supervisor");
|
|
42
47
|
expect(names).toContain("hooks");
|
|
@@ -62,7 +67,7 @@ describe("generateBash", () => {
|
|
|
62
67
|
expect(script).toContain("_init_completion");
|
|
63
68
|
});
|
|
64
69
|
|
|
65
|
-
it("should include all 33 command names", () => {
|
|
70
|
+
it("should include all 34 command names", () => {
|
|
66
71
|
const script = generateBash();
|
|
67
72
|
for (const cmd of COMMANDS) {
|
|
68
73
|
expect(script).toContain(cmd.name);
|
|
@@ -96,7 +101,7 @@ describe("generateZsh", () => {
|
|
|
96
101
|
expect(script).toContain("_arguments");
|
|
97
102
|
});
|
|
98
103
|
|
|
99
|
-
it("should include all 33 command names", () => {
|
|
104
|
+
it("should include all 34 command names", () => {
|
|
100
105
|
const script = generateZsh();
|
|
101
106
|
for (const cmd of COMMANDS) {
|
|
102
107
|
expect(script).toContain(cmd.name);
|
|
@@ -126,7 +131,7 @@ describe("generateFish", () => {
|
|
|
126
131
|
expect(script).toContain("__fish_use_subcommand");
|
|
127
132
|
});
|
|
128
133
|
|
|
129
|
-
it("should include all 33 command names", () => {
|
|
134
|
+
it("should include all 34 command names", () => {
|
|
130
135
|
const script = generateFish();
|
|
131
136
|
for (const cmd of COMMANDS) {
|
|
132
137
|
expect(script).toContain(cmd.name);
|
|
@@ -148,6 +153,13 @@ describe("generateFish", () => {
|
|
|
148
153
|
expect(script).toContain("error");
|
|
149
154
|
expect(script).toContain("worker_done");
|
|
150
155
|
});
|
|
156
|
+
|
|
157
|
+
it("should include orchestrator command with start/stop/status subcommands", () => {
|
|
158
|
+
const orchestrator = COMMANDS.find((c) => c.name === "orchestrator");
|
|
159
|
+
expect(orchestrator).toBeDefined();
|
|
160
|
+
const subcommands = orchestrator?.subcommands?.map((s) => s.name);
|
|
161
|
+
expect(subcommands).toEqual(["start", "stop", "status"]);
|
|
162
|
+
});
|
|
151
163
|
});
|
|
152
164
|
|
|
153
165
|
describe("completionsCommand", () => {
|
|
@@ -44,7 +44,16 @@ export const COMMANDS: readonly CommandDef[] = [
|
|
|
44
44
|
name: "--capability",
|
|
45
45
|
desc: "Filter by capability",
|
|
46
46
|
takesValue: true,
|
|
47
|
-
values: [
|
|
47
|
+
values: [
|
|
48
|
+
"builder",
|
|
49
|
+
"scout",
|
|
50
|
+
"reviewer",
|
|
51
|
+
"lead",
|
|
52
|
+
"merger",
|
|
53
|
+
"orchestrator",
|
|
54
|
+
"coordinator",
|
|
55
|
+
"supervisor",
|
|
56
|
+
],
|
|
48
57
|
},
|
|
49
58
|
{ name: "--all", desc: "Include completed and zombie agents" },
|
|
50
59
|
{ name: "--json", desc: "JSON output" },
|
|
@@ -343,6 +352,36 @@ export const COMMANDS: readonly CommandDef[] = [
|
|
|
343
352
|
},
|
|
344
353
|
],
|
|
345
354
|
},
|
|
355
|
+
{
|
|
356
|
+
name: "orchestrator",
|
|
357
|
+
desc: "Persistent ecosystem orchestrator agent",
|
|
358
|
+
flags: [
|
|
359
|
+
{ name: "--json", desc: "JSON output" },
|
|
360
|
+
{ name: "--help", desc: "Show help" },
|
|
361
|
+
],
|
|
362
|
+
subcommands: [
|
|
363
|
+
{
|
|
364
|
+
name: "start",
|
|
365
|
+
desc: "Start orchestrator",
|
|
366
|
+
flags: [
|
|
367
|
+
{ name: "--attach", desc: "Attach to tmux session" },
|
|
368
|
+
{ name: "--no-attach", desc: "Do not attach to tmux session" },
|
|
369
|
+
{ name: "--watchdog", desc: "Auto-start watchdog daemon" },
|
|
370
|
+
{ name: "--json", desc: "JSON output" },
|
|
371
|
+
],
|
|
372
|
+
},
|
|
373
|
+
{
|
|
374
|
+
name: "stop",
|
|
375
|
+
desc: "Stop orchestrator",
|
|
376
|
+
flags: [{ name: "--json", desc: "JSON output" }],
|
|
377
|
+
},
|
|
378
|
+
{
|
|
379
|
+
name: "status",
|
|
380
|
+
desc: "Show orchestrator state",
|
|
381
|
+
flags: [{ name: "--json", desc: "JSON output" }],
|
|
382
|
+
},
|
|
383
|
+
],
|
|
384
|
+
},
|
|
346
385
|
{
|
|
347
386
|
name: "coordinator",
|
|
348
387
|
desc: "Persistent coordinator agent",
|