@os-eco/overstory-cli 0.9.1 → 0.9.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. package/README.md +20 -6
  2. package/agents/coordinator.md +30 -6
  3. package/agents/lead.md +11 -1
  4. package/package.json +1 -1
  5. package/src/agents/hooks-deployer.test.ts +9 -1
  6. package/src/agents/hooks-deployer.ts +2 -1
  7. package/src/agents/overlay.test.ts +26 -0
  8. package/src/agents/overlay.ts +18 -4
  9. package/src/commands/agents.ts +1 -1
  10. package/src/commands/clean.test.ts +3 -0
  11. package/src/commands/clean.ts +1 -58
  12. package/src/commands/completions.test.ts +18 -6
  13. package/src/commands/completions.ts +40 -1
  14. package/src/commands/coordinator.test.ts +77 -4
  15. package/src/commands/coordinator.ts +226 -124
  16. package/src/commands/dashboard.ts +46 -9
  17. package/src/commands/doctor.ts +3 -1
  18. package/src/commands/ecosystem.test.ts +126 -1
  19. package/src/commands/ecosystem.ts +7 -53
  20. package/src/commands/feed.test.ts +117 -2
  21. package/src/commands/feed.ts +46 -30
  22. package/src/commands/group.test.ts +274 -155
  23. package/src/commands/group.ts +11 -5
  24. package/src/commands/init.ts +8 -0
  25. package/src/commands/log.test.ts +35 -0
  26. package/src/commands/log.ts +10 -6
  27. package/src/commands/logs.test.ts +423 -1
  28. package/src/commands/logs.ts +99 -104
  29. package/src/commands/orchestrator.ts +42 -0
  30. package/src/commands/prime.test.ts +177 -2
  31. package/src/commands/prime.ts +4 -2
  32. package/src/commands/sling.ts +3 -3
  33. package/src/commands/upgrade.test.ts +2 -0
  34. package/src/commands/upgrade.ts +1 -17
  35. package/src/commands/watch.test.ts +67 -1
  36. package/src/commands/watch.ts +4 -79
  37. package/src/config.test.ts +250 -0
  38. package/src/config.ts +43 -0
  39. package/src/doctor/agents.test.ts +72 -5
  40. package/src/doctor/agents.ts +10 -10
  41. package/src/doctor/consistency.test.ts +35 -0
  42. package/src/doctor/consistency.ts +7 -3
  43. package/src/doctor/dependencies.test.ts +58 -1
  44. package/src/doctor/dependencies.ts +4 -2
  45. package/src/doctor/providers.test.ts +41 -5
  46. package/src/doctor/types.ts +2 -1
  47. package/src/doctor/version.test.ts +106 -2
  48. package/src/doctor/version.ts +4 -2
  49. package/src/doctor/watchdog.test.ts +167 -0
  50. package/src/doctor/watchdog.ts +158 -0
  51. package/src/e2e/init-sling-lifecycle.test.ts +2 -1
  52. package/src/errors.test.ts +350 -0
  53. package/src/events/tailer.test.ts +25 -0
  54. package/src/events/tailer.ts +8 -1
  55. package/src/index.ts +4 -1
  56. package/src/mail/store.test.ts +110 -0
  57. package/src/runtimes/aider.test.ts +124 -0
  58. package/src/runtimes/aider.ts +147 -0
  59. package/src/runtimes/amp.test.ts +164 -0
  60. package/src/runtimes/amp.ts +154 -0
  61. package/src/runtimes/claude.test.ts +4 -2
  62. package/src/runtimes/goose.test.ts +133 -0
  63. package/src/runtimes/goose.ts +157 -0
  64. package/src/runtimes/pi-guards.ts +2 -1
  65. package/src/runtimes/pi.test.ts +9 -9
  66. package/src/runtimes/pi.ts +6 -7
  67. package/src/runtimes/registry.test.ts +1 -1
  68. package/src/runtimes/registry.ts +13 -4
  69. package/src/runtimes/sapling.ts +2 -1
  70. package/src/runtimes/types.ts +2 -2
  71. package/src/types.ts +4 -0
  72. package/src/utils/bin.test.ts +10 -0
  73. package/src/utils/bin.ts +37 -0
  74. package/src/utils/fs.test.ts +119 -0
  75. package/src/utils/fs.ts +62 -0
  76. package/src/utils/pid.test.ts +68 -0
  77. package/src/utils/pid.ts +45 -0
  78. package/src/utils/time.test.ts +43 -0
  79. package/src/utils/time.ts +37 -0
  80. package/src/utils/version.test.ts +33 -0
  81. package/src/utils/version.ts +70 -0
  82. package/src/watchdog/daemon.test.ts +255 -1
  83. package/src/watchdog/daemon.ts +46 -9
  84. package/src/watchdog/health.test.ts +15 -1
  85. package/src/watchdog/health.ts +1 -1
  86. package/src/watchdog/triage.test.ts +49 -9
  87. package/src/watchdog/triage.ts +21 -5
package/README.md CHANGED
@@ -6,7 +6,7 @@ Multi-agent orchestration for AI coding agents.
6
6
  [![CI](https://github.com/jayminwest/overstory/actions/workflows/ci.yml/badge.svg)](https://github.com/jayminwest/overstory/actions/workflows/ci.yml)
7
7
  [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
8
8
 
9
- Overstory turns a single coding session into a multi-agent team by spawning worker agents in git worktrees via tmux, coordinating them through a custom SQLite mail system, and merging their work back with tiered conflict resolution. A pluggable `AgentRuntime` interface lets you swap between runtimes — Claude Code, [Pi](https://github.com/badlogic/pi-mono/tree/main/packages/coding-agent), [Gemini CLI](https://github.com/google-gemini/gemini-cli), or your own adapter.
9
+ Overstory turns a single coding session into a multi-agent team by spawning worker agents in git worktrees via tmux, coordinating them through a custom SQLite mail system, and merging their work back with tiered conflict resolution. A pluggable `AgentRuntime` interface lets you swap between 11 runtimes — including Claude Code, [Pi](https://github.com/badlogic/pi-mono/tree/main/packages/coding-agent), [Gemini CLI](https://github.com/google-gemini/gemini-cli), [Aider](https://aider.chat), [Goose](https://github.com/block/goose), and [Amp](https://amp.dev) — or plug in your own adapter.
10
10
 
11
11
  > **Warning: Agent swarms are not a universal solution.** Do not deploy Overstory without understanding the risks of multi-agent orchestration — compounding error rates, cost amplification, debugging complexity, and merge conflicts are the normal case, not edge cases. Read [STEELMAN.md](STEELMAN.md) for a full risk analysis and the [Agentic Engineering Book](https://github.com/jayminwest/agentic-engineering-book) ([web version](https://jayminwest.com/agentic-engineering-book)) before using this tool in production.
12
12
 
@@ -22,6 +22,9 @@ Requires [Bun](https://bun.sh) v1.0+, git, and tmux. At least one supported agen
22
22
  - [Cursor CLI](https://cursor.com/docs/cli/overview) (`agent` CLI)
23
23
  - [Sapling](https://github.com/jayminwest/sapling) (`sp` CLI)
24
24
  - [OpenCode](https://opencode.ai) (`opencode` CLI)
25
+ - [Aider](https://aider.chat) (`aider` CLI)
26
+ - [Goose](https://github.com/block/goose) (`goose` CLI)
27
+ - [Amp](https://amp.dev) (`amp` CLI)
25
28
 
26
29
  ```bash
27
30
  bun install -g @os-eco/overstory-cli
@@ -102,6 +105,12 @@ Every command supports `--json` where noted. Global flags: `-q`/`--quiet`, `--ti
102
105
  | `ov coordinator ask` | Synchronous request/response to coordinator (`--subject`, `--timeout`) |
103
106
  | `ov coordinator output` | Show recent coordinator output (`--lines`) |
104
107
  | `ov coordinator check-complete` | Evaluate exit triggers, return completion status |
108
+ | `ov orchestrator start` | Start multi-repo orchestrator agent (`--attach`/`--no-attach`, `--watchdog`, `--profile`) |
109
+ | `ov orchestrator stop` | Stop orchestrator |
110
+ | `ov orchestrator status` | Show orchestrator state |
111
+ | `ov orchestrator send` | Fire-and-forget message to orchestrator (`--subject`) |
112
+ | `ov orchestrator ask` | Synchronous request/response to orchestrator (`--subject`, `--timeout`) |
113
+ | `ov orchestrator output` | Show recent orchestrator output (`--lines`) |
105
114
  | `ov supervisor start` | **[DEPRECATED]** Start per-project supervisor agent |
106
115
  | `ov supervisor stop` | **[DEPRECATED]** Stop supervisor |
107
116
  | `ov supervisor status` | **[DEPRECATED]** Show supervisor state |
@@ -165,7 +174,7 @@ Every command supports `--json` where noted. Global flags: `-q`/`--quiet`, `--ti
165
174
  | `ov monitor status` | Show monitor state |
166
175
  | `ov log <event>` | Log a hook event (`--agent`) |
167
176
  | `ov clean` | Clean up worktrees, sessions, artifacts (`--completed`, `--all`, `--run`) |
168
- | `ov doctor` | Run health checks on overstory setup — 11 categories (`--category`, `--fix`, `--json`) |
177
+ | `ov doctor` | Run health checks on overstory setup — 12 categories (`--category`, `--fix`, `--json`) |
169
178
  | `ov ecosystem` | Show os-eco tool versions and health (`--json`) |
170
179
  | `ov upgrade` | Upgrade overstory to latest npm version (`--check`, `--all`, `--json`) |
171
180
  | `ov agents discover` | Discover agents by capability/state/parent (`--capability`, `--state`, `--parent`, `--json`) |
@@ -188,6 +197,9 @@ Overstory is runtime-agnostic. The `AgentRuntime` interface (`src/runtimes/types
188
197
  | Cursor | `agent` | (none — `--yolo`) | Experimental |
189
198
  | Codex | `codex` | OS-level sandbox (Seatbelt/Landlock) | Experimental |
190
199
  | Gemini | `gemini` | `--sandbox` flag | Experimental |
200
+ | Aider | `aider` | (none — `--yes-always`) | Experimental |
201
+ | Goose | `goose` | Profile-based permissions | Experimental |
202
+ | Amp | `amp` | Built-in approval system | Experimental |
191
203
  | OpenCode | `opencode` | (none) | Experimental |
192
204
 
193
205
  ## How It Works
@@ -237,7 +249,7 @@ overstory/
237
249
  config.ts Config loader + validation
238
250
  errors.ts Custom error types
239
251
  json.ts Standardized JSON envelope helpers
240
- commands/ One file per CLI subcommand (36 commands)
252
+ commands/ One file per CLI subcommand (37 commands)
241
253
  agents.ts Agent discovery and querying
242
254
  coordinator.ts Persistent orchestrator lifecycle
243
255
  supervisor.ts Team lead management [DEPRECATED]
@@ -260,7 +272,7 @@ overstory/
260
272
  run.ts Orchestration run lifecycle
261
273
  trace.ts Agent/task timeline viewing
262
274
  clean.ts Worktree/session cleanup
263
- doctor.ts Health check runner (11 check modules)
275
+ doctor.ts Health check runner (12 check modules)
264
276
  inspect.ts Deep per-agent inspection
265
277
  spec.ts Task spec management
266
278
  errors.ts Aggregated error view
@@ -272,6 +284,7 @@ overstory/
272
284
  update.ts Refresh managed files
273
285
  upgrade.ts npm version upgrades
274
286
  discover.ts Brownfield codebase discovery via coordinator-driven scout swarm
287
+ orchestrator.ts Multi-repo coordination (PersistentAgentSpec)
275
288
  completions.ts Shell completion generation (bash/zsh/fish)
276
289
  canopy/
277
290
  client.ts Canopy client (prompt rendering, listing, emission)
@@ -289,9 +302,10 @@ overstory/
289
302
  watchdog/ Tiered health monitoring (daemon, triage, health)
290
303
  logging/ Multi-format logger + sanitizer + reporter + color control + shared theme/format
291
304
  metrics/ SQLite metrics + pricing + transcript parsing
292
- doctor/ Health check modules (11 checks)
305
+ doctor/ Health check modules (12 checks)
306
+ utils/ Shared utilities (bin, fs, pid, time, version)
293
307
  insights/ Session insight analyzer for auto-expertise
294
- runtimes/ AgentRuntime abstraction (registry + adapters: Claude, Pi, Copilot, Codex, Gemini, Sapling, OpenCode, Cursor)
308
+ runtimes/ AgentRuntime abstraction (registry + adapters: Claude, Pi, Copilot, Codex, Gemini, Sapling, OpenCode, Cursor, Aider, Goose, Amp)
295
309
  tracker/ Pluggable task tracker (beads + seeds backends)
296
310
  mulch/ mulch client (programmatic API + CLI wrapper)
297
311
  e2e/ End-to-end lifecycle tests
@@ -11,12 +11,13 @@ Every spawned agent costs a full Claude Code session. The coordinator must be ec
11
11
  - **Avoid polling loops.** Check status after each mail, or at reasonable intervals. The mail system notifies you of completions.
12
12
  - **Trust your leads.** Do not micromanage. Give leads clear objectives and let them decompose, explore, spec, and build autonomously. Only intervene on escalations or stalls.
13
13
  - **Prefer fewer, broader leads** over many narrow ones. A lead managing 5 builders is more efficient than you coordinating 5 builders directly.
14
+ - **Compress roles when the budget is tight.** If keeping total agents low matters, you may act as a combined coordinator/lead by spawning a scout or builder directly for a narrow work stream, or dispatch a lead with `--dispatch-max-agents 1` or `2` so the lead compresses into lead/worker mode.
14
15
 
15
16
  ## failure-modes
16
17
 
17
18
  These are named failures. If you catch yourself doing any of these, stop and correct immediately.
18
19
 
19
- - **HIERARCHY_BYPASS** -- Spawning a builder, scout, reviewer, or merger directly without going through a lead. The coordinator dispatches leads only. Leads handle all downstream agent management. This is code-enforced but you should not even attempt it.
20
+ - **HIERARCHY_BYPASS** -- Spawning a reviewer or merger directly, or spawning a builder/scout directly for work that clearly needs a lead-owned work stream. Direct scout/builder fallback is only for narrow or budget-constrained cases.
20
21
  - **SPEC_WRITING** -- Writing spec files or using the Write/Edit tools. You have no write access. Leads produce specs (via their scouts). Your job is to provide high-level objectives in {{TRACKER_NAME}} issues and dispatch mail.
21
22
  - **CODE_MODIFICATION** -- Using Write or Edit on any file. You are a coordinator, not an implementer.
22
23
  - **UNNECESSARY_SPAWN** -- Spawning a lead for a trivially small task. If the objective is a single small change, a single lead is sufficient. Only spawn multiple leads for genuinely independent work streams.
@@ -46,7 +47,7 @@ This file tells you HOW to coordinate. Your objectives come from the channels ab
46
47
  - **NEVER** use the Write tool on any file. You have no write access.
47
48
  - **NEVER** use the Edit tool on any file. You have no write access.
48
49
  - **NEVER** write spec files. Leads own spec production -- they spawn scouts to explore, then write specs from findings.
49
- - **NEVER** spawn builders, scouts, reviewers, or mergers directly. Only spawn leads. This is enforced by `sling.ts` (HierarchyError).
50
+ - **NEVER** spawn reviewers or mergers directly. `sling.ts` allows direct `lead`, `scout`, and `builder` spawns, but direct `scout`/`builder` use is a fallback for low-budget or very small tasks, not the default.
50
51
  - **NEVER** run bash commands that modify source code, dependencies, or git history:
51
52
  - No `git commit`, `git checkout`, `git merge`, `git push`, `git reset`
52
53
  - No `rm`, `mv`, `cp`, `mkdir` on source directories
@@ -118,7 +119,7 @@ You are the **coordinator agent** in the overstory swarm system. You are the per
118
119
 
119
120
  ## role
120
121
 
121
- You are the top-level decision-maker for automated work. When a human gives you an objective (a feature, a refactor, a migration), you analyze it, create high-level {{TRACKER_NAME}} issues, dispatch **lead agents** to own each work stream, monitor their progress via mail and status checks, and handle escalations. Leads handle all downstream coordination: they spawn scouts to explore, write specs from findings, spawn builders to implement, and spawn reviewers to validate. You operate from the project root with full read visibility but **no write access** to any files. Your outputs are issues, lead dispatches, and coordination messages -- never code, never specs.
122
+ You are the top-level decision-maker for automated work. When a human gives you an objective (a feature, a refactor, a migration), you analyze it, create high-level {{TRACKER_NAME}} issues, dispatch **lead agents** to own each work stream, monitor their progress via mail and status checks, and handle escalations. Leads handle all downstream coordination: they spawn scouts to explore, write specs from findings, spawn builders to implement, and spawn reviewers to validate. When the available agent budget is intentionally small, you may compress roles by either spawning a direct scout/builder yourself or by dispatching a lead with a very small `--dispatch-max-agents` budget. You operate from the project root with full read visibility but **no write access** to any files. Your outputs are issues, dispatches, and coordination messages -- never code, never specs.
122
123
 
123
124
  ## capabilities
124
125
 
@@ -128,7 +129,7 @@ You are the top-level decision-maker for automated work. When a human gives you
128
129
  - **Grep** -- search file contents with regex
129
130
  - **Bash** (coordination commands only):
130
131
  - `{{TRACKER_CLI}} create`, `{{TRACKER_CLI}} show`, `{{TRACKER_CLI}} ready`, `{{TRACKER_CLI}} update`, `{{TRACKER_CLI}} close`, `{{TRACKER_CLI}} list`, `{{TRACKER_CLI}} sync` (full {{TRACKER_NAME}} lifecycle)
131
- - `ov sling` (spawn lead agents into worktrees)
132
+ - `ov sling` (spawn lead agents by default; direct scout/builder fallback for low-budget narrow work)
132
133
  - `ov status` (monitor active agents and worktrees)
133
134
  - `ov mail send`, `ov mail check`, `ov mail list`, `ov mail read`, `ov mail reply` (full mail protocol)
134
135
  - `ov nudge <agent> [message]` (poke stalled leads)
@@ -141,7 +142,7 @@ You are the top-level decision-maker for automated work. When a human gives you
141
142
 
142
143
  ### Spawning Agents
143
144
 
144
- **You may ONLY spawn leads. This is code-enforced by `sling.ts` -- attempting to spawn builder, scout, reviewer, or merger without `--parent` will throw a HierarchyError.**
145
+ **Default:** spawn leads. **Fallback:** you may also spawn a `scout` or `builder` directly when the work stream is narrow enough that a separate lead would be pure overhead, or when the agent budget is intentionally low. Never spawn `reviewer` or `merger` directly.
145
146
 
146
147
  ```bash
147
148
  ov sling <task-id> \
@@ -150,7 +151,20 @@ ov sling <task-id> \
150
151
  --depth 1
151
152
  ```
152
153
 
153
- You are always at depth 0. Leads you spawn are depth 1. Leads spawn their own scouts, builders, and reviewers at depth 2. This is the designed hierarchy:
154
+ Low-budget fallback examples:
155
+
156
+ ```bash
157
+ # Direct scout: coordinator is acting as combined coordinator/lead
158
+ ov sling <task-id> --capability scout --name <scout-name> --depth 1
159
+
160
+ # Direct builder for a small, concrete task that does not need a separate lead/spec cycle
161
+ ov sling <task-id> --capability builder --name <builder-name> --depth 1
162
+
163
+ # Compressed lead: keep the lead, but force it to act as lead/worker
164
+ ov sling <task-id> --capability lead --name <lead-name> --depth 1 --dispatch-max-agents 1
165
+ ```
166
+
167
+ You are always at depth 0. In the normal hierarchy, leads you spawn are depth 1. Leads spawn their own scouts, builders, and reviewers at depth 2:
154
168
 
155
169
  ```
156
170
  Coordinator (you, depth 0)
@@ -160,6 +174,13 @@ Coordinator (you, depth 0)
160
174
  └── Reviewer (depth 2) — validates quality
161
175
  ```
162
176
 
177
+ Compressed hierarchy is also valid when you are deliberately minimizing agent count:
178
+
179
+ ```
180
+ Coordinator (you, depth 0, acting as coordinator/lead)
181
+ └── Scout or Builder (depth 1)
182
+ ```
183
+
163
184
  ### Communication
164
185
  - **Send typed mail:** `ov mail send --to <agent> --subject "<subject>" --body "<body>" --type <type> --priority <priority>`
165
186
  - **Check inbox:** `ov mail check` (unread messages)
@@ -206,6 +227,9 @@ Coordinator (you, depth 0)
206
227
  ```bash
207
228
  ov sling <task-id> --capability lead --name <lead-name> --depth 1
208
229
  ```
230
+ If a work stream is very small or the available agent budget is intentionally constrained, you may instead:
231
+ - Spawn a direct `scout` or `builder` and treat yourself as the combined coordinator/lead for that stream.
232
+ - Spawn a lead with `--dispatch-max-agents 1` or `--dispatch-max-agents 2` so the lead compresses its downstream roles.
209
233
  6. **Send dispatch mail** to each lead with the high-level objective:
210
234
  ```bash
211
235
  ov mail send --to <lead-name> --subject "Work stream: <title>" \
package/agents/lead.md CHANGED
@@ -9,6 +9,11 @@ Your overlay may contain a **Dispatch Overrides** section with directives from y
9
9
  - **SKIP REVIEW**: Do not spawn a reviewer. Self-verify by reading the builder diff and running quality gates. This is appropriate for simple or well-tested changes.
10
10
  - **MAX AGENTS**: Limits the number of sub-workers you may spawn. Plan your decomposition to fit within this budget.
11
11
 
12
+ Budget compression rules:
13
+ - **MAX AGENTS = 1**: Act as a combined **lead/worker**. Default to doing the implementation yourself. Only use the single spawn slot if one specialist is clearly more valuable than your own direct work.
14
+ - **MAX AGENTS = 2**: Act as a compressed lead. Prefer at most one helper at a time, then finish remaining implementation and verification yourself. Do not assume there is room for a separate reviewer.
15
+ - **MAX AGENTS >= 3**: Use normal lead behavior and choose the right scout/builder/reviewer mix for the task.
16
+
12
17
  Always check your overlay for dispatch overrides before following the default three-phase workflow. If no overrides section exists, follow the standard playbook.
13
18
 
14
19
  ## cost-awareness
@@ -19,6 +24,8 @@ Scouts and reviewers are quality investments, not overhead. Skipping a scout to
19
24
 
20
25
  Reviewers are valuable for complex changes but optional for simple ones. The lead can self-verify simple changes by reading the diff and running quality gates, saving a full agent spawn.
21
26
 
27
+ When your overlay gives you a very small agent budget, role compression beats ceremony. A correct combined lead/worker execution is better than blocking on an ideal scout -> builder -> reviewer chain that the budget cannot support.
28
+
22
29
  Where to actually save tokens:
23
30
  - Prefer fewer, well-scoped builders over many small ones.
24
31
  - Batch status updates instead of sending per-worker messages.
@@ -143,7 +150,7 @@ Criteria — ANY:
143
150
  - Straightforward implementation with clear spec
144
151
  - Single builder can handle the full scope
145
152
 
146
- Action: Skip scouts if you have sufficient context (mulch records, dispatch details, file reads). Spawn one builder. Lead verifies by reading the diff and checking quality gates instead of spawning a reviewer.
153
+ Action: Skip scouts if you have sufficient context (mulch records, dispatch details, file reads). Spawn one builder. Lead verifies by reading the diff and checking quality gates instead of spawning a reviewer. If **MAX AGENTS = 1**, do this work yourself instead of spawning the builder.
147
154
 
148
155
  ### Complex Tasks (Full Pipeline)
149
156
  Criteria — ANY:
@@ -153,6 +160,9 @@ Criteria — ANY:
153
160
  - Multiple builders needed with file scope partitioning
154
161
 
155
162
  Action: Full Scout → Build → Verify pipeline. Spawn scouts for exploration, multiple builders for parallel work, reviewers for independent verification.
163
+ If your overlay budget is too small to support that pipeline, compress roles deliberately:
164
+ - With **MAX AGENTS = 2**, use one scout or one builder, not both in parallel, then do the remaining work and verification yourself.
165
+ - With **MAX AGENTS = 1**, you are effectively the worker. Explore just enough to ground the change, implement directly, and self-verify.
156
166
 
157
167
  ## three-phase-workflow
158
168
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@os-eco/overstory-cli",
3
- "version": "0.9.1",
3
+ "version": "0.9.2",
4
4
  "description": "Multi-agent orchestration for AI coding agents — spawn workers in git worktrees via tmux, coordinate through SQLite mail, merge with tiered conflict resolution. Pluggable runtime adapters for Claude Code, Pi, and more.",
5
5
  "author": "Jaymin West",
6
6
  "license": "MIT",
@@ -2555,7 +2555,15 @@ describe("deployHooks tracker close guard integration", () => {
2555
2555
  });
2556
2556
 
2557
2557
  test("deployHooks includes tracker close guard in PreToolUse for all capabilities", async () => {
2558
- const capabilities = ["builder", "scout", "reviewer", "lead", "merger", "coordinator"];
2558
+ const capabilities = [
2559
+ "builder",
2560
+ "scout",
2561
+ "reviewer",
2562
+ "lead",
2563
+ "merger",
2564
+ "orchestrator",
2565
+ "coordinator",
2566
+ ];
2559
2567
 
2560
2568
  for (const cap of capabilities) {
2561
2569
  const wt = join(tempDir, `${cap}-tc-wt`);
@@ -20,6 +20,7 @@ const NON_IMPLEMENTATION_CAPABILITIES = new Set([
20
20
  "scout",
21
21
  "reviewer",
22
22
  "lead",
23
+ "orchestrator",
23
24
  "coordinator",
24
25
  "supervisor",
25
26
  "monitor",
@@ -29,7 +30,7 @@ const NON_IMPLEMENTATION_CAPABILITIES = new Set([
29
30
  * Capabilities that coordinate work and need git add/commit for syncing
30
31
  * tasks, mulch, and other metadata — but must NOT git push.
31
32
  */
32
- const COORDINATION_CAPABILITIES = new Set(["coordinator", "supervisor", "monitor"]);
33
+ const COORDINATION_CAPABILITIES = new Set(["coordinator", "orchestrator", "supervisor", "monitor"]);
33
34
 
34
35
  /**
35
36
  * Additional safe Bash prefixes for coordination capabilities.
@@ -523,6 +523,32 @@ describe("generateOverlay", () => {
523
523
  expect(output).toContain("3");
524
524
  });
525
525
 
526
+ test("dispatch overrides: maxAgentsOverride of 1 enables combined lead/worker guidance", async () => {
527
+ const config = makeConfig({
528
+ capability: "lead",
529
+ maxAgentsOverride: 1,
530
+ canSpawn: true,
531
+ });
532
+ const output = await generateOverlay(config);
533
+
534
+ expect(output).toContain("MAX AGENTS");
535
+ expect(output).toContain("combined **lead/worker**");
536
+ expect(output).toContain("only slot");
537
+ });
538
+
539
+ test("dispatch overrides: maxAgentsOverride of 2 enables compressed-mode guidance", async () => {
540
+ const config = makeConfig({
541
+ capability: "lead",
542
+ maxAgentsOverride: 2,
543
+ canSpawn: true,
544
+ });
545
+ const output = await generateOverlay(config);
546
+
547
+ expect(output).toContain("MAX AGENTS");
548
+ expect(output).toContain("compressed mode");
549
+ expect(output).toContain("self-verification");
550
+ });
551
+
526
552
  test("dispatch overrides: both skipReview and maxAgentsOverride together", async () => {
527
553
  const config = makeConfig({
528
554
  capability: "lead",
@@ -102,10 +102,24 @@ function formatDispatchOverrides(config: OverlayConfig): string {
102
102
  }
103
103
 
104
104
  if (config.maxAgentsOverride !== undefined && config.maxAgentsOverride > 0) {
105
- sections.push(
106
- `- **MAX AGENTS**: Your per-lead agent ceiling has been set to **${config.maxAgentsOverride}**. ` +
107
- "Do not spawn more than this many sub-workers.",
108
- );
105
+ if (config.maxAgentsOverride === 1) {
106
+ sections.push(
107
+ "- **MAX AGENTS**: Your per-lead agent ceiling has been set to **1**. " +
108
+ "Operate as a combined **lead/worker**: implement the task yourself unless a single specialist is absolutely necessary. " +
109
+ "Do not spend your only slot on a scout or reviewer unless that specialist work is the real bottleneck.",
110
+ );
111
+ } else if (config.maxAgentsOverride === 2) {
112
+ sections.push(
113
+ "- **MAX AGENTS**: Your per-lead agent ceiling has been set to **2**. " +
114
+ "Operate in compressed mode: use at most one helper at a time when possible, then complete the remaining implementation and verification yourself. " +
115
+ "Prefer self-verification over spawning a separate reviewer.",
116
+ );
117
+ } else {
118
+ sections.push(
119
+ `- **MAX AGENTS**: Your per-lead agent ceiling has been set to **${config.maxAgentsOverride}**. ` +
120
+ "Do not spawn more than this many sub-workers.",
121
+ );
122
+ }
109
123
  }
110
124
 
111
125
  if (sections.length === 0) return "";
@@ -223,7 +223,7 @@ export function createAgentsCommand(): Command {
223
223
  .description("Find active agents by capability")
224
224
  .option(
225
225
  "--capability <type>",
226
- "Filter by capability (builder, scout, reviewer, lead, merger, coordinator, supervisor)",
226
+ "Filter by capability (builder, scout, reviewer, lead, merger, orchestrator, coordinator, supervisor)",
227
227
  )
228
228
  .option("--all", "Include completed and zombie agents (default: active only)")
229
229
  .option("--json", "Output as JSON")
@@ -792,3 +792,6 @@ describe("--agent", () => {
792
792
  expect(stdoutOutput).toContain("Agent cleaned");
793
793
  });
794
794
  });
795
+
796
+ // fs utility tests (wipeSqliteDb, resetJsonFile, clearDirectory, deleteFile)
797
+ // moved to src/utils/fs.test.ts
@@ -20,7 +20,6 @@
20
20
  */
21
21
 
22
22
  import { existsSync } from "node:fs";
23
- import { readdir, rm, unlink } from "node:fs/promises";
24
23
  import { join } from "node:path";
25
24
  import { loadConfig } from "../config.ts";
26
25
  import { AgentError, ValidationError } from "../errors.ts";
@@ -30,6 +29,7 @@ import { printHint, printSuccess } from "../logging/color.ts";
30
29
  import { createMulchClient } from "../mulch/client.ts";
31
30
  import { openSessionStore } from "../sessions/compat.ts";
32
31
  import type { AgentSession, MulchDoctorResult, MulchPruneResult, MulchStatus } from "../types.ts";
32
+ import { clearDirectory, deleteFile, resetJsonFile, wipeSqliteDb } from "../utils/fs.ts";
33
33
  import { listWorktrees, removeWorktree } from "../worktree/manager.ts";
34
34
  import {
35
35
  isProcessAlive,
@@ -274,63 +274,6 @@ async function deleteOrphanedBranches(root: string): Promise<number> {
274
274
  return deleted;
275
275
  }
276
276
 
277
- /**
278
- * Delete a SQLite database file and its WAL/SHM companions.
279
- */
280
- async function wipeSqliteDb(dbPath: string): Promise<boolean> {
281
- const extensions = ["", "-wal", "-shm"];
282
- let wiped = false;
283
- for (const ext of extensions) {
284
- try {
285
- await unlink(`${dbPath}${ext}`);
286
- if (ext === "") wiped = true;
287
- } catch {
288
- // File may not exist
289
- }
290
- }
291
- return wiped;
292
- }
293
-
294
- /**
295
- * Reset a JSON file to an empty array.
296
- */
297
- async function resetJsonFile(path: string): Promise<boolean> {
298
- const file = Bun.file(path);
299
- if (await file.exists()) {
300
- await Bun.write(path, "[]\n");
301
- return true;
302
- }
303
- return false;
304
- }
305
-
306
- /**
307
- * Clear all entries inside a directory but keep the directory itself.
308
- */
309
- async function clearDirectory(dirPath: string): Promise<boolean> {
310
- try {
311
- const entries = await readdir(dirPath);
312
- for (const entry of entries) {
313
- await rm(join(dirPath, entry), { recursive: true, force: true });
314
- }
315
- return entries.length > 0;
316
- } catch {
317
- // Directory may not exist
318
- return false;
319
- }
320
- }
321
-
322
- /**
323
- * Delete a single file if it exists.
324
- */
325
- async function deleteFile(path: string): Promise<boolean> {
326
- try {
327
- await unlink(path);
328
- return true;
329
- } catch {
330
- return false;
331
- }
332
- }
333
-
334
277
  /**
335
278
  * Check mulch repository health and return diagnostic information.
336
279
  *
@@ -2,7 +2,7 @@
2
2
  * Tests for shell completion generation.
3
3
  */
4
4
 
5
- import { describe, expect, it, mock } from "bun:test";
5
+ import { afterEach, describe, expect, it, mock } from "bun:test";
6
6
  import {
7
7
  COMMANDS,
8
8
  completionsCommand,
@@ -11,9 +11,13 @@ import {
11
11
  generateZsh,
12
12
  } from "./completions.ts";
13
13
 
14
+ afterEach(() => {
15
+ process.exitCode = undefined;
16
+ });
17
+
14
18
  describe("COMMANDS array", () => {
15
- it("should have exactly 33 commands", () => {
16
- expect(COMMANDS).toHaveLength(33);
19
+ it("should have exactly 34 commands", () => {
20
+ expect(COMMANDS).toHaveLength(34);
17
21
  });
18
22
 
19
23
  it("should include all expected command names", () => {
@@ -37,6 +41,7 @@ describe("COMMANDS array", () => {
37
41
  expect(names).toContain("costs");
38
42
  expect(names).toContain("metrics");
39
43
  expect(names).toContain("spec");
44
+ expect(names).toContain("orchestrator");
40
45
  expect(names).toContain("coordinator");
41
46
  expect(names).toContain("supervisor");
42
47
  expect(names).toContain("hooks");
@@ -62,7 +67,7 @@ describe("generateBash", () => {
62
67
  expect(script).toContain("_init_completion");
63
68
  });
64
69
 
65
- it("should include all 33 command names", () => {
70
+ it("should include all 34 command names", () => {
66
71
  const script = generateBash();
67
72
  for (const cmd of COMMANDS) {
68
73
  expect(script).toContain(cmd.name);
@@ -96,7 +101,7 @@ describe("generateZsh", () => {
96
101
  expect(script).toContain("_arguments");
97
102
  });
98
103
 
99
- it("should include all 33 command names", () => {
104
+ it("should include all 34 command names", () => {
100
105
  const script = generateZsh();
101
106
  for (const cmd of COMMANDS) {
102
107
  expect(script).toContain(cmd.name);
@@ -126,7 +131,7 @@ describe("generateFish", () => {
126
131
  expect(script).toContain("__fish_use_subcommand");
127
132
  });
128
133
 
129
- it("should include all 33 command names", () => {
134
+ it("should include all 34 command names", () => {
130
135
  const script = generateFish();
131
136
  for (const cmd of COMMANDS) {
132
137
  expect(script).toContain(cmd.name);
@@ -148,6 +153,13 @@ describe("generateFish", () => {
148
153
  expect(script).toContain("error");
149
154
  expect(script).toContain("worker_done");
150
155
  });
156
+
157
+ it("should include orchestrator command with start/stop/status subcommands", () => {
158
+ const orchestrator = COMMANDS.find((c) => c.name === "orchestrator");
159
+ expect(orchestrator).toBeDefined();
160
+ const subcommands = orchestrator?.subcommands?.map((s) => s.name);
161
+ expect(subcommands).toEqual(["start", "stop", "status"]);
162
+ });
151
163
  });
152
164
 
153
165
  describe("completionsCommand", () => {
@@ -44,7 +44,16 @@ export const COMMANDS: readonly CommandDef[] = [
44
44
  name: "--capability",
45
45
  desc: "Filter by capability",
46
46
  takesValue: true,
47
- values: ["builder", "scout", "reviewer", "lead", "merger", "coordinator", "supervisor"],
47
+ values: [
48
+ "builder",
49
+ "scout",
50
+ "reviewer",
51
+ "lead",
52
+ "merger",
53
+ "orchestrator",
54
+ "coordinator",
55
+ "supervisor",
56
+ ],
48
57
  },
49
58
  { name: "--all", desc: "Include completed and zombie agents" },
50
59
  { name: "--json", desc: "JSON output" },
@@ -343,6 +352,36 @@ export const COMMANDS: readonly CommandDef[] = [
343
352
  },
344
353
  ],
345
354
  },
355
+ {
356
+ name: "orchestrator",
357
+ desc: "Persistent ecosystem orchestrator agent",
358
+ flags: [
359
+ { name: "--json", desc: "JSON output" },
360
+ { name: "--help", desc: "Show help" },
361
+ ],
362
+ subcommands: [
363
+ {
364
+ name: "start",
365
+ desc: "Start orchestrator",
366
+ flags: [
367
+ { name: "--attach", desc: "Attach to tmux session" },
368
+ { name: "--no-attach", desc: "Do not attach to tmux session" },
369
+ { name: "--watchdog", desc: "Auto-start watchdog daemon" },
370
+ { name: "--json", desc: "JSON output" },
371
+ ],
372
+ },
373
+ {
374
+ name: "stop",
375
+ desc: "Stop orchestrator",
376
+ flags: [{ name: "--json", desc: "JSON output" }],
377
+ },
378
+ {
379
+ name: "status",
380
+ desc: "Show orchestrator state",
381
+ flags: [{ name: "--json", desc: "JSON output" }],
382
+ },
383
+ ],
384
+ },
346
385
  {
347
386
  name: "coordinator",
348
387
  desc: "Persistent coordinator agent",