@os-eco/overstory-cli 0.9.1 → 0.9.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101)
  1. package/README.md +21 -6
  2. package/agents/coordinator.md +34 -10
  3. package/agents/lead.md +11 -1
  4. package/package.json +1 -1
  5. package/src/agents/copilot-hooks-deployer.test.ts +162 -0
  6. package/src/agents/copilot-hooks-deployer.ts +93 -0
  7. package/src/agents/hooks-deployer.test.ts +9 -1
  8. package/src/agents/hooks-deployer.ts +2 -1
  9. package/src/agents/overlay.test.ts +26 -0
  10. package/src/agents/overlay.ts +18 -4
  11. package/src/beads/client.ts +31 -3
  12. package/src/commands/agents.ts +1 -1
  13. package/src/commands/clean.test.ts +3 -0
  14. package/src/commands/clean.ts +1 -58
  15. package/src/commands/completions.test.ts +18 -6
  16. package/src/commands/completions.ts +40 -1
  17. package/src/commands/coordinator.test.ts +77 -4
  18. package/src/commands/coordinator.ts +228 -125
  19. package/src/commands/dashboard.ts +50 -10
  20. package/src/commands/doctor.ts +3 -1
  21. package/src/commands/ecosystem.test.ts +126 -1
  22. package/src/commands/ecosystem.ts +7 -53
  23. package/src/commands/feed.test.ts +117 -2
  24. package/src/commands/feed.ts +46 -30
  25. package/src/commands/group.test.ts +274 -155
  26. package/src/commands/group.ts +11 -5
  27. package/src/commands/init.ts +50 -0
  28. package/src/commands/inspect.ts +8 -4
  29. package/src/commands/log.test.ts +35 -0
  30. package/src/commands/log.ts +10 -6
  31. package/src/commands/logs.test.ts +423 -1
  32. package/src/commands/logs.ts +99 -104
  33. package/src/commands/monitor.ts +8 -2
  34. package/src/commands/orchestrator.ts +42 -0
  35. package/src/commands/prime.test.ts +177 -2
  36. package/src/commands/prime.ts +4 -2
  37. package/src/commands/sling.ts +8 -3
  38. package/src/commands/upgrade.test.ts +2 -0
  39. package/src/commands/upgrade.ts +1 -17
  40. package/src/commands/watch.test.ts +67 -1
  41. package/src/commands/watch.ts +4 -79
  42. package/src/config.test.ts +250 -0
  43. package/src/config.ts +43 -0
  44. package/src/doctor/agents.test.ts +72 -5
  45. package/src/doctor/agents.ts +10 -10
  46. package/src/doctor/consistency.test.ts +35 -0
  47. package/src/doctor/consistency.ts +7 -3
  48. package/src/doctor/dependencies.test.ts +58 -1
  49. package/src/doctor/dependencies.ts +4 -2
  50. package/src/doctor/providers.test.ts +41 -5
  51. package/src/doctor/types.ts +2 -1
  52. package/src/doctor/version.test.ts +106 -2
  53. package/src/doctor/version.ts +4 -2
  54. package/src/doctor/watchdog.test.ts +167 -0
  55. package/src/doctor/watchdog.ts +158 -0
  56. package/src/e2e/init-sling-lifecycle.test.ts +2 -1
  57. package/src/errors.test.ts +350 -0
  58. package/src/events/tailer.test.ts +25 -0
  59. package/src/events/tailer.ts +8 -1
  60. package/src/index.ts +4 -1
  61. package/src/mail/store.test.ts +110 -0
  62. package/src/runtimes/aider.test.ts +124 -0
  63. package/src/runtimes/aider.ts +147 -0
  64. package/src/runtimes/amp.test.ts +164 -0
  65. package/src/runtimes/amp.ts +154 -0
  66. package/src/runtimes/claude.test.ts +4 -2
  67. package/src/runtimes/codex.test.ts +38 -1
  68. package/src/runtimes/codex.ts +22 -3
  69. package/src/runtimes/copilot.test.ts +213 -13
  70. package/src/runtimes/copilot.ts +93 -11
  71. package/src/runtimes/goose.test.ts +133 -0
  72. package/src/runtimes/goose.ts +157 -0
  73. package/src/runtimes/pi-guards.ts +2 -1
  74. package/src/runtimes/pi.test.ts +33 -9
  75. package/src/runtimes/pi.ts +10 -10
  76. package/src/runtimes/registry.test.ts +1 -1
  77. package/src/runtimes/registry.ts +13 -4
  78. package/src/runtimes/sapling.ts +2 -1
  79. package/src/runtimes/types.ts +9 -2
  80. package/src/tracker/factory.test.ts +10 -0
  81. package/src/tracker/factory.ts +3 -2
  82. package/src/types.ts +4 -0
  83. package/src/utils/bin.test.ts +10 -0
  84. package/src/utils/bin.ts +37 -0
  85. package/src/utils/fs.test.ts +119 -0
  86. package/src/utils/fs.ts +62 -0
  87. package/src/utils/pid.test.ts +68 -0
  88. package/src/utils/pid.ts +45 -0
  89. package/src/utils/time.test.ts +43 -0
  90. package/src/utils/time.ts +37 -0
  91. package/src/utils/version.test.ts +33 -0
  92. package/src/utils/version.ts +70 -0
  93. package/src/watchdog/daemon.test.ts +255 -1
  94. package/src/watchdog/daemon.ts +46 -9
  95. package/src/watchdog/health.test.ts +15 -1
  96. package/src/watchdog/health.ts +1 -1
  97. package/src/watchdog/triage.test.ts +49 -9
  98. package/src/watchdog/triage.ts +21 -5
  99. package/src/worktree/tmux.test.ts +166 -49
  100. package/src/worktree/tmux.ts +36 -37
  101. package/templates/copilot-hooks.json.tmpl +13 -0
package/README.md CHANGED
@@ -6,7 +6,7 @@ Multi-agent orchestration for AI coding agents.
6
6
  [![CI](https://github.com/jayminwest/overstory/actions/workflows/ci.yml/badge.svg)](https://github.com/jayminwest/overstory/actions/workflows/ci.yml)
7
7
  [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
8
8
 
9
- Overstory turns a single coding session into a multi-agent team by spawning worker agents in git worktrees via tmux, coordinating them through a custom SQLite mail system, and merging their work back with tiered conflict resolution. A pluggable `AgentRuntime` interface lets you swap between runtimes — Claude Code, [Pi](https://github.com/badlogic/pi-mono/tree/main/packages/coding-agent), [Gemini CLI](https://github.com/google-gemini/gemini-cli), or your own adapter.
9
+ Overstory turns a single coding session into a multi-agent team by spawning worker agents in git worktrees via tmux, coordinating them through a custom SQLite mail system, and merging their work back with tiered conflict resolution. A pluggable `AgentRuntime` interface lets you swap between 11 runtimes — Claude Code, [Pi](https://github.com/badlogic/pi-mono/tree/main/packages/coding-agent), [Gemini CLI](https://github.com/google-gemini/gemini-cli), [Aider](https://aider.chat), [Goose](https://github.com/block/goose), [Amp](https://amp.dev), or your own adapter.
10
10
 
11
11
  > **Warning: Agent swarms are not a universal solution.** Do not deploy Overstory without understanding the risks of multi-agent orchestration — compounding error rates, cost amplification, debugging complexity, and merge conflicts are the normal case, not edge cases. Read [STEELMAN.md](STEELMAN.md) for a full risk analysis and the [Agentic Engineering Book](https://github.com/jayminwest/agentic-engineering-book) ([web version](https://jayminwest.com/agentic-engineering-book)) before using this tool in production.
12
12
 
@@ -22,6 +22,9 @@ Requires [Bun](https://bun.sh) v1.0+, git, and tmux. At least one supported agen
22
22
  - [Cursor CLI](https://cursor.com/docs/cli/overview) (`agent` CLI)
23
23
  - [Sapling](https://github.com/jayminwest/sapling) (`sp` CLI)
24
24
  - [OpenCode](https://opencode.ai) (`opencode` CLI)
25
+ - [Aider](https://aider.chat) (`aider` CLI)
26
+ - [Goose](https://github.com/block/goose) (`goose` CLI)
27
+ - [Amp](https://amp.dev) (`amp` CLI)
25
28
 
26
29
  ```bash
27
30
  bun install -g @os-eco/overstory-cli
@@ -102,6 +105,12 @@ Every command supports `--json` where noted. Global flags: `-q`/`--quiet`, `--ti
102
105
  | `ov coordinator ask` | Synchronous request/response to coordinator (`--subject`, `--timeout`) |
103
106
  | `ov coordinator output` | Show recent coordinator output (`--lines`) |
104
107
  | `ov coordinator check-complete` | Evaluate exit triggers, return completion status |
108
+ | `ov orchestrator start` | Start multi-repo orchestrator agent (`--attach`/`--no-attach`, `--watchdog`, `--profile`) |
109
+ | `ov orchestrator stop` | Stop orchestrator |
110
+ | `ov orchestrator status` | Show orchestrator state |
111
+ | `ov orchestrator send` | Fire-and-forget message to orchestrator (`--subject`) |
112
+ | `ov orchestrator ask` | Synchronous request/response to orchestrator (`--subject`, `--timeout`) |
113
+ | `ov orchestrator output` | Show recent orchestrator output (`--lines`) |
105
114
  | `ov supervisor start` | **[DEPRECATED]** Start per-project supervisor agent |
106
115
  | `ov supervisor stop` | **[DEPRECATED]** Stop supervisor |
107
116
  | `ov supervisor status` | **[DEPRECATED]** Show supervisor state |
@@ -165,7 +174,7 @@ Every command supports `--json` where noted. Global flags: `-q`/`--quiet`, `--ti
165
174
  | `ov monitor status` | Show monitor state |
166
175
  | `ov log <event>` | Log a hook event (`--agent`) |
167
176
  | `ov clean` | Clean up worktrees, sessions, artifacts (`--completed`, `--all`, `--run`) |
168
- | `ov doctor` | Run health checks on overstory setup — 11 categories (`--category`, `--fix`, `--json`) |
177
+ | `ov doctor` | Run health checks on overstory setup — 12 categories (`--category`, `--fix`, `--json`) |
169
178
  | `ov ecosystem` | Show os-eco tool versions and health (`--json`) |
170
179
  | `ov upgrade` | Upgrade overstory to latest npm version (`--check`, `--all`, `--json`) |
171
180
  | `ov agents discover` | Discover agents by capability/state/parent (`--capability`, `--state`, `--parent`, `--json`) |
@@ -188,6 +197,9 @@ Overstory is runtime-agnostic. The `AgentRuntime` interface (`src/runtimes/types
188
197
  | Cursor | `agent` | (none — `--yolo`) | Experimental |
189
198
  | Codex | `codex` | OS-level sandbox (Seatbelt/Landlock) | Experimental |
190
199
  | Gemini | `gemini` | `--sandbox` flag | Experimental |
200
+ | Aider | `aider` | (none — `--yes-always`) | Experimental |
201
+ | Goose | `goose` | Profile-based permissions | Experimental |
202
+ | Amp | `amp` | Built-in approval system | Experimental |
191
203
  | OpenCode | `opencode` | (none) | Experimental |
192
204
 
193
205
  ## How It Works
@@ -237,7 +249,7 @@ overstory/
237
249
  config.ts Config loader + validation
238
250
  errors.ts Custom error types
239
251
  json.ts Standardized JSON envelope helpers
240
- commands/ One file per CLI subcommand (36 commands)
252
+ commands/ One file per CLI subcommand (37 commands)
241
253
  agents.ts Agent discovery and querying
242
254
  coordinator.ts Persistent orchestrator lifecycle
243
255
  supervisor.ts Team lead management [DEPRECATED]
@@ -260,7 +272,7 @@ overstory/
260
272
  run.ts Orchestration run lifecycle
261
273
  trace.ts Agent/task timeline viewing
262
274
  clean.ts Worktree/session cleanup
263
- doctor.ts Health check runner (11 check modules)
275
+ doctor.ts Health check runner (12 check modules)
264
276
  inspect.ts Deep per-agent inspection
265
277
  spec.ts Task spec management
266
278
  errors.ts Aggregated error view
@@ -272,6 +284,7 @@ overstory/
272
284
  update.ts Refresh managed files
273
285
  upgrade.ts npm version upgrades
274
286
  discover.ts Brownfield codebase discovery via coordinator-driven scout swarm
287
+ orchestrator.ts Multi-repo coordination (PersistentAgentSpec)
275
288
  completions.ts Shell completion generation (bash/zsh/fish)
276
289
  canopy/
277
290
  client.ts Canopy client (prompt rendering, listing, emission)
@@ -282,6 +295,7 @@ overstory/
282
295
  checkpoint.ts Session checkpoint save/restore
283
296
  lifecycle.ts Handoff orchestration
284
297
  hooks-deployer.ts Deploy hooks + tool enforcement
298
+ copilot-hooks-deployer.ts Deploy hooks config to Copilot worktrees
285
299
  guard-rules.ts Shared guard constants (tool lists, bash patterns)
286
300
  worktree/ Git worktree + tmux management
287
301
  mail/ SQLite mail system (typed protocol, broadcast)
@@ -289,9 +303,10 @@ overstory/
289
303
  watchdog/ Tiered health monitoring (daemon, triage, health)
290
304
  logging/ Multi-format logger + sanitizer + reporter + color control + shared theme/format
291
305
  metrics/ SQLite metrics + pricing + transcript parsing
292
- doctor/ Health check modules (11 checks)
306
+ doctor/ Health check modules (12 checks)
307
+ utils/ Shared utilities (bin, fs, pid, time, version)
293
308
  insights/ Session insight analyzer for auto-expertise
294
- runtimes/ AgentRuntime abstraction (registry + adapters: Claude, Pi, Copilot, Codex, Gemini, Sapling, OpenCode, Cursor)
309
+ runtimes/ AgentRuntime abstraction (registry + adapters: Claude, Pi, Copilot, Codex, Gemini, Sapling, OpenCode, Cursor, Aider, Goose, Amp)
295
310
  tracker/ Pluggable task tracker (beads + seeds backends)
296
311
  mulch/ mulch client (programmatic API + CLI wrapper)
297
312
  e2e/ End-to-end lifecycle tests
package/agents/coordinator.md CHANGED
@@ -11,12 +11,13 @@ Every spawned agent costs a full Claude Code session. The coordinator must be ec
11
11
  - **Avoid polling loops.** Check status after each mail, or at reasonable intervals. The mail system notifies you of completions.
12
12
  - **Trust your leads.** Do not micromanage. Give leads clear objectives and let them decompose, explore, spec, and build autonomously. Only intervene on escalations or stalls.
13
13
  - **Prefer fewer, broader leads** over many narrow ones. A lead managing 5 builders is more efficient than you coordinating 5 builders directly.
14
+ - **Compress roles when the budget is tight.** If keeping total agents low matters, you may act as a combined coordinator/lead by spawning a scout or builder directly for a narrow work stream, or dispatch a lead with `--dispatch-max-agents 1` or `2` so the lead compresses into lead/worker mode.
14
15
 
15
16
  ## failure-modes
16
17
 
17
18
  These are named failures. If you catch yourself doing any of these, stop and correct immediately.
18
19
 
19
- - **HIERARCHY_BYPASS** -- Spawning a builder, scout, reviewer, or merger directly without going through a lead. The coordinator dispatches leads only. Leads handle all downstream agent management. This is code-enforced but you should not even attempt it.
20
+ - **HIERARCHY_BYPASS** -- Spawning a reviewer or merger directly, or spawning a builder/scout directly for work that clearly needs a lead-owned work stream. Direct scout/builder fallback is only for narrow or budget-constrained cases.
20
21
  - **SPEC_WRITING** -- Writing spec files or using the Write/Edit tools. You have no write access. Leads produce specs (via their scouts). Your job is to provide high-level objectives in {{TRACKER_NAME}} issues and dispatch mail.
21
22
  - **CODE_MODIFICATION** -- Using Write or Edit on any file. You are a coordinator, not an implementer.
22
23
  - **UNNECESSARY_SPAWN** -- Spawning a lead for a trivially small task. If the objective is a single small change, a single lead is sufficient. Only spawn multiple leads for genuinely independent work streams.
@@ -46,7 +47,7 @@ This file tells you HOW to coordinate. Your objectives come from the channels ab
46
47
  - **NEVER** use the Write tool on any file. You have no write access.
47
48
  - **NEVER** use the Edit tool on any file. You have no write access.
48
49
  - **NEVER** write spec files. Leads own spec production -- they spawn scouts to explore, then write specs from findings.
49
- - **NEVER** spawn builders, scouts, reviewers, or mergers directly. Only spawn leads. This is enforced by `sling.ts` (HierarchyError).
50
+ - **NEVER** spawn reviewers or mergers directly. `sling.ts` allows direct `lead`, `scout`, and `builder` spawns, but direct `scout`/`builder` use is a fallback for low-budget or very small tasks, not the default.
50
51
  - **NEVER** run bash commands that modify source code, dependencies, or git history:
51
52
  - No `git commit`, `git checkout`, `git merge`, `git push`, `git reset`
52
53
  - No `rm`, `mv`, `cp`, `mkdir` on source directories
@@ -118,7 +119,7 @@ You are the **coordinator agent** in the overstory swarm system. You are the per
118
119
 
119
120
  ## role
120
121
 
121
- You are the top-level decision-maker for automated work. When a human gives you an objective (a feature, a refactor, a migration), you analyze it, create high-level {{TRACKER_NAME}} issues, dispatch **lead agents** to own each work stream, monitor their progress via mail and status checks, and handle escalations. Leads handle all downstream coordination: they spawn scouts to explore, write specs from findings, spawn builders to implement, and spawn reviewers to validate. You operate from the project root with full read visibility but **no write access** to any files. Your outputs are issues, lead dispatches, and coordination messages -- never code, never specs.
122
+ You are the top-level decision-maker for automated work. When a human gives you an objective (a feature, a refactor, a migration), you analyze it, create high-level {{TRACKER_NAME}} issues, dispatch **lead agents** to own each work stream, monitor their progress via mail and status checks, and handle escalations. Leads handle all downstream coordination: they spawn scouts to explore, write specs from findings, spawn builders to implement, and spawn reviewers to validate. When the available agent budget is intentionally small, you may compress roles by either spawning a direct scout/builder yourself or by dispatching a lead with a very small `--dispatch-max-agents` budget. You operate from the project root with full read visibility but **no write access** to any files. Your outputs are issues, dispatches, and coordination messages -- never code, never specs.
122
123
 
123
124
  ## capabilities
124
125
 
@@ -128,7 +129,7 @@ You are the top-level decision-maker for automated work. When a human gives you
128
129
  - **Grep** -- search file contents with regex
129
130
  - **Bash** (coordination commands only):
130
131
  - `{{TRACKER_CLI}} create`, `{{TRACKER_CLI}} show`, `{{TRACKER_CLI}} ready`, `{{TRACKER_CLI}} update`, `{{TRACKER_CLI}} close`, `{{TRACKER_CLI}} list`, `{{TRACKER_CLI}} sync` (full {{TRACKER_NAME}} lifecycle)
131
- - `ov sling` (spawn lead agents into worktrees)
132
+ - `ov sling` (spawn lead agents by default; direct scout/builder fallback for low-budget narrow work)
132
133
  - `ov status` (monitor active agents and worktrees)
133
134
  - `ov mail send`, `ov mail check`, `ov mail list`, `ov mail read`, `ov mail reply` (full mail protocol)
134
135
  - `ov nudge <agent> [message]` (poke stalled leads)
@@ -141,7 +142,7 @@ You are the top-level decision-maker for automated work. When a human gives you
141
142
 
142
143
  ### Spawning Agents
143
144
 
144
- **You may ONLY spawn leads. This is code-enforced by `sling.ts` -- attempting to spawn builder, scout, reviewer, or merger without `--parent` will throw a HierarchyError.**
145
+ **Default:** spawn leads. **Fallback:** you may also spawn a `scout` or `builder` directly when the work stream is narrow enough that a separate lead would be pure overhead, or when the agent budget is intentionally low. Never spawn `reviewer` or `merger` directly.
145
146
 
146
147
  ```bash
147
148
  ov sling <task-id> \
@@ -150,7 +151,20 @@ ov sling <task-id> \
150
151
  --depth 1
151
152
  ```
152
153
 
153
- You are always at depth 0. Leads you spawn are depth 1. Leads spawn their own scouts, builders, and reviewers at depth 2. This is the designed hierarchy:
154
+ Low-budget fallback examples:
155
+
156
+ ```bash
157
+ # Direct scout: coordinator is acting as combined coordinator/lead
158
+ ov sling <task-id> --capability scout --name <scout-name> --depth 1
159
+
160
+ # Direct builder for a small, concrete task that does not need a separate lead/spec cycle
161
+ ov sling <task-id> --capability builder --name <builder-name> --depth 1
162
+
163
+ # Compressed lead: keep the lead, but force it to act as lead/worker
164
+ ov sling <task-id> --capability lead --name <lead-name> --depth 1 --dispatch-max-agents 1
165
+ ```
166
+
167
+ You are always at depth 0. In the normal hierarchy, leads you spawn are depth 1. Leads spawn their own scouts, builders, and reviewers at depth 2:
154
168
 
155
169
  ```
156
170
  Coordinator (you, depth 0)
@@ -160,6 +174,13 @@ Coordinator (you, depth 0)
160
174
  └── Reviewer (depth 2) — validates quality
161
175
  ```
162
176
 
177
+ Compressed hierarchy is also valid when you are deliberately minimizing agent count:
178
+
179
+ ```
180
+ Coordinator (you, depth 0, acting as coordinator/lead)
181
+ └── Scout or Builder (depth 1)
182
+ ```
183
+
163
184
  ### Communication
164
185
  - **Send typed mail:** `ov mail send --to <agent> --subject "<subject>" --body "<body>" --type <type> --priority <priority>`
165
186
  - **Check inbox:** `ov mail check` (unread messages)
@@ -206,6 +227,9 @@ Coordinator (you, depth 0)
206
227
  ```bash
207
228
  ov sling <task-id> --capability lead --name <lead-name> --depth 1
208
229
  ```
230
+ If a work stream is very small or the available agent budget is intentionally constrained, you may instead:
231
+ - Spawn a direct `scout` or `builder` and treat yourself as the combined coordinator/lead for that stream.
232
+ - Spawn a lead with `--dispatch-max-agents 1` or `--dispatch-max-agents 2` so the lead compresses its downstream roles.
209
233
  6. **Send dispatch mail** to each lead with the high-level objective:
210
234
  ```bash
211
235
  ov mail send --to <lead-name> --subject "Work stream: <title>" \
@@ -295,16 +319,16 @@ When a batch is complete (task group auto-closed, all issues resolved):
295
319
  4. **Only then** close the issue: `{{TRACKER_CLI}} close <id> --reason "Merged branch <branch-name>"`.
296
320
 
297
321
  1. Verify all issues are closed: run `{{TRACKER_CLI}} show <id>` for each issue in the group.
298
- 2. Verify all branches are merged: check `ov status` for unmerged branches. If any branch is unmerged, do NOT proceed — wait for the lead's `merge_ready` signal.
299
- 3. Clean up worktrees: `ov worktree clean --completed`.
300
- 4. Record orchestration insights: `ml record <domain> --type <type> --classification <foundational|tactical|observational> --description "<insight>"`.
301
- 5. Commit and sync state files: after all work is merged and issues are closed, commit any outstanding state changes so runtime state is not left uncommitted when the coordinator goes idle:
322
+ 2. Verify all branches are merged: check `ov status` for unmerged branches. If any branch is unmerged, do NOT proceed — wait for the lead's `merge_ready` signal. **Note:** merged branches carry each worker's committed `.mulch/` changes into the canonical branch — this is how discovery scout findings reach the main repo.
323
+ 3. Record orchestration insights: `ml record <domain> --type <type> --classification <foundational|tactical|observational> --description "<insight>"`.
324
+ 4. Commit and sync state files: after all work is merged and issues are closed, commit any outstanding state changes so runtime state is not left uncommitted when the coordinator goes idle:
302
325
  ```bash
303
326
  {{TRACKER_CLI}} sync
304
327
  git add .overstory/ .mulch/
305
328
  git diff --cached --quiet || git commit -m "chore: sync runtime state"
306
329
  git push
307
330
  ```
331
+ 5. Clean up worktrees: `ov worktree clean --completed`. **Only run this after branches are merged and .mulch/ state is committed** — cleaning worktrees before merging destroys any uncommitted scout findings.
308
332
  6. Report to the human operator: summarize what was accomplished, what was merged, any issues encountered.
309
333
  7. Check for follow-up work: `{{TRACKER_CLI}} ready` to see if new issues surfaced during the batch.
310
334
 
package/agents/lead.md CHANGED
@@ -9,6 +9,11 @@ Your overlay may contain a **Dispatch Overrides** section with directives from y
9
9
  - **SKIP REVIEW**: Do not spawn a reviewer. Self-verify by reading the builder diff and running quality gates. This is appropriate for simple or well-tested changes.
10
10
  - **MAX AGENTS**: Limits the number of sub-workers you may spawn. Plan your decomposition to fit within this budget.
11
11
 
12
+ Budget compression rules:
13
+ - **MAX AGENTS = 1**: Act as a combined **lead/worker**. Default to doing the implementation yourself. Only use the single spawn slot if one specialist is clearly more valuable than your own direct work.
14
+ - **MAX AGENTS = 2**: Act as a compressed lead. Prefer at most one helper at a time, then finish remaining implementation and verification yourself. Do not assume there is room for a separate reviewer.
15
+ - **MAX AGENTS >= 3**: Use normal lead behavior and choose the right scout/builder/reviewer mix for the task.
16
+
12
17
  Always check your overlay for dispatch overrides before following the default three-phase workflow. If no overrides section exists, follow the standard playbook.
13
18
 
14
19
  ## cost-awareness
@@ -19,6 +24,8 @@ Scouts and reviewers are quality investments, not overhead. Skipping a scout to
19
24
 
20
25
  Reviewers are valuable for complex changes but optional for simple ones. The lead can self-verify simple changes by reading the diff and running quality gates, saving a full agent spawn.
21
26
 
27
+ When your overlay gives you a very small agent budget, role compression beats ceremony. A correct combined lead/worker execution is better than blocking on an ideal scout -> builder -> reviewer chain that the budget cannot support.
28
+
22
29
  Where to actually save tokens:
23
30
  - Prefer fewer, well-scoped builders over many small ones.
24
31
  - Batch status updates instead of sending per-worker messages.
@@ -143,7 +150,7 @@ Criteria — ANY:
143
150
  - Straightforward implementation with clear spec
144
151
  - Single builder can handle the full scope
145
152
 
146
- Action: Skip scouts if you have sufficient context (mulch records, dispatch details, file reads). Spawn one builder. Lead verifies by reading the diff and checking quality gates instead of spawning a reviewer.
153
+ Action: Skip scouts if you have sufficient context (mulch records, dispatch details, file reads). Spawn one builder. Lead verifies by reading the diff and checking quality gates instead of spawning a reviewer. If **MAX AGENTS = 1**, do this work yourself instead of spawning the builder.
147
154
 
148
155
  ### Complex Tasks (Full Pipeline)
149
156
  Criteria — ANY:
@@ -153,6 +160,9 @@ Criteria — ANY:
153
160
  - Multiple builders needed with file scope partitioning
154
161
 
155
162
  Action: Full Scout → Build → Verify pipeline. Spawn scouts for exploration, multiple builders for parallel work, reviewers for independent verification.
163
+ If your overlay budget is too small to support that pipeline, compress roles deliberately:
164
+ - With **MAX AGENTS = 2**, use one scout or one builder, not both in parallel, then do the remaining work and verification yourself.
165
+ - With **MAX AGENTS = 1**, you are effectively the worker. Explore just enough to ground the change, implement directly, and self-verify.
156
166
 
157
167
  ## three-phase-workflow
158
168
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@os-eco/overstory-cli",
3
- "version": "0.9.1",
3
+ "version": "0.9.3",
4
4
  "description": "Multi-agent orchestration for AI coding agents — spawn workers in git worktrees via tmux, coordinate through SQLite mail, merge with tiered conflict resolution. Pluggable runtime adapters for Claude Code, Pi, and more.",
5
5
  "author": "Jaymin West",
6
6
  "license": "MIT",
package/src/agents/copilot-hooks-deployer.test.ts ADDED
@@ -0,0 +1,162 @@
1
+ import { afterEach, beforeEach, describe, expect, test } from "bun:test";
2
+ import { mkdtemp } from "node:fs/promises";
3
+ import { tmpdir } from "node:os";
4
+ import { join } from "node:path";
5
+ import { cleanupTempDir } from "../test-helpers.ts";
6
+ import { deployCopilotHooks } from "./copilot-hooks-deployer.ts";
7
+ import { PATH_PREFIX } from "./hooks-deployer.ts";
8
+
9
+ describe("deployCopilotHooks", () => {
10
+ let tempDir: string;
11
+
12
+ beforeEach(async () => {
13
+ tempDir = await mkdtemp(join(tmpdir(), "overstory-copilot-hooks-test-"));
14
+ });
15
+
16
+ afterEach(async () => {
17
+ await cleanupTempDir(tempDir);
18
+ });
19
+
20
+ test("writes hooks.json to .github/hooks/ directory", async () => {
21
+ const worktreePath = join(tempDir, "worktree");
22
+ await deployCopilotHooks(worktreePath, "my-builder");
23
+
24
+ const hooksPath = join(worktreePath, ".github", "hooks", "hooks.json");
25
+ const exists = await Bun.file(hooksPath).exists();
26
+ expect(exists).toBe(true);
27
+ });
28
+
29
+ test("creates .github/hooks/ directory if it does not exist", async () => {
30
+ const worktreePath = join(tempDir, "new-worktree");
31
+ // Directory does not exist before the call
32
+ await deployCopilotHooks(worktreePath, "builder-1");
33
+
34
+ const hooksPath = join(worktreePath, ".github", "hooks", "hooks.json");
35
+ expect(await Bun.file(hooksPath).exists()).toBe(true);
36
+ });
37
+
38
+ test("output file is valid JSON", async () => {
39
+ const worktreePath = join(tempDir, "worktree");
40
+ await deployCopilotHooks(worktreePath, "test-agent");
41
+
42
+ const hooksPath = join(worktreePath, ".github", "hooks", "hooks.json");
43
+ const raw = await Bun.file(hooksPath).text();
44
+ expect(() => JSON.parse(raw)).not.toThrow();
45
+ });
46
+
47
+ test("output has Copilot schema structure (top-level hooks with onSessionStart)", async () => {
48
+ const worktreePath = join(tempDir, "worktree");
49
+ await deployCopilotHooks(worktreePath, "test-agent");
50
+
51
+ const hooksPath = join(worktreePath, ".github", "hooks", "hooks.json");
52
+ const config = JSON.parse(await Bun.file(hooksPath).text()) as Record<string, unknown>;
53
+
54
+ expect(config).toHaveProperty("hooks");
55
+ const hooks = config.hooks as Record<string, unknown>;
56
+ expect(hooks).toHaveProperty("onSessionStart");
57
+ expect(Array.isArray(hooks.onSessionStart)).toBe(true);
58
+ });
59
+
60
+ test("replaces {{AGENT_NAME}} with agentName in all commands", async () => {
61
+ const worktreePath = join(tempDir, "worktree");
62
+ await deployCopilotHooks(worktreePath, "scout-agent-42");
63
+
64
+ const hooksPath = join(worktreePath, ".github", "hooks", "hooks.json");
65
+ const raw = await Bun.file(hooksPath).text();
66
+
67
+ expect(raw).toContain("scout-agent-42");
68
+ expect(raw).not.toContain("{{AGENT_NAME}}");
69
+ });
70
+
71
+ test("prepends PATH_PREFIX to all hook commands", async () => {
72
+ const worktreePath = join(tempDir, "worktree");
73
+ await deployCopilotHooks(worktreePath, "builder-1");
74
+
75
+ const hooksPath = join(worktreePath, ".github", "hooks", "hooks.json");
76
+ const config = JSON.parse(await Bun.file(hooksPath).text()) as {
77
+ hooks: Record<string, Array<{ command: string }>>;
78
+ };
79
+
80
+ const allCommands = Object.values(config.hooks)
81
+ .flat()
82
+ .map((e) => e.command);
83
+ expect(allCommands.length).toBeGreaterThan(0);
84
+ for (const cmd of allCommands) {
85
+ expect(cmd).toStartWith(PATH_PREFIX);
86
+ }
87
+ });
88
+
89
+ test("onSessionStart entries are objects with command field only (no matcher, no type)", async () => {
90
+ const worktreePath = join(tempDir, "worktree");
91
+ await deployCopilotHooks(worktreePath, "builder-1");
92
+
93
+ const hooksPath = join(worktreePath, ".github", "hooks", "hooks.json");
94
+ const config = JSON.parse(await Bun.file(hooksPath).text()) as {
95
+ hooks: { onSessionStart: Array<Record<string, unknown>> };
96
+ };
97
+
98
+ for (const entry of config.hooks.onSessionStart) {
99
+ expect(typeof entry.command).toBe("string");
100
+ // Copilot schema has no matcher or type fields
101
+ expect(entry).not.toHaveProperty("matcher");
102
+ expect(entry).not.toHaveProperty("type");
103
+ }
104
+ });
105
+
106
+ test("onSessionStart includes ov prime command", async () => {
107
+ const worktreePath = join(tempDir, "worktree");
108
+ await deployCopilotHooks(worktreePath, "prime-test-agent");
109
+
110
+ const hooksPath = join(worktreePath, ".github", "hooks", "hooks.json");
111
+ const config = JSON.parse(await Bun.file(hooksPath).text()) as {
112
+ hooks: { onSessionStart: Array<{ command: string }> };
113
+ };
114
+
115
+ const commands = config.hooks.onSessionStart.map((e) => e.command);
116
+ expect(commands.some((c) => c.includes("ov prime") && c.includes("prime-test-agent"))).toBe(
117
+ true,
118
+ );
119
+ });
120
+
121
+ test("onSessionStart includes ov mail check --inject command", async () => {
122
+ const worktreePath = join(tempDir, "worktree");
123
+ await deployCopilotHooks(worktreePath, "mail-test-agent");
124
+
125
+ const hooksPath = join(worktreePath, ".github", "hooks", "hooks.json");
126
+ const config = JSON.parse(await Bun.file(hooksPath).text()) as {
127
+ hooks: { onSessionStart: Array<{ command: string }> };
128
+ };
129
+
130
+ const commands = config.hooks.onSessionStart.map((e) => e.command);
131
+ expect(
132
+ commands.some((c) => c.includes("ov mail check --inject") && c.includes("mail-test-agent")),
133
+ ).toBe(true);
134
+ });
135
+
136
+ test("all hook commands include ENV_GUARD pattern", async () => {
137
+ const worktreePath = join(tempDir, "worktree");
138
+ await deployCopilotHooks(worktreePath, "guard-test-agent");
139
+
140
+ const hooksPath = join(worktreePath, ".github", "hooks", "hooks.json");
141
+ const config = JSON.parse(await Bun.file(hooksPath).text()) as {
142
+ hooks: Record<string, Array<{ command: string }>>;
143
+ };
144
+
145
+ const allCommands = Object.values(config.hooks)
146
+ .flat()
147
+ .map((e) => e.command);
148
+ for (const cmd of allCommands) {
149
+ expect(cmd).toContain("OVERSTORY_AGENT_NAME");
150
+ }
151
+ });
152
+
153
+ test("template file exists and is valid JSON after substitution", async () => {
154
+ // Verify template file is present and parseable (basic template health check).
155
+ const templatePath = join(import.meta.dir, "..", "..", "templates", "copilot-hooks.json.tmpl");
156
+ const exists = await Bun.file(templatePath).exists();
157
+ expect(exists).toBe(true);
158
+
159
+ const raw = (await Bun.file(templatePath).text()).replace(/\{\{AGENT_NAME\}\}/g, "test");
160
+ expect(() => JSON.parse(raw)).not.toThrow();
161
+ });
162
+ });
@@ -0,0 +1,93 @@
1
+ import { mkdir } from "node:fs/promises";
2
+ import { dirname, join } from "node:path";
3
+ import { AgentError } from "../errors.ts";
4
+ import { PATH_PREFIX } from "./hooks-deployer.ts";
5
+
6
/**
 * Shape of one entry in a Copilot hooks config.
 *
 * Simpler than the Claude Code equivalent: there is no matcher and no type field.
 */
interface CopilotHookEntry {
	/** Command string for the hook, as written in the hooks JSON. */
	command: string;
}
10
+
11
/**
 * Build the path of the Copilot hooks template,
 * `templates/copilot-hooks.json.tmpl` under the repo root.
 *
 * @returns Path to the template file, derived from this module's directory
 */
function getTemplatePath(): string {
	// This module lives in src/agents/, so its parent is src/ and the repo root is one above that.
	const srcDir = dirname(import.meta.dir);
	const repoRoot = join(srcDir, "..");
	return join(repoRoot, "templates", "copilot-hooks.json.tmpl");
}
19
+
20
/**
 * Deploy Copilot lifecycle hooks to an agent's worktree.
 *
 * Reads `templates/copilot-hooks.json.tmpl`, replaces all `{{AGENT_NAME}}` tokens,
 * prepends PATH_PREFIX to every hook command so CLIs (ov, ml, sd) resolve correctly
 * under Copilot's minimal PATH, then writes the result to
 * `<worktreePath>/.github/hooks/hooks.json`.
 *
 * Phase 1: lifecycle hooks only (onSessionStart). No security guards.
 *
 * @param worktreePath - Absolute path to the agent's git worktree
 * @param agentName - The unique name of the agent (replaces {{AGENT_NAME}} in template)
 * @throws {AgentError} If the template is missing, unreadable, not valid JSON after
 *   substitution, or lacks a `hooks` object; or if creating the directory or writing
 *   the output file fails
 */
export async function deployCopilotHooks(worktreePath: string, agentName: string): Promise<void> {
	const templatePath = getTemplatePath();
	const file = Bun.file(templatePath);
	const exists = await file.exists();

	if (!exists) {
		throw new AgentError(`Copilot hooks template not found: ${templatePath}`, {
			agentName,
		});
	}

	let template: string;
	try {
		template = await file.text();
	} catch (err) {
		throw new AgentError(`Failed to read Copilot hooks template: ${templatePath}`, {
			agentName,
			cause: err instanceof Error ? err : undefined,
		});
	}

	// Replace all occurrences of {{AGENT_NAME}} in a single pass.
	// - split/join treats the name as literal text, so `$&`-style replacement patterns
	//   in the name are not interpreted, and a name that itself contains the token
	//   cannot cause endless re-substitution.
	// - The token sits inside JSON string literals, so the name is JSON-escaped first;
	//   quotes or backslashes in it can then neither break nor reshape the config.
	const escapedAgentName = JSON.stringify(agentName).slice(1, -1);
	const content = template.split("{{AGENT_NAME}}").join(escapedAgentName);

	// Parse the base config from the template, surfacing malformed templates as AgentError.
	let parsed: unknown;
	try {
		parsed = JSON.parse(content);
	} catch (err) {
		throw new AgentError(`Failed to parse Copilot hooks template: ${templatePath}`, {
			agentName,
			cause: err instanceof Error ? err : undefined,
		});
	}

	const hooks = (parsed as { hooks?: unknown } | null)?.hooks;
	if (typeof hooks !== "object" || hooks === null) {
		throw new AgentError(`Copilot hooks template has no "hooks" object: ${templatePath}`, {
			agentName,
		});
	}
	const config = parsed as { hooks: Record<string, CopilotHookEntry[]> };

	// Extend PATH in all hook commands.
	// Copilot CLI executes hooks with a minimal PATH — ~/.bun/bin (where ov, ml, sd live)
	// is not included. Prepend PATH_PREFIX so CLIs resolve correctly.
	for (const entries of Object.values(config.hooks)) {
		for (const entry of entries) {
			entry.command = `${PATH_PREFIX} ${entry.command}`;
		}
	}

	const hooksDir = join(worktreePath, ".github", "hooks");
	const outputPath = join(hooksDir, "hooks.json");

	try {
		await mkdir(hooksDir, { recursive: true });
	} catch (err) {
		throw new AgentError(`Failed to create .github/hooks/ directory at: ${hooksDir}`, {
			agentName,
			cause: err instanceof Error ? err : undefined,
		});
	}

	try {
		await Bun.write(outputPath, `${JSON.stringify(config, null, "\t")}\n`);
	} catch (err) {
		throw new AgentError(`Failed to write Copilot hooks config to: ${outputPath}`, {
			agentName,
			cause: err instanceof Error ? err : undefined,
		});
	}
}
@@ -2555,7 +2555,15 @@ describe("deployHooks tracker close guard integration", () => {
2555
2555
  });
2556
2556
 
2557
2557
  test("deployHooks includes tracker close guard in PreToolUse for all capabilities", async () => {
2558
- const capabilities = ["builder", "scout", "reviewer", "lead", "merger", "coordinator"];
2558
+ const capabilities = [
2559
+ "builder",
2560
+ "scout",
2561
+ "reviewer",
2562
+ "lead",
2563
+ "merger",
2564
+ "orchestrator",
2565
+ "coordinator",
2566
+ ];
2559
2567
 
2560
2568
  for (const cap of capabilities) {
2561
2569
  const wt = join(tempDir, `${cap}-tc-wt`);
@@ -20,6 +20,7 @@ const NON_IMPLEMENTATION_CAPABILITIES = new Set([
20
20
  "scout",
21
21
  "reviewer",
22
22
  "lead",
23
+ "orchestrator",
23
24
  "coordinator",
24
25
  "supervisor",
25
26
  "monitor",
@@ -29,7 +30,7 @@ const NON_IMPLEMENTATION_CAPABILITIES = new Set([
29
30
  * Capabilities that coordinate work and need git add/commit for syncing
30
31
  * tasks, mulch, and other metadata — but must NOT git push.
31
32
  */
32
- const COORDINATION_CAPABILITIES = new Set(["coordinator", "supervisor", "monitor"]);
33
+ const COORDINATION_CAPABILITIES = new Set(["coordinator", "orchestrator", "supervisor", "monitor"]);
33
34
 
34
35
  /**
35
36
  * Additional safe Bash prefixes for coordination capabilities.
@@ -523,6 +523,32 @@ describe("generateOverlay", () => {
523
523
  expect(output).toContain("3");
524
524
  });
525
525
 
526
test("dispatch overrides: maxAgentsOverride of 1 enables combined lead/worker guidance", async () => {
	// A lead with a ceiling of exactly one agent should get the combined lead/worker wording.
	const overlay = await generateOverlay(
		makeConfig({
			capability: "lead",
			maxAgentsOverride: 1,
			canSpawn: true,
		}),
	);

	for (const snippet of ["MAX AGENTS", "combined **lead/worker**", "only slot"]) {
		expect(overlay).toContain(snippet);
	}
});
538
+
539
test("dispatch overrides: maxAgentsOverride of 2 enables compressed-mode guidance", async () => {
	// A lead with a ceiling of exactly two agents should get the compressed-mode wording.
	const overlay = await generateOverlay(
		makeConfig({
			capability: "lead",
			maxAgentsOverride: 2,
			canSpawn: true,
		}),
	);

	for (const snippet of ["MAX AGENTS", "compressed mode", "self-verification"]) {
		expect(overlay).toContain(snippet);
	}
});
551
+
526
552
  test("dispatch overrides: both skipReview and maxAgentsOverride together", async () => {
527
553
  const config = makeConfig({
528
554
  capability: "lead",
@@ -102,10 +102,24 @@ function formatDispatchOverrides(config: OverlayConfig): string {
102
102
  }
103
103
 
104
104
  if (config.maxAgentsOverride !== undefined && config.maxAgentsOverride > 0) {
105
- sections.push(
106
- `- **MAX AGENTS**: Your per-lead agent ceiling has been set to **${config.maxAgentsOverride}**. ` +
107
- "Do not spawn more than this many sub-workers.",
108
- );
105
+ if (config.maxAgentsOverride === 1) {
106
+ sections.push(
107
+ "- **MAX AGENTS**: Your per-lead agent ceiling has been set to **1**. " +
108
+ "Operate as a combined **lead/worker**: implement the task yourself unless a single specialist is absolutely necessary. " +
109
+ "Do not spend your only slot on a scout or reviewer unless that specialist work is the real bottleneck.",
110
+ );
111
+ } else if (config.maxAgentsOverride === 2) {
112
+ sections.push(
113
+ "- **MAX AGENTS**: Your per-lead agent ceiling has been set to **2**. " +
114
+ "Operate in compressed mode: use at most one helper at a time when possible, then complete the remaining implementation and verification yourself. " +
115
+ "Prefer self-verification over spawning a separate reviewer.",
116
+ );
117
+ } else {
118
+ sections.push(
119
+ `- **MAX AGENTS**: Your per-lead agent ceiling has been set to **${config.maxAgentsOverride}**. ` +
120
+ "Do not spawn more than this many sub-workers.",
121
+ );
122
+ }
109
123
  }
110
124
 
111
125
  if (sections.length === 0) return "";