npm - @os-eco/overstory-cli - Versions diffs - 0.9.1 → 0.9.3 - Mend

@os-eco/overstory-cli 0.9.1 → 0.9.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (101)

package/README.md +21 -6
package/agents/coordinator.md +34 -10
package/agents/lead.md +11 -1
package/package.json +1 -1
package/src/agents/copilot-hooks-deployer.test.ts +162 -0
package/src/agents/copilot-hooks-deployer.ts +93 -0
package/src/agents/hooks-deployer.test.ts +9 -1
package/src/agents/hooks-deployer.ts +2 -1
package/src/agents/overlay.test.ts +26 -0
package/src/agents/overlay.ts +18 -4
package/src/beads/client.ts +31 -3
package/src/commands/agents.ts +1 -1
package/src/commands/clean.test.ts +3 -0
package/src/commands/clean.ts +1 -58
package/src/commands/completions.test.ts +18 -6
package/src/commands/completions.ts +40 -1
package/src/commands/coordinator.test.ts +77 -4
package/src/commands/coordinator.ts +228 -125
package/src/commands/dashboard.ts +50 -10
package/src/commands/doctor.ts +3 -1
package/src/commands/ecosystem.test.ts +126 -1
package/src/commands/ecosystem.ts +7 -53
package/src/commands/feed.test.ts +117 -2
package/src/commands/feed.ts +46 -30
package/src/commands/group.test.ts +274 -155
package/src/commands/group.ts +11 -5
package/src/commands/init.ts +50 -0
package/src/commands/inspect.ts +8 -4
package/src/commands/log.test.ts +35 -0
package/src/commands/log.ts +10 -6
package/src/commands/logs.test.ts +423 -1
package/src/commands/logs.ts +99 -104
package/src/commands/monitor.ts +8 -2
package/src/commands/orchestrator.ts +42 -0
package/src/commands/prime.test.ts +177 -2
package/src/commands/prime.ts +4 -2
package/src/commands/sling.ts +8 -3
package/src/commands/upgrade.test.ts +2 -0
package/src/commands/upgrade.ts +1 -17
package/src/commands/watch.test.ts +67 -1
package/src/commands/watch.ts +4 -79
package/src/config.test.ts +250 -0
package/src/config.ts +43 -0
package/src/doctor/agents.test.ts +72 -5
package/src/doctor/agents.ts +10 -10
package/src/doctor/consistency.test.ts +35 -0
package/src/doctor/consistency.ts +7 -3
package/src/doctor/dependencies.test.ts +58 -1
package/src/doctor/dependencies.ts +4 -2
package/src/doctor/providers.test.ts +41 -5
package/src/doctor/types.ts +2 -1
package/src/doctor/version.test.ts +106 -2
package/src/doctor/version.ts +4 -2
package/src/doctor/watchdog.test.ts +167 -0
package/src/doctor/watchdog.ts +158 -0
package/src/e2e/init-sling-lifecycle.test.ts +2 -1
package/src/errors.test.ts +350 -0
package/src/events/tailer.test.ts +25 -0
package/src/events/tailer.ts +8 -1
package/src/index.ts +4 -1
package/src/mail/store.test.ts +110 -0
package/src/runtimes/aider.test.ts +124 -0
package/src/runtimes/aider.ts +147 -0
package/src/runtimes/amp.test.ts +164 -0
package/src/runtimes/amp.ts +154 -0
package/src/runtimes/claude.test.ts +4 -2
package/src/runtimes/codex.test.ts +38 -1
package/src/runtimes/codex.ts +22 -3
package/src/runtimes/copilot.test.ts +213 -13
package/src/runtimes/copilot.ts +93 -11
package/src/runtimes/goose.test.ts +133 -0
package/src/runtimes/goose.ts +157 -0
package/src/runtimes/pi-guards.ts +2 -1
package/src/runtimes/pi.test.ts +33 -9
package/src/runtimes/pi.ts +10 -10
package/src/runtimes/registry.test.ts +1 -1
package/src/runtimes/registry.ts +13 -4
package/src/runtimes/sapling.ts +2 -1
package/src/runtimes/types.ts +9 -2
package/src/tracker/factory.test.ts +10 -0
package/src/tracker/factory.ts +3 -2
package/src/types.ts +4 -0
package/src/utils/bin.test.ts +10 -0
package/src/utils/bin.ts +37 -0
package/src/utils/fs.test.ts +119 -0
package/src/utils/fs.ts +62 -0
package/src/utils/pid.test.ts +68 -0
package/src/utils/pid.ts +45 -0
package/src/utils/time.test.ts +43 -0
package/src/utils/time.ts +37 -0
package/src/utils/version.test.ts +33 -0
package/src/utils/version.ts +70 -0
package/src/watchdog/daemon.test.ts +255 -1
package/src/watchdog/daemon.ts +46 -9
package/src/watchdog/health.test.ts +15 -1
package/src/watchdog/health.ts +1 -1
package/src/watchdog/triage.test.ts +49 -9
package/src/watchdog/triage.ts +21 -5
package/src/worktree/tmux.test.ts +166 -49
package/src/worktree/tmux.ts +36 -37
package/templates/copilot-hooks.json.tmpl +13 -0

package/README.md CHANGED Viewed

@@ -6,7 +6,7 @@ Multi-agent orchestration for AI coding agents.
 [![CI](https://github.com/jayminwest/overstory/actions/workflows/ci.yml/badge.svg)](https://github.com/jayminwest/overstory/actions/workflows/ci.yml)
 [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
-Overstory turns a single coding session into a multi-agent team by spawning worker agents in git worktrees via tmux, coordinating them through a custom SQLite mail system, and merging their work back with tiered conflict resolution. A pluggable `AgentRuntime` interface lets you swap between runtimes — Claude Code, [Pi](https://github.com/badlogic/pi-mono/tree/main/packages/coding-agent), [Gemini CLI](https://github.com/google-gemini/gemini-cli), or your own adapter.
+Overstory turns a single coding session into a multi-agent team by spawning worker agents in git worktrees via tmux, coordinating them through a custom SQLite mail system, and merging their work back with tiered conflict resolution. A pluggable `AgentRuntime` interface lets you swap between 11 runtimes — Claude Code, [Pi](https://github.com/badlogic/pi-mono/tree/main/packages/coding-agent), [Gemini CLI](https://github.com/google-gemini/gemini-cli), [Aider](https://aider.chat), [Goose](https://github.com/block/goose), [Amp](https://amp.dev), or your own adapter.
 > **Warning: Agent swarms are not a universal solution.** Do not deploy Overstory without understanding the risks of multi-agent orchestration — compounding error rates, cost amplification, debugging complexity, and merge conflicts are the normal case, not edge cases. Read [STEELMAN.md](STEELMAN.md) for a full risk analysis and the [Agentic Engineering Book](https://github.com/jayminwest/agentic-engineering-book) ([web version](https://jayminwest.com/agentic-engineering-book)) before using this tool in production.
@@ -22,6 +22,9 @@ Requires [Bun](https://bun.sh) v1.0+, git, and tmux. At least one supported agen
 - [Cursor CLI](https://cursor.com/docs/cli/overview) (`agent` CLI)
 - [Sapling](https://github.com/jayminwest/sapling) (`sp` CLI)
 - [OpenCode](https://opencode.ai) (`opencode` CLI)
+- [Aider](https://aider.chat) (`aider` CLI)
+- [Goose](https://github.com/block/goose) (`goose` CLI)
+- [Amp](https://amp.dev) (`amp` CLI)
 ```bash
 bun install -g @os-eco/overstory-cli
@@ -102,6 +105,12 @@ Every command supports `--json` where noted. Global flags: `-q`/`--quiet`, `--ti
 | `ov coordinator ask` | Synchronous request/response to coordinator (`--subject`, `--timeout`) |
 | `ov coordinator output` | Show recent coordinator output (`--lines`) |
 | `ov coordinator check-complete` | Evaluate exit triggers, return completion status |
+| `ov orchestrator start` | Start multi-repo orchestrator agent (`--attach`/`--no-attach`, `--watchdog`, `--profile`) |
+| `ov orchestrator stop` | Stop orchestrator |
+| `ov orchestrator status` | Show orchestrator state |
+| `ov orchestrator send` | Fire-and-forget message to orchestrator (`--subject`) |
+| `ov orchestrator ask` | Synchronous request/response to orchestrator (`--subject`, `--timeout`) |
+| `ov orchestrator output` | Show recent orchestrator output (`--lines`) |
 | `ov supervisor start` | **[DEPRECATED]** Start per-project supervisor agent |
 | `ov supervisor stop` | **[DEPRECATED]** Stop supervisor |
 | `ov supervisor status` | **[DEPRECATED]** Show supervisor state |
@@ -165,7 +174,7 @@ Every command supports `--json` where noted. Global flags: `-q`/`--quiet`, `--ti
 | `ov monitor status` | Show monitor state |
 | `ov log <event>` | Log a hook event (`--agent`) |
 | `ov clean` | Clean up worktrees, sessions, artifacts (`--completed`, `--all`, `--run`) |
-| `ov doctor` | Run health checks on overstory setup — 11 categories (`--category`, `--fix`, `--json`) |
+| `ov doctor` | Run health checks on overstory setup — 12 categories (`--category`, `--fix`, `--json`) |
 | `ov ecosystem` | Show os-eco tool versions and health (`--json`) |
 | `ov upgrade` | Upgrade overstory to latest npm version (`--check`, `--all`, `--json`) |
 | `ov agents discover` | Discover agents by capability/state/parent (`--capability`, `--state`, `--parent`, `--json`) |
@@ -188,6 +197,9 @@ Overstory is runtime-agnostic. The `AgentRuntime` interface (`src/runtimes/types
 | Cursor | `agent` | (none — `--yolo`) | Experimental |
 | Codex | `codex` | OS-level sandbox (Seatbelt/Landlock) | Experimental |
 | Gemini | `gemini` | `--sandbox` flag | Experimental |
+| Aider | `aider` | (none — `--yes-always`) | Experimental |
+| Goose | `goose` | Profile-based permissions | Experimental |
+| Amp | `amp` | Built-in approval system | Experimental |
 | OpenCode | `opencode` | (none) | Experimental |
 ## How It Works
@@ -237,7 +249,7 @@ overstory/
     config.ts                     Config loader + validation
     errors.ts                     Custom error types
     json.ts                       Standardized JSON envelope helpers
-    commands/                     One file per CLI subcommand (36 commands)
+    commands/                     One file per CLI subcommand (37 commands)
       agents.ts                   Agent discovery and querying
       coordinator.ts              Persistent orchestrator lifecycle
       supervisor.ts               Team lead management [DEPRECATED]
@@ -260,7 +272,7 @@ overstory/
       run.ts                      Orchestration run lifecycle
       trace.ts                    Agent/task timeline viewing
       clean.ts                    Worktree/session cleanup
-      doctor.ts                   Health check runner (11 check modules)
+      doctor.ts                   Health check runner (12 check modules)
       inspect.ts                  Deep per-agent inspection
       spec.ts                     Task spec management
       errors.ts                   Aggregated error view
@@ -272,6 +284,7 @@ overstory/
       update.ts                   Refresh managed files
       upgrade.ts                  npm version upgrades
       discover.ts                 Brownfield codebase discovery via coordinator-driven scout swarm
+      orchestrator.ts             Multi-repo coordination (PersistentAgentSpec)
       completions.ts              Shell completion generation (bash/zsh/fish)
     canopy/
       client.ts                   Canopy client (prompt rendering, listing, emission)
@@ -282,6 +295,7 @@ overstory/
       checkpoint.ts               Session checkpoint save/restore
       lifecycle.ts                Handoff orchestration
       hooks-deployer.ts           Deploy hooks + tool enforcement
+      copilot-hooks-deployer.ts   Deploy hooks config to Copilot worktrees
       guard-rules.ts              Shared guard constants (tool lists, bash patterns)
     worktree/                     Git worktree + tmux management
     mail/                         SQLite mail system (typed protocol, broadcast)
@@ -289,9 +303,10 @@ overstory/
     watchdog/                     Tiered health monitoring (daemon, triage, health)
     logging/                      Multi-format logger + sanitizer + reporter + color control + shared theme/format
     metrics/                      SQLite metrics + pricing + transcript parsing
-    doctor/                       Health check modules (11 checks)
+    doctor/                       Health check modules (12 checks)
+    utils/                        Shared utilities (bin, fs, pid, time, version)
     insights/                     Session insight analyzer for auto-expertise
-    runtimes/                     AgentRuntime abstraction (registry + adapters: Claude, Pi, Copilot, Codex, Gemini, Sapling, OpenCode, Cursor)
+    runtimes/                     AgentRuntime abstraction (registry + adapters: Claude, Pi, Copilot, Codex, Gemini, Sapling, OpenCode, Cursor, Aider, Goose, Amp)
     tracker/                      Pluggable task tracker (beads + seeds backends)
     mulch/                        mulch client (programmatic API + CLI wrapper)
     e2e/                          End-to-end lifecycle tests

package/agents/coordinator.md CHANGED Viewed

@@ -11,12 +11,13 @@ Every spawned agent costs a full Claude Code session. The coordinator must be ec
 - **Avoid polling loops.** Check status after each mail, or at reasonable intervals. The mail system notifies you of completions.
 - **Trust your leads.** Do not micromanage. Give leads clear objectives and let them decompose, explore, spec, and build autonomously. Only intervene on escalations or stalls.
 - **Prefer fewer, broader leads** over many narrow ones. A lead managing 5 builders is more efficient than you coordinating 5 builders directly.
+- **Compress roles when the budget is tight.** If keeping total agents low matters, you may act as a combined coordinator/lead by spawning a scout or builder directly for a narrow work stream, or dispatch a lead with `--dispatch-max-agents 1` or `2` so the lead compresses into lead/worker mode.
 ## failure-modes
 These are named failures. If you catch yourself doing any of these, stop and correct immediately.
-- **HIERARCHY_BYPASS** -- Spawning a builder, scout, reviewer, or merger directly without going through a lead. The coordinator dispatches leads only. Leads handle all downstream agent management. This is code-enforced but you should not even attempt it.
+- **HIERARCHY_BYPASS** -- Spawning a reviewer or merger directly, or spawning a builder/scout directly for work that clearly needs a lead-owned work stream. Direct scout/builder fallback is only for narrow or budget-constrained cases.
 - **SPEC_WRITING** -- Writing spec files or using the Write/Edit tools. You have no write access. Leads produce specs (via their scouts). Your job is to provide high-level objectives in {{TRACKER_NAME}} issues and dispatch mail.
 - **CODE_MODIFICATION** -- Using Write or Edit on any file. You are a coordinator, not an implementer.
 - **UNNECESSARY_SPAWN** -- Spawning a lead for a trivially small task. If the objective is a single small change, a single lead is sufficient. Only spawn multiple leads for genuinely independent work streams.
@@ -46,7 +47,7 @@ This file tells you HOW to coordinate. Your objectives come from the channels ab
 - **NEVER** use the Write tool on any file. You have no write access.
 - **NEVER** use the Edit tool on any file. You have no write access.
 - **NEVER** write spec files. Leads own spec production -- they spawn scouts to explore, then write specs from findings.
-- **NEVER** spawn builders, scouts, reviewers, or mergers directly. Only spawn leads. This is enforced by `sling.ts` (HierarchyError).
+- **NEVER** spawn reviewers or mergers directly. `sling.ts` allows direct `lead`, `scout`, and `builder` spawns, but direct `scout`/`builder` use is a fallback for low-budget or very small tasks, not the default.
 - **NEVER** run bash commands that modify source code, dependencies, or git history:
   - No `git commit`, `git checkout`, `git merge`, `git push`, `git reset`
   - No `rm`, `mv`, `cp`, `mkdir` on source directories
@@ -118,7 +119,7 @@ You are the **coordinator agent** in the overstory swarm system. You are the per
 ## role
-You are the top-level decision-maker for automated work. When a human gives you an objective (a feature, a refactor, a migration), you analyze it, create high-level {{TRACKER_NAME}} issues, dispatch **lead agents** to own each work stream, monitor their progress via mail and status checks, and handle escalations. Leads handle all downstream coordination: they spawn scouts to explore, write specs from findings, spawn builders to implement, and spawn reviewers to validate. You operate from the project root with full read visibility but **no write access** to any files. Your outputs are issues, lead dispatches, and coordination messages -- never code, never specs.
+You are the top-level decision-maker for automated work. When a human gives you an objective (a feature, a refactor, a migration), you analyze it, create high-level {{TRACKER_NAME}} issues, dispatch **lead agents** to own each work stream, monitor their progress via mail and status checks, and handle escalations. Leads handle all downstream coordination: they spawn scouts to explore, write specs from findings, spawn builders to implement, and spawn reviewers to validate. When the available agent budget is intentionally small, you may compress roles by either spawning a direct scout/builder yourself or by dispatching a lead with a very small `--dispatch-max-agents` budget. You operate from the project root with full read visibility but **no write access** to any files. Your outputs are issues, dispatches, and coordination messages -- never code, never specs.
 ## capabilities
@@ -128,7 +129,7 @@ You are the top-level decision-maker for automated work. When a human gives you
 - **Grep** -- search file contents with regex
 - **Bash** (coordination commands only):
   - `{{TRACKER_CLI}} create`, `{{TRACKER_CLI}} show`, `{{TRACKER_CLI}} ready`, `{{TRACKER_CLI}} update`, `{{TRACKER_CLI}} close`, `{{TRACKER_CLI}} list`, `{{TRACKER_CLI}} sync` (full {{TRACKER_NAME}} lifecycle)
-  - `ov sling` (spawn lead agents into worktrees)
+  - `ov sling` (spawn lead agents by default; direct scout/builder fallback for low-budget narrow work)
   - `ov status` (monitor active agents and worktrees)
   - `ov mail send`, `ov mail check`, `ov mail list`, `ov mail read`, `ov mail reply` (full mail protocol)
   - `ov nudge <agent> [message]` (poke stalled leads)
@@ -141,7 +142,7 @@ You are the top-level decision-maker for automated work. When a human gives you
 ### Spawning Agents
-**You may ONLY spawn leads. This is code-enforced by `sling.ts` -- attempting to spawn builder, scout, reviewer, or merger without `--parent` will throw a HierarchyError.**
+**Default:** spawn leads. **Fallback:** you may also spawn a `scout` or `builder` directly when the work stream is narrow enough that a separate lead would be pure overhead, or when the agent budget is intentionally low. Never spawn `reviewer` or `merger` directly.
 ```bash
 ov sling <task-id> \
@@ -150,7 +151,20 @@ ov sling <task-id> \
   --depth 1
 ```
-You are always at depth 0. Leads you spawn are depth 1. Leads spawn their own scouts, builders, and reviewers at depth 2. This is the designed hierarchy:
+Low-budget fallback examples:
+```bash
+# Direct scout: coordinator is acting as combined coordinator/lead
+ov sling <task-id> --capability scout --name <scout-name> --depth 1
+# Direct builder for a small, concrete task that does not need a separate lead/spec cycle
+ov sling <task-id> --capability builder --name <builder-name> --depth 1
+# Compressed lead: keep the lead, but force it to act as lead/worker
+ov sling <task-id> --capability lead --name <lead-name> --depth 1 --dispatch-max-agents 1
+```
+You are always at depth 0. In the normal hierarchy, leads you spawn are depth 1. Leads spawn their own scouts, builders, and reviewers at depth 2:
 ```
 Coordinator (you, depth 0)
@@ -160,6 +174,13 @@ Coordinator (you, depth 0)
         └── Reviewer (depth 2) — validates quality
 ```
+Compressed hierarchy is also valid when you are deliberately minimizing agent count:
+```
+Coordinator (you, depth 0, acting as coordinator/lead)
+  └── Scout or Builder (depth 1)
+```
 ### Communication
 - **Send typed mail:** `ov mail send --to <agent> --subject "<subject>" --body "<body>" --type <type> --priority <priority>`
 - **Check inbox:** `ov mail check` (unread messages)
@@ -206,6 +227,9 @@ Coordinator (you, depth 0)
    ```bash
    ov sling <task-id> --capability lead --name <lead-name> --depth 1
    ```
+   If a work stream is very small or the available agent budget is intentionally constrained, you may instead:
+   - Spawn a direct `scout` or `builder` and treat yourself as the combined coordinator/lead for that stream.
+   - Spawn a lead with `--dispatch-max-agents 1` or `--dispatch-max-agents 2` so the lead compresses its downstream roles.
 6. **Send dispatch mail** to each lead with the high-level objective:
    ```bash
    ov mail send --to <lead-name> --subject "Work stream: <title>" \
@@ -295,16 +319,16 @@ When a batch is complete (task group auto-closed, all issues resolved):
 4. **Only then** close the issue: `{{TRACKER_CLI}} close <id> --reason "Merged branch <branch-name>"`.
 1. Verify all issues are closed: run `{{TRACKER_CLI}} show <id>` for each issue in the group.
-2. Verify all branches are merged: check `ov status` for unmerged branches. If any branch is unmerged, do NOT proceed — wait for the lead's `merge_ready` signal.
-3. Clean up worktrees: `ov worktree clean --completed`.
-4. Record orchestration insights: `ml record <domain> --type <type> --classification <foundational|tactical|observational> --description "<insight>"`.
-5. Commit and sync state files: after all work is merged and issues are closed, commit any outstanding state changes so runtime state is not left uncommitted when the coordinator goes idle:
+2. Verify all branches are merged: check `ov status` for unmerged branches. If any branch is unmerged, do NOT proceed — wait for the lead's `merge_ready` signal. **Note:** merged branches carry each worker's committed `.mulch/` changes into the canonical branch — this is how discovery scout findings reach the main repo.
+3. Record orchestration insights: `ml record <domain> --type <type> --classification <foundational|tactical|observational> --description "<insight>"`.
+4. Commit and sync state files: after all work is merged and issues are closed, commit any outstanding state changes so runtime state is not left uncommitted when the coordinator goes idle:
    ```bash
    {{TRACKER_CLI}} sync
    git add .overstory/ .mulch/
    git diff --cached --quiet || git commit -m "chore: sync runtime state"
    git push
    ```
+5. Clean up worktrees: `ov worktree clean --completed`. **Only run this after branches are merged and .mulch/ state is committed** — cleaning worktrees before merging destroys any uncommitted scout findings.
 6. Report to the human operator: summarize what was accomplished, what was merged, any issues encountered.
 7. Check for follow-up work: `{{TRACKER_CLI}} ready` to see if new issues surfaced during the batch.

package/agents/lead.md CHANGED Viewed

@@ -9,6 +9,11 @@ Your overlay may contain a **Dispatch Overrides** section with directives from y
 - **SKIP REVIEW**: Do not spawn a reviewer. Self-verify by reading the builder diff and running quality gates. This is appropriate for simple or well-tested changes.
 - **MAX AGENTS**: Limits the number of sub-workers you may spawn. Plan your decomposition to fit within this budget.
+Budget compression rules:
+- **MAX AGENTS = 1**: Act as a combined **lead/worker**. Default to doing the implementation yourself. Only use the single spawn slot if one specialist is clearly more valuable than your own direct work.
+- **MAX AGENTS = 2**: Act as a compressed lead. Prefer at most one helper at a time, then finish remaining implementation and verification yourself. Do not assume there is room for a separate reviewer.
+- **MAX AGENTS >= 3**: Use normal lead behavior and choose the right scout/builder/reviewer mix for the task.
 Always check your overlay for dispatch overrides before following the default three-phase workflow. If no overrides section exists, follow the standard playbook.
 ## cost-awareness
@@ -19,6 +24,8 @@ Scouts and reviewers are quality investments, not overhead. Skipping a scout to
 Reviewers are valuable for complex changes but optional for simple ones. The lead can self-verify simple changes by reading the diff and running quality gates, saving a full agent spawn.
+When your overlay gives you a very small agent budget, role compression beats ceremony. A correct combined lead/worker execution is better than blocking on an ideal scout -> builder -> reviewer chain that the budget cannot support.
 Where to actually save tokens:
 - Prefer fewer, well-scoped builders over many small ones.
 - Batch status updates instead of sending per-worker messages.
@@ -143,7 +150,7 @@ Criteria — ANY:
 - Straightforward implementation with clear spec
 - Single builder can handle the full scope
-Action: Skip scouts if you have sufficient context (mulch records, dispatch details, file reads). Spawn one builder. Lead verifies by reading the diff and checking quality gates instead of spawning a reviewer.
+Action: Skip scouts if you have sufficient context (mulch records, dispatch details, file reads). Spawn one builder. Lead verifies by reading the diff and checking quality gates instead of spawning a reviewer. If **MAX AGENTS = 1**, do this work yourself instead of spawning the builder.
 ### Complex Tasks (Full Pipeline)
 Criteria — ANY:
@@ -153,6 +160,9 @@ Criteria — ANY:
 - Multiple builders needed with file scope partitioning
 Action: Full Scout → Build → Verify pipeline. Spawn scouts for exploration, multiple builders for parallel work, reviewers for independent verification.
+If your overlay budget is too small to support that pipeline, compress roles deliberately:
+- With **MAX AGENTS = 2**, use one scout or one builder, not both in parallel, then do the remaining work and verification yourself.
+- With **MAX AGENTS = 1**, you are effectively the worker. Explore just enough to ground the change, implement directly, and self-verify.
 ## three-phase-workflow

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
 	"name": "@os-eco/overstory-cli",
-	"version": "0.9.1",
+	"version": "0.9.3",
 	"description": "Multi-agent orchestration for AI coding agents — spawn workers in git worktrees via tmux, coordinate through SQLite mail, merge with tiered conflict resolution. Pluggable runtime adapters for Claude Code, Pi, and more.",
 	"author": "Jaymin West",
 	"license": "MIT",

package/src/agents/copilot-hooks-deployer.test.ts ADDED Viewed

@@ -0,0 +1,162 @@
+import { afterEach, beforeEach, describe, expect, test } from "bun:test";
+import { mkdtemp } from "node:fs/promises";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import { cleanupTempDir } from "../test-helpers.ts";
+import { deployCopilotHooks } from "./copilot-hooks-deployer.ts";
+import { PATH_PREFIX } from "./hooks-deployer.ts";
+describe("deployCopilotHooks", () => { // Suite: each test deploys hooks into a fresh temp dir and inspects the generated hooks.json
+	let tempDir: string;
+	beforeEach(async () => { // fresh temp directory per test so runs do not share state
+		tempDir = await mkdtemp(join(tmpdir(), "overstory-copilot-hooks-test-"));
+	});
+	afterEach(async () => { // remove the temp directory created in beforeEach
+		await cleanupTempDir(tempDir);
+	});
+	test("writes hooks.json to .github/hooks/ directory", async () => {
+		const worktreePath = join(tempDir, "worktree");
+		await deployCopilotHooks(worktreePath, "my-builder");
+		const hooksPath = join(worktreePath, ".github", "hooks", "hooks.json");
+		const exists = await Bun.file(hooksPath).exists();
+		expect(exists).toBe(true);
+	});
+	test("creates .github/hooks/ directory if it does not exist", async () => {
+		const worktreePath = join(tempDir, "new-worktree");
+		// The worktree directory is never created by the test, so it does not exist before the call
+		await deployCopilotHooks(worktreePath, "builder-1");
+		const hooksPath = join(worktreePath, ".github", "hooks", "hooks.json");
+		expect(await Bun.file(hooksPath).exists()).toBe(true);
+	});
+	test("output file is valid JSON", async () => {
+		const worktreePath = join(tempDir, "worktree");
+		await deployCopilotHooks(worktreePath, "test-agent");
+		const hooksPath = join(worktreePath, ".github", "hooks", "hooks.json");
+		const raw = await Bun.file(hooksPath).text();
+		expect(() => JSON.parse(raw)).not.toThrow();
+	});
+	test("output has Copilot schema structure (top-level hooks with onSessionStart)", async () => {
+		const worktreePath = join(tempDir, "worktree");
+		await deployCopilotHooks(worktreePath, "test-agent");
+		const hooksPath = join(worktreePath, ".github", "hooks", "hooks.json");
+		const config = JSON.parse(await Bun.file(hooksPath).text()) as Record<string, unknown>;
+		expect(config).toHaveProperty("hooks");
+		const hooks = config.hooks as Record<string, unknown>;
+		expect(hooks).toHaveProperty("onSessionStart");
+		expect(Array.isArray(hooks.onSessionStart)).toBe(true);
+	});
+	test("replaces {{AGENT_NAME}} with agentName in all commands", async () => {
+		const worktreePath = join(tempDir, "worktree");
+		await deployCopilotHooks(worktreePath, "scout-agent-42");
+		const hooksPath = join(worktreePath, ".github", "hooks", "hooks.json");
+		const raw = await Bun.file(hooksPath).text(); // checked as raw text so any leftover placeholder anywhere in the file is caught
+		expect(raw).toContain("scout-agent-42");
+		expect(raw).not.toContain("{{AGENT_NAME}}");
+	});
+	test("prepends PATH_PREFIX to all hook commands", async () => {
+		const worktreePath = join(tempDir, "worktree");
+		await deployCopilotHooks(worktreePath, "builder-1");
+		const hooksPath = join(worktreePath, ".github", "hooks", "hooks.json");
+		const config = JSON.parse(await Bun.file(hooksPath).text()) as {
+			hooks: Record<string, Array<{ command: string }>>;
+		};
+		const allCommands = Object.values(config.hooks) // flatten every hook event's entries into one command list
+			.flat()
+			.map((e) => e.command);
+		expect(allCommands.length).toBeGreaterThan(0); // guards against a vacuous pass when no hooks are present
+		for (const cmd of allCommands) {
+			expect(cmd).toStartWith(PATH_PREFIX);
+		}
+	});
+	test("onSessionStart entries are objects with command field only (no matcher, no type)", async () => {
+		const worktreePath = join(tempDir, "worktree");
+		await deployCopilotHooks(worktreePath, "builder-1");
+		const hooksPath = join(worktreePath, ".github", "hooks", "hooks.json");
+		const config = JSON.parse(await Bun.file(hooksPath).text()) as {
+			hooks: { onSessionStart: Array<Record<string, unknown>> };
+		};
+		for (const entry of config.hooks.onSessionStart) {
+			expect(typeof entry.command).toBe("string");
+			// Copilot schema has no matcher or type fields, so neither may appear on an entry
+			expect(entry).not.toHaveProperty("matcher");
+			expect(entry).not.toHaveProperty("type");
+		}
+	});
+	test("onSessionStart includes ov prime command", async () => {
+		const worktreePath = join(tempDir, "worktree");
+		await deployCopilotHooks(worktreePath, "prime-test-agent");
+		const hooksPath = join(worktreePath, ".github", "hooks", "hooks.json");
+		const config = JSON.parse(await Bun.file(hooksPath).text()) as {
+			hooks: { onSessionStart: Array<{ command: string }> };
+		};
+		const commands = config.hooks.onSessionStart.map((e) => e.command);
+		expect(commands.some((c) => c.includes("ov prime") && c.includes("prime-test-agent"))).toBe(
+			true,
+		);
+	});
+	test("onSessionStart includes ov mail check --inject command", async () => {
+		const worktreePath = join(tempDir, "worktree");
+		await deployCopilotHooks(worktreePath, "mail-test-agent");
+		const hooksPath = join(worktreePath, ".github", "hooks", "hooks.json");
+		const config = JSON.parse(await Bun.file(hooksPath).text()) as {
+			hooks: { onSessionStart: Array<{ command: string }> };
+		};
+		const commands = config.hooks.onSessionStart.map((e) => e.command);
+		expect(
+			commands.some((c) => c.includes("ov mail check --inject") && c.includes("mail-test-agent")),
+		).toBe(true);
+	});
+	test("all hook commands include ENV_GUARD pattern", async () => {
+		const worktreePath = join(tempDir, "worktree");
+		await deployCopilotHooks(worktreePath, "guard-test-agent");
+		const hooksPath = join(worktreePath, ".github", "hooks", "hooks.json");
+		const config = JSON.parse(await Bun.file(hooksPath).text()) as {
+			hooks: Record<string, Array<{ command: string }>>;
+		};
+		const allCommands = Object.values(config.hooks)
+			.flat()
+			.map((e) => e.command);
+		for (const cmd of allCommands) { // NOTE(review): no length check here, so this loop passes vacuously if allCommands is empty
+			expect(cmd).toContain("OVERSTORY_AGENT_NAME"); // the guard is detected by the env var name it references
+		}
+	});
+	test("template file exists and is valid JSON after substitution", async () => {
+		// Verify template file is present and parseable (basic template health check).
+		const templatePath = join(import.meta.dir, "..", "..", "templates", "copilot-hooks.json.tmpl");
+		const exists = await Bun.file(templatePath).exists();
+		expect(exists).toBe(true);
+		const raw = (await Bun.file(templatePath).text()).replace(/\{\{AGENT_NAME\}\}/g, "test"); // substitute the placeholder with a dummy name before parsing
+		expect(() => JSON.parse(raw)).not.toThrow();
+	});
+});

package/src/agents/copilot-hooks-deployer.ts ADDED Viewed

@@ -0,0 +1,93 @@
+import { mkdir } from "node:fs/promises";
+import { dirname, join } from "node:path";
+import { AgentError } from "../errors.ts";
+import { PATH_PREFIX } from "./hooks-deployer.ts";
+/**
+ * Shape of a single Copilot hook entry.
+ * Simpler than the Claude Code equivalent: there is no matcher and no type field,
+ * only the shell command to run.
+ */
+type CopilotHookEntry = {
+	command: string;
+};
+/**
+ * Build the filesystem path of the Copilot hooks template.
+ * The template is `templates/copilot-hooks.json.tmpl`, located relative to the repo root.
+ */
+function getTemplatePath(): string {
+	// This module sits at src/agents/, so its directory's parent is src/ and the
+	// repo root is one level above that.
+	const srcDir = dirname(import.meta.dir);
+	const repoRoot = join(srcDir, "..");
+	return join(repoRoot, "templates", "copilot-hooks.json.tmpl");
+}
+/**
+ * Deploy Copilot lifecycle hooks to an agent's worktree.
+ *
+ * Reads `templates/copilot-hooks.json.tmpl`, replaces all `{{AGENT_NAME}}` tokens,
+ * prepends PATH_PREFIX to every hook command so CLIs (ov, ml, sd) resolve correctly
+ * under Copilot's minimal PATH, then writes the result to
+ * `<worktreePath>/.github/hooks/hooks.json`.
+ *
+ * Phase 1: lifecycle hooks only (onSessionStart). No security guards.
+ *
+ * @param worktreePath - Absolute path to the agent's git worktree
+ * @param agentName - The unique name of the agent (replaces {{AGENT_NAME}} in template)
+ * @throws {AgentError} If the template is missing, cannot be read, or is not valid JSON
+ *   after substitution, or if the hooks directory or file cannot be written
+ */
+export async function deployCopilotHooks(worktreePath: string, agentName: string): Promise<void> {
+	const templatePath = getTemplatePath();
+	const file = Bun.file(templatePath);
+	const exists = await file.exists();
+	if (!exists) {
+		throw new AgentError(`Copilot hooks template not found: ${templatePath}`, {
+			agentName,
+		});
+	}
+	let template: string;
+	try {
+		template = await file.text();
+	} catch (err) {
+		throw new AgentError(`Failed to read Copilot hooks template: ${templatePath}`, {
+			agentName,
+			cause: err instanceof Error ? err : undefined,
+		});
+	}
+	// Replace all occurrences of {{AGENT_NAME}} in a single pass.
+	// A function replacer inserts agentName verbatim: `$&`, `$1`, `$$` etc. in the name
+	// are not treated as replacement patterns, and a name that itself contains the
+	// token cannot cause repeated re-substitution.
+	const content = template.replace(/\{\{AGENT_NAME\}\}/g, () => agentName);
+	// Parse the base config from the template. Surface malformed JSON as an AgentError
+	// so callers see the same error type as for every other deployment failure.
+	let config: { hooks: Record<string, CopilotHookEntry[]> };
+	try {
+		config = JSON.parse(content) as { hooks: Record<string, CopilotHookEntry[]> };
+	} catch (err) {
+		throw new AgentError(`Failed to parse Copilot hooks template: ${templatePath}`, {
+			agentName,
+			cause: err instanceof Error ? err : undefined,
+		});
+	}
+	// Extend PATH in all hook commands.
+	// Copilot CLI executes hooks with a minimal PATH — ~/.bun/bin (where ov, ml, sd live)
+	// is not included. Prepend PATH_PREFIX so CLIs resolve correctly.
+	for (const entries of Object.values(config.hooks)) {
+		for (const entry of entries) {
+			entry.command = `${PATH_PREFIX} ${entry.command}`;
+		}
+	}
+	const hooksDir = join(worktreePath, ".github", "hooks");
+	const outputPath = join(hooksDir, "hooks.json");
+	try {
+		await mkdir(hooksDir, { recursive: true });
+	} catch (err) {
+		throw new AgentError(`Failed to create .github/hooks/ directory at: ${hooksDir}`, {
+			agentName,
+			cause: err instanceof Error ? err : undefined,
+		});
+	}
+	try {
+		// Tab-indented JSON with a trailing newline.
+		await Bun.write(outputPath, `${JSON.stringify(config, null, "\t")}\n`);
+	} catch (err) {
+		throw new AgentError(`Failed to write Copilot hooks config to: ${outputPath}`, {
+			agentName,
+			cause: err instanceof Error ? err : undefined,
+		});
+	}
+}

package/src/agents/hooks-deployer.test.ts CHANGED Viewed

@@ -2555,7 +2555,15 @@ describe("deployHooks tracker close guard integration", () => {
 	});
 	test("deployHooks includes tracker close guard in PreToolUse for all capabilities", async () => {
-		const capabilities = ["builder", "scout", "reviewer", "lead", "merger", "coordinator"];
+		const capabilities = [
+			"builder",
+			"scout",
+			"reviewer",
+			"lead",
+			"merger",
+			"orchestrator",
+			"coordinator",
+		];
 		for (const cap of capabilities) {
 			const wt = join(tempDir, `${cap}-tc-wt`);

package/src/agents/hooks-deployer.ts CHANGED Viewed

@@ -20,6 +20,7 @@ const NON_IMPLEMENTATION_CAPABILITIES = new Set([
 	"scout",
 	"reviewer",
 	"lead",
+	"orchestrator",
 	"coordinator",
 	"supervisor",
 	"monitor",
@@ -29,7 +30,7 @@ const NON_IMPLEMENTATION_CAPABILITIES = new Set([
  * Capabilities that coordinate work and need git add/commit for syncing
  * tasks, mulch, and other metadata — but must NOT git push.
  */
-const COORDINATION_CAPABILITIES = new Set(["coordinator", "supervisor", "monitor"]);
+const COORDINATION_CAPABILITIES = new Set(["coordinator", "orchestrator", "supervisor", "monitor"]);
 /**
  * Additional safe Bash prefixes for coordination capabilities.

package/src/agents/overlay.test.ts CHANGED Viewed

@@ -523,6 +523,32 @@ describe("generateOverlay", () => {
 		expect(output).toContain("3");
 	});
+	test("dispatch overrides: maxAgentsOverride of 1 enables combined lead/worker guidance", async () => {
+		// A lead with an agent ceiling of 1 should get the combined lead/worker wording.
+		const rendered = await generateOverlay(
+			makeConfig({
+				capability: "lead",
+				maxAgentsOverride: 1,
+				canSpawn: true,
+			}),
+		);
+		for (const fragment of ["MAX AGENTS", "combined **lead/worker**", "only slot"]) {
+			expect(rendered).toContain(fragment);
+		}
+	});
+	test("dispatch overrides: maxAgentsOverride of 2 enables compressed-mode guidance", async () => {
+		// A lead with an agent ceiling of 2 should get the compressed-mode wording.
+		const rendered = await generateOverlay(
+			makeConfig({
+				capability: "lead",
+				maxAgentsOverride: 2,
+				canSpawn: true,
+			}),
+		);
+		for (const fragment of ["MAX AGENTS", "compressed mode", "self-verification"]) {
+			expect(rendered).toContain(fragment);
+		}
+	});
 	test("dispatch overrides: both skipReview and maxAgentsOverride together", async () => {
 		const config = makeConfig({
 			capability: "lead",

package/src/agents/overlay.ts CHANGED Viewed

@@ -102,10 +102,24 @@ function formatDispatchOverrides(config: OverlayConfig): string {
 	}
 	if (config.maxAgentsOverride !== undefined && config.maxAgentsOverride > 0) {
-		sections.push(
-			`- **MAX AGENTS**: Your per-lead agent ceiling has been set to **${config.maxAgentsOverride}**. ` +
-				"Do not spawn more than this many sub-workers.",
-		);
+		if (config.maxAgentsOverride === 1) {
+			sections.push(
+				"- **MAX AGENTS**: Your per-lead agent ceiling has been set to **1**. " +
+					"Operate as a combined **lead/worker**: implement the task yourself unless a single specialist is absolutely necessary. " +
+					"Do not spend your only slot on a scout or reviewer unless that specialist work is the real bottleneck.",
+			);
+		} else if (config.maxAgentsOverride === 2) {
+			sections.push(
+				"- **MAX AGENTS**: Your per-lead agent ceiling has been set to **2**. " +
+					"Operate in compressed mode: use at most one helper at a time when possible, then complete the remaining implementation and verification yourself. " +
+					"Prefer self-verification over spawning a separate reviewer.",
+			);
+		} else {
+			sections.push(
+				`- **MAX AGENTS**: Your per-lead agent ceiling has been set to **${config.maxAgentsOverride}**. ` +
+					"Do not spawn more than this many sub-workers.",
+			);
+		}
 	}
 	if (sections.length === 0) return "";