npm - @os-eco/overstory-cli - Versions diffs - 0.8.7 → 0.9.2 - Mend

@os-eco/overstory-cli 0.8.7 → 0.9.2

This diff compares the contents of two publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their public registry.

Files changed (98)

package/README.md +26 -8
package/agents/coordinator.md +30 -6
package/agents/lead.md +11 -1
package/agents/ov-co-creation.md +90 -0
package/package.json +1 -1
package/src/agents/hooks-deployer.test.ts +9 -1
package/src/agents/hooks-deployer.ts +2 -1
package/src/agents/overlay.test.ts +26 -0
package/src/agents/overlay.ts +31 -4
package/src/canopy/client.test.ts +107 -0
package/src/canopy/client.ts +179 -0
package/src/commands/agents.ts +1 -1
package/src/commands/clean.test.ts +3 -0
package/src/commands/clean.ts +1 -58
package/src/commands/completions.test.ts +18 -6
package/src/commands/completions.ts +40 -1
package/src/commands/coordinator.test.ts +77 -4
package/src/commands/coordinator.ts +304 -146
package/src/commands/dashboard.ts +47 -10
package/src/commands/discover.test.ts +288 -0
package/src/commands/discover.ts +202 -0
package/src/commands/doctor.ts +3 -1
package/src/commands/ecosystem.test.ts +126 -1
package/src/commands/ecosystem.ts +7 -53
package/src/commands/feed.test.ts +117 -2
package/src/commands/feed.ts +46 -30
package/src/commands/group.test.ts +274 -155
package/src/commands/group.ts +11 -5
package/src/commands/init.test.ts +2 -1
package/src/commands/init.ts +8 -0
package/src/commands/log.test.ts +35 -0
package/src/commands/log.ts +10 -6
package/src/commands/logs.test.ts +423 -1
package/src/commands/logs.ts +99 -104
package/src/commands/orchestrator.ts +42 -0
package/src/commands/prime.test.ts +177 -2
package/src/commands/prime.ts +4 -2
package/src/commands/sling.ts +23 -3
package/src/commands/update.test.ts +1 -0
package/src/commands/upgrade.test.ts +2 -0
package/src/commands/upgrade.ts +1 -17
package/src/commands/watch.test.ts +67 -1
package/src/commands/watch.ts +13 -88
package/src/config.test.ts +250 -0
package/src/config.ts +43 -0
package/src/doctor/agents.test.ts +72 -5
package/src/doctor/agents.ts +10 -10
package/src/doctor/consistency.test.ts +35 -0
package/src/doctor/consistency.ts +7 -3
package/src/doctor/dependencies.test.ts +58 -1
package/src/doctor/dependencies.ts +4 -2
package/src/doctor/providers.test.ts +41 -5
package/src/doctor/types.ts +2 -1
package/src/doctor/version.test.ts +106 -2
package/src/doctor/version.ts +4 -2
package/src/doctor/watchdog.test.ts +167 -0
package/src/doctor/watchdog.ts +158 -0
package/src/e2e/init-sling-lifecycle.test.ts +4 -2
package/src/errors.test.ts +350 -0
package/src/events/tailer.test.ts +25 -0
package/src/events/tailer.ts +8 -1
package/src/index.ts +9 -1
package/src/mail/store.test.ts +110 -0
package/src/mail/store.ts +2 -1
package/src/runtimes/aider.test.ts +124 -0
package/src/runtimes/aider.ts +147 -0
package/src/runtimes/amp.test.ts +164 -0
package/src/runtimes/amp.ts +154 -0
package/src/runtimes/claude.test.ts +4 -2
package/src/runtimes/goose.test.ts +133 -0
package/src/runtimes/goose.ts +157 -0
package/src/runtimes/pi-guards.ts +2 -1
package/src/runtimes/pi.test.ts +9 -9
package/src/runtimes/pi.ts +6 -7
package/src/runtimes/registry.test.ts +1 -1
package/src/runtimes/registry.ts +13 -4
package/src/runtimes/sapling.ts +2 -1
package/src/runtimes/types.ts +2 -2
package/src/schema-consistency.test.ts +1 -0
package/src/sessions/store.ts +25 -4
package/src/types.ts +65 -1
package/src/utils/bin.test.ts +10 -0
package/src/utils/bin.ts +37 -0
package/src/utils/fs.test.ts +119 -0
package/src/utils/fs.ts +62 -0
package/src/utils/pid.test.ts +68 -0
package/src/utils/pid.ts +45 -0
package/src/utils/time.test.ts +43 -0
package/src/utils/time.ts +37 -0
package/src/utils/version.test.ts +33 -0
package/src/utils/version.ts +70 -0
package/src/watchdog/daemon.test.ts +255 -1
package/src/watchdog/daemon.ts +87 -9
package/src/watchdog/health.test.ts +15 -1
package/src/watchdog/health.ts +1 -1
package/src/watchdog/triage.test.ts +49 -9
package/src/watchdog/triage.ts +21 -5
package/templates/overlay.md.tmpl +2 -0

package/README.md CHANGED Viewed

@@ -6,7 +6,7 @@ Multi-agent orchestration for AI coding agents.
 [![CI](https://github.com/jayminwest/overstory/actions/workflows/ci.yml/badge.svg)](https://github.com/jayminwest/overstory/actions/workflows/ci.yml)
 [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
-Overstory turns a single coding session into a multi-agent team by spawning worker agents in git worktrees via tmux, coordinating them through a custom SQLite mail system, and merging their work back with tiered conflict resolution. A pluggable `AgentRuntime` interface lets you swap between runtimes — Claude Code, [Pi](https://github.com/badlogic/pi-mono/tree/main/packages/coding-agent), [Gemini CLI](https://github.com/google-gemini/gemini-cli), or your own adapter.
+Overstory turns a single coding session into a multi-agent team by spawning worker agents in git worktrees via tmux, coordinating them through a custom SQLite mail system, and merging their work back with tiered conflict resolution. A pluggable `AgentRuntime` interface lets you swap between 11 runtimes — Claude Code, [Pi](https://github.com/badlogic/pi-mono/tree/main/packages/coding-agent), [Gemini CLI](https://github.com/google-gemini/gemini-cli), [Aider](https://aider.chat), [Goose](https://github.com/block/goose), [Amp](https://amp.dev), or your own adapter.
 > **Warning: Agent swarms are not a universal solution.** Do not deploy Overstory without understanding the risks of multi-agent orchestration — compounding error rates, cost amplification, debugging complexity, and merge conflicts are the normal case, not edge cases. Read [STEELMAN.md](STEELMAN.md) for a full risk analysis and the [Agentic Engineering Book](https://github.com/jayminwest/agentic-engineering-book) ([web version](https://jayminwest.com/agentic-engineering-book)) before using this tool in production.
@@ -22,6 +22,9 @@ Requires [Bun](https://bun.sh) v1.0+, git, and tmux. At least one supported agen
 - [Cursor CLI](https://cursor.com/docs/cli/overview) (`agent` CLI)
 - [Sapling](https://github.com/jayminwest/sapling) (`sp` CLI)
 - [OpenCode](https://opencode.ai) (`opencode` CLI)
+- [Aider](https://aider.chat) (`aider` CLI)
+- [Goose](https://github.com/block/goose) (`goose` CLI)
+- [Amp](https://amp.dev) (`amp` CLI)
 ```bash
 bun install -g @os-eco/overstory-cli
@@ -84,23 +87,30 @@ Every command supports `--json` where noted. Global flags: `-q`/`--quiet`, `--ti
 | Command | Description |
 |---------|-------------|
 | `ov init` | Initialize `.overstory/` and bootstrap os-eco tools (`--yes`, `--name`, `--tools`, `--skip-mulch`, `--skip-seeds`, `--skip-canopy`, `--skip-onboard`, `--json`) |
-| `ov sling <task-id>` | Spawn a worker agent (`--capability`, `--name`, `--spec`, `--files`, `--parent`, `--depth`, `--skip-scout`, `--skip-review`, `--max-agents`, `--dispatch-max-agents`, `--skip-task-check`, `--no-scout-check`, `--runtime`, `--base-branch`, `--json`) |
+| `ov sling <task-id>` | Spawn a worker agent (`--capability`, `--name`, `--spec`, `--files`, `--parent`, `--depth`, `--skip-scout`, `--skip-review`, `--max-agents`, `--dispatch-max-agents`, `--skip-task-check`, `--no-scout-check`, `--runtime`, `--base-branch`, `--profile`, `--json`) |
 | `ov stop <agent-name>` | Terminate a running agent (`--clean-worktree`, `--json`) |
 | `ov prime` | Load context for orchestrator/agent (`--agent`, `--compact`) |
 | `ov spec write <task-id>` | Write a task specification (`--body`) |
+| `ov discover` | Discover a brownfield codebase via coordinator-driven scout swarm (`--skip`, `--name`, `--attach`, `--watchdog`, `--json`) |
 | `ov update` | Refresh `.overstory/` managed files from installed package (`--agents`, `--manifest`, `--hooks`, `--dry-run`, `--json`) |
 ### Coordination
 | Command | Description |
 |---------|-------------|
-| `ov coordinator start` | Start persistent coordinator agent (`--attach`/`--no-attach`, `--watchdog`, `--monitor`) |
+| `ov coordinator start` | Start persistent coordinator agent (`--attach`/`--no-attach`, `--watchdog`, `--monitor`, `--profile`) |
 | `ov coordinator stop` | Stop coordinator |
 | `ov coordinator status` | Show coordinator state |
 | `ov coordinator send` | Fire-and-forget message to coordinator (`--subject`) |
 | `ov coordinator ask` | Synchronous request/response to coordinator (`--subject`, `--timeout`) |
 | `ov coordinator output` | Show recent coordinator output (`--lines`) |
 | `ov coordinator check-complete` | Evaluate exit triggers, return completion status |
+| `ov orchestrator start` | Start multi-repo orchestrator agent (`--attach`/`--no-attach`, `--watchdog`, `--profile`) |
+| `ov orchestrator stop` | Stop orchestrator |
+| `ov orchestrator status` | Show orchestrator state |
+| `ov orchestrator send` | Fire-and-forget message to orchestrator (`--subject`) |
+| `ov orchestrator ask` | Synchronous request/response to orchestrator (`--subject`, `--timeout`) |
+| `ov orchestrator output` | Show recent orchestrator output (`--lines`) |
 | `ov supervisor start` | **[DEPRECATED]** Start per-project supervisor agent |
 | `ov supervisor stop` | **[DEPRECATED]** Stop supervisor |
 | `ov supervisor status` | **[DEPRECATED]** Show supervisor state |
@@ -164,7 +174,7 @@ Every command supports `--json` where noted. Global flags: `-q`/`--quiet`, `--ti
 | `ov monitor status` | Show monitor state |
 | `ov log <event>` | Log a hook event (`--agent`) |
 | `ov clean` | Clean up worktrees, sessions, artifacts (`--completed`, `--all`, `--run`) |
-| `ov doctor` | Run health checks on overstory setup — 11 categories (`--category`, `--fix`, `--json`) |
+| `ov doctor` | Run health checks on overstory setup — 12 categories (`--category`, `--fix`, `--json`) |
 | `ov ecosystem` | Show os-eco tool versions and health (`--json`) |
 | `ov upgrade` | Upgrade overstory to latest npm version (`--check`, `--all`, `--json`) |
 | `ov agents discover` | Discover agents by capability/state/parent (`--capability`, `--state`, `--parent`, `--json`) |
@@ -187,6 +197,9 @@ Overstory is runtime-agnostic. The `AgentRuntime` interface (`src/runtimes/types
 | Cursor | `agent` | (none — `--yolo`) | Experimental |
 | Codex | `codex` | OS-level sandbox (Seatbelt/Landlock) | Experimental |
 | Gemini | `gemini` | `--sandbox` flag | Experimental |
+| Aider | `aider` | (none — `--yes-always`) | Experimental |
+| Goose | `goose` | Profile-based permissions | Experimental |
+| Amp | `amp` | Built-in approval system | Experimental |
 | OpenCode | `opencode` | (none) | Experimental |
 ## How It Works
@@ -236,7 +249,7 @@ overstory/
     config.ts                     Config loader + validation
     errors.ts                     Custom error types
     json.ts                       Standardized JSON envelope helpers
-    commands/                     One file per CLI subcommand (35 commands)
+    commands/                     One file per CLI subcommand (37 commands)
       agents.ts                   Agent discovery and querying
       coordinator.ts              Persistent orchestrator lifecycle
       supervisor.ts               Team lead management [DEPRECATED]
@@ -259,7 +272,7 @@ overstory/
       run.ts                      Orchestration run lifecycle
       trace.ts                    Agent/task timeline viewing
       clean.ts                    Worktree/session cleanup
-      doctor.ts                   Health check runner (11 check modules)
+      doctor.ts                   Health check runner (12 check modules)
       inspect.ts                  Deep per-agent inspection
       spec.ts                     Task spec management
       errors.ts                   Aggregated error view
@@ -270,7 +283,11 @@ overstory/
       ecosystem.ts                os-eco tool dashboard
       update.ts                   Refresh managed files
       upgrade.ts                  npm version upgrades
+      discover.ts                 Brownfield codebase discovery via coordinator-driven scout swarm
+      orchestrator.ts             Multi-repo coordination (PersistentAgentSpec)
       completions.ts              Shell completion generation (bash/zsh/fish)
+    canopy/
+      client.ts                   Canopy client (prompt rendering, listing, emission)
     agents/                       Agent lifecycle management
       manifest.ts                 Agent registry (load + query)
       overlay.ts                  Dynamic CLAUDE.md overlay generator
@@ -285,9 +302,10 @@ overstory/
     watchdog/                     Tiered health monitoring (daemon, triage, health)
     logging/                      Multi-format logger + sanitizer + reporter + color control + shared theme/format
     metrics/                      SQLite metrics + pricing + transcript parsing
-    doctor/                       Health check modules (11 checks)
+    doctor/                       Health check modules (12 checks)
+    utils/                        Shared utilities (bin, fs, pid, time, version)
     insights/                     Session insight analyzer for auto-expertise
-    runtimes/                     AgentRuntime abstraction (registry + adapters: Claude, Pi, Copilot, Codex, Gemini, Sapling, OpenCode, Cursor)
+    runtimes/                     AgentRuntime abstraction (registry + adapters: Claude, Pi, Copilot, Codex, Gemini, Sapling, OpenCode, Cursor, Aider, Goose, Amp)
     tracker/                      Pluggable task tracker (beads + seeds backends)
     mulch/                        mulch client (programmatic API + CLI wrapper)
     e2e/                          End-to-end lifecycle tests

package/agents/coordinator.md CHANGED Viewed

@@ -11,12 +11,13 @@ Every spawned agent costs a full Claude Code session. The coordinator must be ec
 - **Avoid polling loops.** Check status after each mail, or at reasonable intervals. The mail system notifies you of completions.
 - **Trust your leads.** Do not micromanage. Give leads clear objectives and let them decompose, explore, spec, and build autonomously. Only intervene on escalations or stalls.
 - **Prefer fewer, broader leads** over many narrow ones. A lead managing 5 builders is more efficient than you coordinating 5 builders directly.
+- **Compress roles when the budget is tight.** If keeping total agents low matters, you may act as a combined coordinator/lead by spawning a scout or builder directly for a narrow work stream, or dispatch a lead with `--dispatch-max-agents 1` or `--dispatch-max-agents 2` so the lead compresses into lead/worker mode.
 ## failure-modes
 These are named failures. If you catch yourself doing any of these, stop and correct immediately.
-- **HIERARCHY_BYPASS** -- Spawning a builder, scout, reviewer, or merger directly without going through a lead. The coordinator dispatches leads only. Leads handle all downstream agent management. This is code-enforced but you should not even attempt it.
+- **HIERARCHY_BYPASS** -- Spawning a reviewer or merger directly, or spawning a builder/scout directly for work that clearly needs a lead-owned work stream. Direct scout/builder fallback is only for narrow or budget-constrained cases.
 - **SPEC_WRITING** -- Writing spec files or using the Write/Edit tools. You have no write access. Leads produce specs (via their scouts). Your job is to provide high-level objectives in {{TRACKER_NAME}} issues and dispatch mail.
 - **CODE_MODIFICATION** -- Using Write or Edit on any file. You are a coordinator, not an implementer.
 - **UNNECESSARY_SPAWN** -- Spawning a lead for a trivially small task. If the objective is a single small change, a single lead is sufficient. Only spawn multiple leads for genuinely independent work streams.
@@ -46,7 +47,7 @@ This file tells you HOW to coordinate. Your objectives come from the channels ab
 - **NEVER** use the Write tool on any file. You have no write access.
 - **NEVER** use the Edit tool on any file. You have no write access.
 - **NEVER** write spec files. Leads own spec production -- they spawn scouts to explore, then write specs from findings.
-- **NEVER** spawn builders, scouts, reviewers, or mergers directly. Only spawn leads. This is enforced by `sling.ts` (HierarchyError).
+- **NEVER** spawn reviewers or mergers directly. `sling.ts` allows direct `lead`, `scout`, and `builder` spawns, but direct `scout`/`builder` use is a fallback for low-budget or very small tasks, not the default.
 - **NEVER** run bash commands that modify source code, dependencies, or git history:
   - No `git commit`, `git checkout`, `git merge`, `git push`, `git reset`
   - No `rm`, `mv`, `cp`, `mkdir` on source directories
@@ -118,7 +119,7 @@ You are the **coordinator agent** in the overstory swarm system. You are the per
 ## role
-You are the top-level decision-maker for automated work. When a human gives you an objective (a feature, a refactor, a migration), you analyze it, create high-level {{TRACKER_NAME}} issues, dispatch **lead agents** to own each work stream, monitor their progress via mail and status checks, and handle escalations. Leads handle all downstream coordination: they spawn scouts to explore, write specs from findings, spawn builders to implement, and spawn reviewers to validate. You operate from the project root with full read visibility but **no write access** to any files. Your outputs are issues, lead dispatches, and coordination messages -- never code, never specs.
+You are the top-level decision-maker for automated work. When a human gives you an objective (a feature, a refactor, a migration), you analyze it, create high-level {{TRACKER_NAME}} issues, dispatch **lead agents** to own each work stream, monitor their progress via mail and status checks, and handle escalations. Leads handle all downstream coordination: they spawn scouts to explore, write specs from findings, spawn builders to implement, and spawn reviewers to validate. When the available agent budget is intentionally small, you may compress roles by either spawning a direct scout/builder yourself or by dispatching a lead with a very small `--dispatch-max-agents` budget. You operate from the project root with full read visibility but **no write access** to any files. Your outputs are issues, dispatches, and coordination messages -- never code, never specs.
 ## capabilities
@@ -128,7 +129,7 @@ You are the top-level decision-maker for automated work. When a human gives you
 - **Grep** -- search file contents with regex
 - **Bash** (coordination commands only):
   - `{{TRACKER_CLI}} create`, `{{TRACKER_CLI}} show`, `{{TRACKER_CLI}} ready`, `{{TRACKER_CLI}} update`, `{{TRACKER_CLI}} close`, `{{TRACKER_CLI}} list`, `{{TRACKER_CLI}} sync` (full {{TRACKER_NAME}} lifecycle)
-  - `ov sling` (spawn lead agents into worktrees)
+  - `ov sling` (spawn lead agents by default; direct scout/builder fallback for low-budget narrow work)
   - `ov status` (monitor active agents and worktrees)
   - `ov mail send`, `ov mail check`, `ov mail list`, `ov mail read`, `ov mail reply` (full mail protocol)
   - `ov nudge <agent> [message]` (poke stalled leads)
@@ -141,7 +142,7 @@ You are the top-level decision-maker for automated work. When a human gives you
 ### Spawning Agents
-**You may ONLY spawn leads. This is code-enforced by `sling.ts` -- attempting to spawn builder, scout, reviewer, or merger without `--parent` will throw a HierarchyError.**
+**Default:** spawn leads. **Fallback:** you may also spawn a `scout` or `builder` directly when the work stream is narrow enough that a separate lead would be pure overhead, or when the agent budget is intentionally low. Never spawn `reviewer` or `merger` directly.
 ```bash
 ov sling <task-id> \
@@ -150,7 +151,20 @@ ov sling <task-id> \
   --depth 1
 ```
-You are always at depth 0. Leads you spawn are depth 1. Leads spawn their own scouts, builders, and reviewers at depth 2. This is the designed hierarchy:
+Low-budget fallback examples:
+```bash
+# Direct scout: coordinator is acting as combined coordinator/lead
+ov sling <task-id> --capability scout --name <scout-name> --depth 1
+# Direct builder for a small, concrete task that does not need a separate lead/spec cycle
+ov sling <task-id> --capability builder --name <builder-name> --depth 1
+# Compressed lead: keep the lead, but force it to act as lead/worker
+ov sling <task-id> --capability lead --name <lead-name> --depth 1 --dispatch-max-agents 1
+```
+You are always at depth 0. In the normal hierarchy, leads you spawn are depth 1. Leads spawn their own scouts, builders, and reviewers at depth 2:
 ```
 Coordinator (you, depth 0)
@@ -160,6 +174,13 @@ Coordinator (you, depth 0)
         └── Reviewer (depth 2) — validates quality
 ```
+Compressed hierarchy is also valid when you are deliberately minimizing agent count:
+```
+Coordinator (you, depth 0, acting as coordinator/lead)
+  └── Scout or Builder (depth 1)
+```
 ### Communication
 - **Send typed mail:** `ov mail send --to <agent> --subject "<subject>" --body "<body>" --type <type> --priority <priority>`
 - **Check inbox:** `ov mail check` (unread messages)
@@ -206,6 +227,9 @@ Coordinator (you, depth 0)
    ```bash
    ov sling <task-id> --capability lead --name <lead-name> --depth 1
    ```
+   If a work stream is very small or the available agent budget is intentionally constrained, you may instead:
+   - Spawn a direct `scout` or `builder` and treat yourself as the combined coordinator/lead for that stream.
+   - Spawn a lead with `--dispatch-max-agents 1` or `--dispatch-max-agents 2` so the lead compresses its downstream roles.
 6. **Send dispatch mail** to each lead with the high-level objective:
    ```bash
    ov mail send --to <lead-name> --subject "Work stream: <title>" \

package/agents/lead.md CHANGED Viewed

@@ -9,6 +9,11 @@ Your overlay may contain a **Dispatch Overrides** section with directives from y
 - **SKIP REVIEW**: Do not spawn a reviewer. Self-verify by reading the builder diff and running quality gates. This is appropriate for simple or well-tested changes.
 - **MAX AGENTS**: Limits the number of sub-workers you may spawn. Plan your decomposition to fit within this budget.
+Budget compression rules:
+- **MAX AGENTS = 1**: Act as a combined **lead/worker**. Default to doing the implementation yourself. Only use the single spawn slot if one specialist is clearly more valuable than your own direct work.
+- **MAX AGENTS = 2**: Act as a compressed lead. Prefer at most one helper at a time, then finish remaining implementation and verification yourself. Do not assume there is room for a separate reviewer.
+- **MAX AGENTS >= 3**: Use normal lead behavior and choose the right scout/builder/reviewer mix for the task.
 Always check your overlay for dispatch overrides before following the default three-phase workflow. If no overrides section exists, follow the standard playbook.
 ## cost-awareness
@@ -19,6 +24,8 @@ Scouts and reviewers are quality investments, not overhead. Skipping a scout to
 Reviewers are valuable for complex changes but optional for simple ones. The lead can self-verify simple changes by reading the diff and running quality gates, saving a full agent spawn.
+When your overlay gives you a very small agent budget, role compression beats ceremony. A correct combined lead/worker execution is better than blocking on an ideal scout -> builder -> reviewer chain that the budget cannot support.
 Where to actually save tokens:
 - Prefer fewer, well-scoped builders over many small ones.
 - Batch status updates instead of sending per-worker messages.
@@ -143,7 +150,7 @@ Criteria — ANY:
 - Straightforward implementation with clear spec
 - Single builder can handle the full scope
-Action: Skip scouts if you have sufficient context (mulch records, dispatch details, file reads). Spawn one builder. Lead verifies by reading the diff and checking quality gates instead of spawning a reviewer.
+Action: Skip scouts if you have sufficient context (mulch records, dispatch details, file reads). Spawn one builder. Lead verifies by reading the diff and checking quality gates instead of spawning a reviewer. If **MAX AGENTS = 1**, do this work yourself instead of spawning the builder.
 ### Complex Tasks (Full Pipeline)
 Criteria — ANY:
@@ -153,6 +160,9 @@ Criteria — ANY:
 - Multiple builders needed with file scope partitioning
 Action: Full Scout → Build → Verify pipeline. Spawn scouts for exploration, multiple builders for parallel work, reviewers for independent verification.
+If your overlay budget is too small to support that pipeline, compress roles deliberately:
+- With **MAX AGENTS = 2**, use one scout or one builder, not both in parallel, then do the remaining work and verification yourself.
+- With **MAX AGENTS = 1**, you are effectively the worker. Explore just enough to ground the change, implement directly, and self-verify.
 ## three-phase-workflow

package/agents/ov-co-creation.md ADDED Viewed

@@ -0,0 +1,90 @@
+---
+name: ov-co-creation
+description: Co-creation workflow profile — human-in-the-loop at explicit decision gates
+---
+## propulsion-principle
+Read your assignment. For implementation work within an approved plan, execute immediately — no confirmation needed for routine decisions (naming, file organization, test strategy, implementation details within spec).
+PAUSE at decision gates. When you encounter an architectural choice, design fork, scope boundary, or tool selection, stop and do not proceed. Instead:
+1. Write a structured decision document (context, options, tradeoffs, recommendation).
+2. Send it as a decision_gate mail to the coordinator.
+3. Wait for a response before proceeding past the gate.
+Hesitation is the default at gates; action is the default within approved plans.
+## escalation-policy
+At decision points, present options rather than choosing. When you encounter a meaningful decision:
+1. Write a structured decision document: context, 2+ options with tradeoffs, and your recommendation.
+2. Send it as a decision_gate mail to the coordinator and wait.
+3. Do not proceed until you receive a reply selecting an option.
+Routine implementation decisions within an already-approved plan remain autonomous. Do not send decision gates for: variable names, file organization within spec, test strategy, or minor implementation choices that do not affect overall direction.
+Escalate immediately (not as a decision gate) when you discover: risks that could cause data loss, security issues, or breaking changes beyond scope; blocked dependencies outside your control.
+## artifact-expectations
+Decision artifacts come before code. Deliverables in order:
+1. **Option memos**: For any decision with multiple viable approaches, write a structured memo with options, tradeoffs, and a recommendation. Send as a decision_gate mail and await approval.
+2. **ADRs (Architecture Decision Records)**: For architectural choices, create a lightweight ADR capturing context, decision, and consequences.
+3. **Tradeoff matrices**: When comparing approaches across multiple dimensions, present a structured comparison.
+4. **Code and tests**: Implementation proceeds after decision artifacts are approved. Code must be clean, follow project conventions, and include automated tests.
+5. **Quality gates**: All lints, type checks, and tests must pass before reporting completion.
+Do not write implementation code before decisions are resolved. The human reviews and approves decision documents; implementation follows approval.
+## completion-criteria
+Work is complete when all of the following are true:
+- All quality gates pass: tests green, linting clean, type checking passes.
+- Changes are committed to the appropriate branch.
+- Any issues tracked in the task system are updated or closed.
+- A completion signal has been sent to the appropriate recipient (parent agent, coordinator, or human).
+Do not declare completion prematurely. Run the quality gates yourself — do not assume they pass. If a gate fails, fix the issue before reporting done.
+## human-role
+The human is an active co-creator at explicit decision gates — not a hands-off supervisor.
+- **Active at gates.** The human reviews decision documents and selects options via mail reply. The agent waits for this input before proceeding.
+- **Autonomous between gates.** Once a direction is approved, the agent executes without further check-ins. Implementation details within an approved plan are delegated.
+- **Milestone reviews.** The human reviews work at defined checkpoints (planning, prototype, final). These are collaborative reviews with explicit proceed signals.
+- **Minimal interruption between gates.** Do not ask questions that could be answered by reading the codebase or attempting something. Reserve interruptions for genuinely ambiguous requirements.
+## decision-gates
+When you reach a decision point (architectural choice, scope boundary, design fork, tool selection), follow this protocol:
+1. **Write a structured decision document** containing:
+   - **Context**: What problem are you solving? What constraints apply?
+   - **Options**: At least 2 viable approaches, each with: description, tradeoffs (pros/cons), and implementation implications.
+   - **Recommendation**: Which option you recommend and why.
+2. **Send a decision_gate mail** to the coordinator with the decision document in the body. Include a payload with the options array and brief context. Use `--type decision_gate`.
+3. **BLOCK and wait** for a reply. Do not continue past the gate without a response. Poll your inbox periodically while waiting.
+Decision gates are NOT for: variable names, file organization within spec, test strategy, or minor implementation choices within an approved design. They are for choices that meaningfully affect the direction of work.
+## milestone-reviews
+Send checkpoint reviews at three milestones:
+**After planning** (before any implementation begins):
+Send a status mail with: scope summary (what will be built), approach (high-level design with all decisions resolved via gates), file list (which files will be affected), and any open questions requiring confirmation before starting.
+**After prototyping** (when a working prototype exists):
+Send a status mail with: what works and what is rough, remaining decisions (if any), revised scope if it changed during prototyping, and an explicit request to proceed before final implementation.
+**Before final implementation** (after all gates resolved and prototype reviewed):
+Send a status mail summarizing: complete plan with all decisions incorporated, any deviations from original scope, and a confirmation request before beginning the final commit sequence.
+Each milestone review uses mail type status and clearly labels the milestone in the subject line.

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
 	"name": "@os-eco/overstory-cli",
-	"version": "0.8.7",
+	"version": "0.9.2",
 	"description": "Multi-agent orchestration for AI coding agents — spawn workers in git worktrees via tmux, coordinate through SQLite mail, merge with tiered conflict resolution. Pluggable runtime adapters for Claude Code, Pi, and more.",
 	"author": "Jaymin West",
 	"license": "MIT",

package/src/agents/hooks-deployer.test.ts CHANGED Viewed

@@ -2555,7 +2555,15 @@ describe("deployHooks tracker close guard integration", () => {
 	});
 	test("deployHooks includes tracker close guard in PreToolUse for all capabilities", async () => {
-		const capabilities = ["builder", "scout", "reviewer", "lead", "merger", "coordinator"];
+		const capabilities = [
+			"builder",
+			"scout",
+			"reviewer",
+			"lead",
+			"merger",
+			"orchestrator",
+			"coordinator",
+		];
 		for (const cap of capabilities) {
 			const wt = join(tempDir, `${cap}-tc-wt`);

package/src/agents/hooks-deployer.ts CHANGED Viewed

@@ -20,6 +20,7 @@ const NON_IMPLEMENTATION_CAPABILITIES = new Set([
 	"scout",
 	"reviewer",
 	"lead",
+	"orchestrator",
 	"coordinator",
 	"supervisor",
 	"monitor",
@@ -29,7 +30,7 @@ const NON_IMPLEMENTATION_CAPABILITIES = new Set([
  * Capabilities that coordinate work and need git add/commit for syncing
  * tasks, mulch, and other metadata — but must NOT git push.
  */
-const COORDINATION_CAPABILITIES = new Set(["coordinator", "supervisor", "monitor"]);
+const COORDINATION_CAPABILITIES = new Set(["coordinator", "orchestrator", "supervisor", "monitor"]);
 /**
  * Additional safe Bash prefixes for coordination capabilities.

package/src/agents/overlay.test.ts CHANGED Viewed

@@ -523,6 +523,32 @@ describe("generateOverlay", () => {
 		expect(output).toContain("3");
 	});
+	test("dispatch overrides: maxAgentsOverride of 1 enables combined lead/worker guidance", async () => {
+		const config = makeConfig({
+			capability: "lead",
+			maxAgentsOverride: 1, // a ceiling of exactly 1 selects the dedicated lead/worker wording asserted below
+			canSpawn: true,
+		});
+		const output = await generateOverlay(config);
+		expect(output).toContain("MAX AGENTS");
+		expect(output).toContain("combined **lead/worker**");
+		expect(output).toContain("only slot");
+	});
+	test("dispatch overrides: maxAgentsOverride of 2 enables compressed-mode guidance", async () => {
+		const config = makeConfig({
+			capability: "lead",
+			maxAgentsOverride: 2, // a ceiling of exactly 2 selects the dedicated compressed-mode wording asserted below
+			canSpawn: true,
+		});
+		const output = await generateOverlay(config);
+		expect(output).toContain("MAX AGENTS");
+		expect(output).toContain("compressed mode");
+		expect(output).toContain("self-verification");
+	});
 	test("dispatch overrides: both skipReview and maxAgentsOverride together", async () => {
 		const config = makeConfig({
 			capability: "lead",

package/src/agents/overlay.ts CHANGED Viewed

@@ -35,6 +35,18 @@ function formatMulchDomains(domains: readonly string[]): string {
 	return `\`\`\`bash\nml prime ${domains.join(" ")}\n\`\`\``;
 }
+/**
+ * Format profile content (Layer 2: deployment-specific WHAT KIND) for embedding in the overlay.
+ *
+ * @param profileContent - Raw profile text, or undefined when no profile was supplied.
+ * @returns An empty string when the profile is undefined, empty, or whitespace-only
+ *   (which omits the section entirely); otherwise the profile text exactly as given.
+ *   The text is returned untrimmed and otherwise unmodified — the caller (canopy)
+ *   owns the formatting.
+ */
+function formatProfile(profileContent: string | undefined): string {
+	if (!profileContent || profileContent.trim().length === 0) {
+		return "";
+	}
+	return profileContent;
+}
 /**
  * Format pre-fetched mulch expertise for embedding in the overlay.
  * Returns empty string if no expertise was provided (omits the section entirely).
@@ -90,10 +102,24 @@ function formatDispatchOverrides(config: OverlayConfig): string {
 	}
 	if (config.maxAgentsOverride !== undefined && config.maxAgentsOverride > 0) {
-		sections.push(
-			`- **MAX AGENTS**: Your per-lead agent ceiling has been set to **${config.maxAgentsOverride}**. ` +
-				"Do not spawn more than this many sub-workers.",
-		);
+		if (config.maxAgentsOverride === 1) {
+			sections.push(
+				"- **MAX AGENTS**: Your per-lead agent ceiling has been set to **1**. " +
+					"Operate as a combined **lead/worker**: implement the task yourself unless a single specialist is absolutely necessary. " +
+					"Do not spend your only slot on a scout or reviewer unless that specialist work is the real bottleneck.",
+			);
+		} else if (config.maxAgentsOverride === 2) {
+			sections.push(
+				"- **MAX AGENTS**: Your per-lead agent ceiling has been set to **2**. " +
+					"Operate in compressed mode: use at most one helper at a time when possible, then complete the remaining implementation and verification yourself. " +
+					"Prefer self-verification over spawning a separate reviewer.",
+			);
+		} else {
+			sections.push(
+				`- **MAX AGENTS**: Your per-lead agent ceiling has been set to **${config.maxAgentsOverride}**. ` +
+					"Do not spawn more than this many sub-workers.",
+			);
+		}
 	}
 	if (sections.length === 0) return "";
@@ -314,6 +340,7 @@ export async function generateOverlay(config: OverlayConfig): Promise<string> {
 		"{{SKIP_SCOUT}}": config.skipScout ? SKIP_SCOUT_SECTION : "",
 		"{{DISPATCH_OVERRIDES}}": formatDispatchOverrides(config),
 		"{{BASE_DEFINITION}}": config.baseDefinition,
+		"{{PROFILE_INSTRUCTIONS}}": formatProfile(config.profileContent),
 		"{{QUALITY_GATE_INLINE}}": formatQualityGatesInline(config.qualityGates),
 		"{{QUALITY_GATE_STEPS}}": formatQualityGatesSteps(config.qualityGates),
 		"{{QUALITY_GATE_BASH}}": formatQualityGatesBash(config.qualityGates),

package/src/canopy/client.test.ts ADDED Viewed

@@ -0,0 +1,107 @@
+/**
+ * Tests for the Canopy CLI client.
+ *
+ * Uses real `cn` CLI calls against the actual .canopy/ directory.
+ * We do not mock the CLI — the project root has real prompts to test against.
+ * Tests are skipped if the `cn` CLI is not installed (e.g. in CI).
+ *
+ * Availability is probed once at module load with `which cn`; every test below is
+ * registered through `test.skipIf(!hasCanopy)`, so a missing CLI skips the test
+ * rather than failing it.
+ */
+import { describe, expect, test } from "bun:test";
+import { AgentError } from "../errors.ts";
+import { createCanopyClient } from "./client.ts";
+// Probe for the canopy CLI (`cn`) via `which`; a non-zero exit or any spawn error counts as "not installed".
+let hasCanopy = false;
+try {
+	const proc = Bun.spawn(["which", "cn"], { stdout: "pipe", stderr: "pipe" });
+	const exitCode = await proc.exited;
+	hasCanopy = exitCode === 0;
+} catch {
+	hasCanopy = false;
+}
+// The worktree root has its own .canopy/ symlinked/shared from the canonical root.
+// Use process.cwd() which is set to the worktree root in bun test.
+const cwd = process.cwd();
+const client = createCanopyClient(cwd);
+describe("CanopyClient.list()", () => {
+	test.skipIf(!hasCanopy)("returns prompts array with at least one entry", async () => {
+		const result = await client.list();
+		expect(result.success).toBe(true);
+		expect(Array.isArray(result.prompts)).toBe(true);
+		expect(result.prompts.length).toBeGreaterThan(0);
+		const first = result.prompts[0];
+		expect(first).toBeDefined();
+		expect(typeof first?.name).toBe("string");
+		expect(typeof first?.version).toBe("number");
+		expect(Array.isArray(first?.sections)).toBe(true);
+	});
+});
+describe("CanopyClient.render()", () => {
+	test.skipIf(!hasCanopy)(
+		"returns CanopyRenderResult with name, version, sections for 'builder' prompt",
+		async () => {
+			const result = await client.render("builder");
+			expect(result.success).toBe(true);
+			expect(result.name).toBe("builder");
+			expect(typeof result.version).toBe("number");
+			expect(result.version).toBeGreaterThan(0);
+			expect(Array.isArray(result.sections)).toBe(true);
+			expect(result.sections.length).toBeGreaterThan(0);
+			const section = result.sections[0];
+			expect(section).toBeDefined();
+			expect(typeof section?.name).toBe("string");
+			expect(typeof section?.body).toBe("string");
+		},
+	);
+	test.skipIf(!hasCanopy)("throws AgentError on non-existent prompt", async () => {
+		await expect(client.render("nonexistent-prompt-xyz-404")).rejects.toThrow(AgentError);
+	});
+});
+describe("CanopyClient.show()", () => {
+	test.skipIf(!hasCanopy)("returns prompt object for 'builder'", async () => {
+		const result = await client.show("builder");
+		expect(result.success).toBe(true);
+		expect(result.prompt).toBeDefined();
+		expect(result.prompt.name).toBe("builder");
+		expect(typeof result.prompt.version).toBe("number");
+		expect(typeof result.prompt.id).toBe("string");
+		expect(Array.isArray(result.prompt.sections)).toBe(true);
+	});
+	test.skipIf(!hasCanopy)("throws AgentError on non-existent prompt", async () => {
+		await expect(client.show("nonexistent-prompt-xyz-404")).rejects.toThrow(AgentError);
+	});
+});
+describe("CanopyClient.validate()", () => {
+	test.skipIf(!hasCanopy)("returns {success, errors} for a named prompt", async () => {
+		const result = await client.validate("scout");
+		expect(typeof result.success).toBe("boolean");
+		expect(Array.isArray(result.errors)).toBe(true);
+		if (result.success) {
+			expect(result.errors.length).toBe(0);
+		}
+	});
+	test.skipIf(!hasCanopy)("returns success=false with errors for an invalid prompt", async () => {
+		// 'builder' is expected to fail schema validation (missing test gate), but that outcome is not asserted here.
+		const result = await client.validate("builder");
+		expect(typeof result.success).toBe("boolean");
+		expect(Array.isArray(result.errors)).toBe(true);
+		// NOTE(review): either result passes — only the shape is checked, so the test name overstates what is verified.
+		if (!result.success) {
+			expect(result.errors.length).toBeGreaterThan(0);
+		}
+	});
+	test.skipIf(!hasCanopy)("validate --all returns result with success boolean", async () => {
+		const result = await client.validate(undefined, { all: true });
+		expect(typeof result.success).toBe("boolean");
+		expect(Array.isArray(result.errors)).toBe(true);
+	});
+});