@os-eco/overstory-cli 0.7.7 → 0.7.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +105 -3
- package/package.json +1 -1
- package/src/agents/manifest.test.ts +168 -1
- package/src/agents/manifest.ts +23 -2
- package/src/commands/agents.ts +1 -0
- package/src/commands/coordinator.test.ts +131 -2
- package/src/commands/coordinator.ts +40 -9
- package/src/commands/costs.test.ts +5 -0
- package/src/commands/costs.ts +1 -1
- package/src/commands/init.test.ts +1 -0
- package/src/commands/init.ts +1 -0
- package/src/commands/log.ts +2 -0
- package/src/commands/prime.test.ts +1 -0
- package/src/commands/sling.test.ts +63 -1
- package/src/commands/sling.ts +37 -2
- package/src/config.test.ts +68 -0
- package/src/config.ts +16 -0
- package/src/doctor/structure.test.ts +1 -0
- package/src/doctor/structure.ts +1 -0
- package/src/index.ts +2 -1
- package/src/metrics/pricing.test.ts +258 -0
- package/src/metrics/store.test.ts +227 -0
- package/src/metrics/store.ts +40 -5
- package/src/runtimes/gemini.test.ts +537 -0
- package/src/runtimes/gemini.ts +235 -0
- package/src/runtimes/registry.test.ts +15 -1
- package/src/runtimes/registry.ts +2 -0
- package/src/schema-consistency.test.ts +1 -0
- package/src/types.ts +8 -0
- package/src/worktree/tmux.test.ts +49 -0
- package/src/worktree/tmux.ts +33 -0
package/README.md
CHANGED
|
@@ -6,7 +6,7 @@ Multi-agent orchestration for AI coding agents.
|
|
|
6
6
|
[CI](https://github.com/jayminwest/overstory/actions/workflows/ci.yml)
|
|
7
7
|
[License: MIT](LICENSE)
|
|
8
8
|
|
|
9
|
-
Overstory turns a single coding session into a multi-agent team by spawning worker agents in git worktrees via tmux, coordinating them through a custom SQLite mail system, and merging their work back with tiered conflict resolution. A pluggable `AgentRuntime` interface lets you swap between runtimes — Claude Code, [Pi](https://github.com/badlogic/pi-mono/tree/main/packages/coding-agent), or your own adapter.
|
|
9
|
+
Overstory turns a single coding session into a multi-agent team by spawning worker agents in git worktrees via tmux, coordinating them through a custom SQLite mail system, and merging their work back with tiered conflict resolution. A pluggable `AgentRuntime` interface lets you swap between runtimes — Claude Code, [Pi](https://github.com/badlogic/pi-mono/tree/main/packages/coding-agent), [Gemini CLI](https://github.com/google-gemini/gemini-cli), or your own adapter.
|
|
10
10
|
|
|
11
11
|
> **Warning: Agent swarms are not a universal solution.** Do not deploy Overstory without understanding the risks of multi-agent orchestration — compounding error rates, cost amplification, debugging complexity, and merge conflicts are the normal case, not edge cases. Read [STEELMAN.md](STEELMAN.md) for a full risk analysis and the [Agentic Engineering Book](https://github.com/jayminwest/agentic-engineering-book) ([web version](https://jayminwest.com/agentic-engineering-book)) before using this tool in production.
|
|
12
12
|
|
|
@@ -18,6 +18,7 @@ Requires [Bun](https://bun.sh) v1.0+, git, and tmux. At least one supported agen
|
|
|
18
18
|
- [Pi](https://github.com/badlogic/pi-mono/tree/main/packages/coding-agent) (`pi` CLI)
|
|
19
19
|
- [GitHub Copilot](https://github.com/features/copilot) (`copilot` CLI)
|
|
20
20
|
- [Codex](https://github.com/openai/codex) (`codex` CLI)
|
|
21
|
+
- [Gemini CLI](https://github.com/google-gemini/gemini-cli) (`gemini` CLI)
|
|
21
22
|
|
|
22
23
|
```bash
|
|
23
24
|
bun install -g @os-eco/overstory-cli
|
|
@@ -80,7 +81,7 @@ Every command supports `--json` where noted. Global flags: `-q`/`--quiet`, `--ti
|
|
|
80
81
|
| Command | Description |
|
|
81
82
|
|---------|-------------|
|
|
82
83
|
| `ov init` | Initialize `.overstory/` and bootstrap os-eco tools (`--yes`, `--name`, `--tools`, `--skip-mulch`, `--skip-seeds`, `--skip-canopy`, `--skip-onboard`, `--json`) |
|
|
83
|
-
| `ov sling <task-id>` | Spawn a worker agent (`--capability`, `--name`, `--spec`, `--files`, `--parent`, `--depth`, `--skip-scout`, `--skip-review`, `--max-agents`, `--dispatch-max-agents`, `--skip-task-check`, `--no-scout-check`, `--runtime`, `--json`) |
|
|
84
|
+
| `ov sling <task-id>` | Spawn a worker agent (`--capability`, `--name`, `--spec`, `--files`, `--parent`, `--depth`, `--skip-scout`, `--skip-review`, `--max-agents`, `--dispatch-max-agents`, `--skip-task-check`, `--no-scout-check`, `--runtime`, `--base-branch`, `--json`) |
|
|
84
85
|
| `ov stop <agent-name>` | Terminate a running agent (`--clean-worktree`, `--json`) |
|
|
85
86
|
| `ov prime` | Load context for orchestrator/agent (`--agent`, `--compact`) |
|
|
86
87
|
| `ov spec write <task-id>` | Write a task specification (`--body`) |
|
|
@@ -175,6 +176,7 @@ Overstory is runtime-agnostic. The `AgentRuntime` interface (`src/runtimes/types
|
|
|
175
176
|
| Pi | `pi` | `.pi/extensions/` guard extension | Active development |
|
|
176
177
|
| Copilot | `copilot` | (none — `--allow-all-tools`) | Active development |
|
|
177
178
|
| Codex | `codex` | OS-level sandbox (Seatbelt/Landlock) | Active development |
|
|
179
|
+
| Gemini | `gemini` | `--sandbox` flag | Active development |
|
|
178
180
|
|
|
179
181
|
## How It Works
|
|
180
182
|
|
|
@@ -271,7 +273,7 @@ overstory/
|
|
|
271
273
|
metrics/ SQLite metrics + pricing + transcript parsing
|
|
272
274
|
doctor/ Health check modules (11 checks)
|
|
273
275
|
insights/ Session insight analyzer for auto-expertise
|
|
274
|
-
runtimes/ AgentRuntime abstraction (registry + adapters: Claude, Pi, Copilot, Codex)
|
|
276
|
+
runtimes/ AgentRuntime abstraction (registry + adapters: Claude, Pi, Copilot, Codex, Gemini)
|
|
275
277
|
tracker/ Pluggable task tracker (beads + seeds backends)
|
|
276
278
|
mulch/ mulch client (programmatic API + CLI wrapper)
|
|
277
279
|
e2e/ End-to-end lifecycle tests
|
|
@@ -279,6 +281,106 @@ overstory/
|
|
|
279
281
|
templates/ Templates for overlays and hooks
|
|
280
282
|
```
|
|
281
283
|
|
|
284
|
+
## Configuration
|
|
285
|
+
|
|
286
|
+
### Gateway Providers
|
|
287
|
+
|
|
288
|
+
Route agent API calls through custom gateway endpoints (z.ai, OpenRouter, self-hosted proxies). Configure providers in `.overstory/config.yaml`:
|
|
289
|
+
|
|
290
|
+
```yaml
|
|
291
|
+
providers:
|
|
292
|
+
anthropic:
|
|
293
|
+
type: native
|
|
294
|
+
zai:
|
|
295
|
+
type: gateway
|
|
296
|
+
baseUrl: https://api.z.ai/v1
|
|
297
|
+
authTokenEnv: ZAI_API_KEY
|
|
298
|
+
openrouter:
|
|
299
|
+
type: gateway
|
|
300
|
+
baseUrl: https://openrouter.ai/api/v1
|
|
301
|
+
authTokenEnv: OPENROUTER_API_KEY
|
|
302
|
+
models:
|
|
303
|
+
builder: zai/claude-sonnet-4-6
|
|
304
|
+
scout: openrouter/openai/gpt-4o
|
|
305
|
+
```
|
|
306
|
+
|
|
307
|
+
**How it works:** Model refs use `provider/model-id` format. Overstory sets `ANTHROPIC_BASE_URL` to the gateway `baseUrl`, `ANTHROPIC_AUTH_TOKEN` from the env var named in `authTokenEnv`, and `ANTHROPIC_API_KEY=""` to prevent direct Anthropic calls. The agent receives `"sonnet"` as a model alias and Claude Code routes via env vars.
|
|
308
|
+
|
|
309
|
+
**Environment variable notes:**
|
|
310
|
+
- `ANTHROPIC_AUTH_TOKEN` and `ANTHROPIC_API_KEY` are mutually exclusive per-agent
|
|
311
|
+
- Gateway agents get `ANTHROPIC_API_KEY=""` and `ANTHROPIC_AUTH_TOKEN` from provider config
|
|
312
|
+
- Direct Anthropic API calls (merge resolver, watchdog triage) still need `ANTHROPIC_API_KEY` in the orchestrator env
|
|
313
|
+
|
|
314
|
+
**Validation:** `ov doctor --category providers` checks reachability, auth tokens, model-provider refs, and tool-use compatibility.
|
|
315
|
+
|
|
316
|
+
**`ProviderConfig` fields:**
|
|
317
|
+
|
|
318
|
+
| Field | Type | Required | Description |
|
|
319
|
+
|-------|------|----------|-------------|
|
|
320
|
+
| `type` | `native` or `gateway` | Yes | Provider type |
|
|
321
|
+
| `baseUrl` | string | Gateway only | API endpoint URL |
|
|
322
|
+
| `authTokenEnv` | string | Gateway only | Env var name holding auth token |
|
|
323
|
+
|
|
324
|
+
## Troubleshooting
|
|
325
|
+
|
|
326
|
+
### Coordinator died during startup
|
|
327
|
+
|
|
328
|
+
This error means the coordinator tmux session exited before the TUI became ready. The most common cause is slow shell initialization.
|
|
329
|
+
|
|
330
|
+
**Step 1: Measure shell startup time**
|
|
331
|
+
|
|
332
|
+
```bash
|
|
333
|
+
time zsh -i -c exit # For zsh
|
|
334
|
+
time bash -i -c exit # For bash
|
|
335
|
+
```
|
|
336
|
+
|
|
337
|
+
If startup takes more than 1 second, slow shell init is likely the cause.
|
|
338
|
+
|
|
339
|
+
**Step 2: Common slow-startup causes**
|
|
340
|
+
|
|
341
|
+
| Cause | Typical delay | Fix |
|
|
342
|
+
|-------|---------------|-----|
|
|
343
|
+
| oh-my-zsh with many plugins | 1-5s | Reduce plugins, switch to lighter framework (zinit with lazy loading) |
|
|
344
|
+
| nvm (Node Version Manager) | 1-3s | Use `--no-use` + lazy-load nvm, or switch to fnm/volta |
|
|
345
|
+
| pyenv init | 0.5-2s | Lazy-load pyenv |
|
|
346
|
+
| rbenv init | 0.5-1s | Lazy-load rbenv |
|
|
347
|
+
| starship prompt | 0.5-1s | Check starship timings |
|
|
348
|
+
| conda auto-activate | 1-3s | `auto_activate_base: false` in `.condarc` |
|
|
349
|
+
| Homebrew shellenv | 0.5-1s | Cache output instead of evaluating every shell start |
|
|
350
|
+
|
|
351
|
+
**Step 3: Configure `shellInitDelayMs`** in `.overstory/config.yaml`:
|
|
352
|
+
|
|
353
|
+
```yaml
|
|
354
|
+
runtime:
|
|
355
|
+
shellInitDelayMs: 3000
|
|
356
|
+
```
|
|
357
|
+
|
|
358
|
+
- Default: `0` (no delay)
|
|
359
|
+
- Typical values: `1000`–`5000` depending on shell startup time
|
|
360
|
+
- Values above `30000` (30s) trigger a warning
|
|
361
|
+
- Inserts a delay between tmux session creation and TUI readiness polling
|
|
362
|
+
|
|
363
|
+
**Step 4: Optimization examples**
|
|
364
|
+
|
|
365
|
+
Lazy-load nvm (add to `~/.zshrc` or `~/.bashrc`):
|
|
366
|
+
|
|
367
|
+
```bash
|
|
368
|
+
# Lazy-load nvm — only activates when you first call nvm/node/npm
|
|
369
|
+
export NVM_DIR="$HOME/.nvm"
|
|
370
|
+
nvm() { unset -f nvm node npm npx; [ -s "$NVM_DIR/nvm.sh" ] && . "$NVM_DIR/nvm.sh"; nvm "$@"; }
|
|
371
|
+
node() { unset -f nvm node npm npx; [ -s "$NVM_DIR/nvm.sh" ] && . "$NVM_DIR/nvm.sh"; node "$@"; }
|
|
372
|
+
npm() { unset -f nvm node npm npx; [ -s "$NVM_DIR/nvm.sh" ] && . "$NVM_DIR/nvm.sh"; npm "$@"; }
|
|
373
|
+
npx() { unset -f nvm node npm npx; [ -s "$NVM_DIR/nvm.sh" ] && . "$NVM_DIR/nvm.sh"; npx "$@"; }
|
|
374
|
+
```
|
|
375
|
+
|
|
376
|
+
Reduce oh-my-zsh plugins (edit `~/.zshrc`):
|
|
377
|
+
|
|
378
|
+
```bash
|
|
379
|
+
# Before: plugins=(git zsh-autosuggestions zsh-syntax-highlighting node npm python ruby rbenv pyenv ...)
|
|
380
|
+
# After: keep only what you use regularly
|
|
381
|
+
plugins=(git)
|
|
382
|
+
```
|
|
383
|
+
|
|
282
384
|
## Part of os-eco
|
|
283
385
|
|
|
284
386
|
Overstory is part of the [os-eco](https://github.com/jayminwest/os-eco) AI agent tooling ecosystem.
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@os-eco/overstory-cli",
|
|
3
|
-
"version": "0.7.7",
|
|
3
|
+
"version": "0.7.9",
|
|
4
4
|
"description": "Multi-agent orchestration for AI coding agents — spawn workers in git worktrees via tmux, coordinate through SQLite mail, merge with tiered conflict resolution. Pluggable runtime adapters for Claude Code, Pi, and more.",
|
|
5
5
|
"author": "Jaymin West",
|
|
6
6
|
"license": "MIT",
|
|
package/src/agents/manifest.test.ts
CHANGED
|
@@ -5,7 +5,12 @@ import { join } from "node:path";
|
|
|
5
5
|
import { AgentError } from "../errors.ts";
|
|
6
6
|
import { cleanupTempDir } from "../test-helpers.ts";
|
|
7
7
|
import type { AgentManifest, OverstoryConfig } from "../types.ts";
|
|
8
|
-
import { createManifestLoader, resolveModel, resolveProviderEnv } from "./manifest.ts";
|
|
8
|
+
import {
|
|
9
|
+
createManifestLoader,
|
|
10
|
+
expandAliasFromEnv,
|
|
11
|
+
resolveModel,
|
|
12
|
+
resolveProviderEnv,
|
|
13
|
+
} from "./manifest.ts";
|
|
9
14
|
|
|
10
15
|
const VALID_MANIFEST = {
|
|
11
16
|
version: "1.0",
|
|
@@ -673,6 +678,168 @@ describe("resolveModel", () => {
|
|
|
673
678
|
});
|
|
674
679
|
});
|
|
675
680
|
|
|
681
|
+
describe("expandAliasFromEnv", () => {
|
|
682
|
+
test("returns expanded model ID when env var is set", () => {
|
|
683
|
+
expect(
|
|
684
|
+
expandAliasFromEnv("haiku", {
|
|
685
|
+
ANTHROPIC_DEFAULT_HAIKU_MODEL: "us.anthropic.claude-3-5-haiku-20241022-v1:0",
|
|
686
|
+
}),
|
|
687
|
+
).toBe("us.anthropic.claude-3-5-haiku-20241022-v1:0");
|
|
688
|
+
});
|
|
689
|
+
|
|
690
|
+
test("returns alias unchanged when env var is unset", () => {
|
|
691
|
+
expect(expandAliasFromEnv("haiku", {})).toBe("haiku");
|
|
692
|
+
});
|
|
693
|
+
|
|
694
|
+
test("expands all three aliases via their env vars", () => {
|
|
695
|
+
const env = {
|
|
696
|
+
ANTHROPIC_DEFAULT_HAIKU_MODEL: "bedrock-haiku-id",
|
|
697
|
+
ANTHROPIC_DEFAULT_SONNET_MODEL: "bedrock-sonnet-id",
|
|
698
|
+
ANTHROPIC_DEFAULT_OPUS_MODEL: "bedrock-opus-id",
|
|
699
|
+
};
|
|
700
|
+
expect(expandAliasFromEnv("haiku", env)).toBe("bedrock-haiku-id");
|
|
701
|
+
expect(expandAliasFromEnv("sonnet", env)).toBe("bedrock-sonnet-id");
|
|
702
|
+
expect(expandAliasFromEnv("opus", env)).toBe("bedrock-opus-id");
|
|
703
|
+
});
|
|
704
|
+
|
|
705
|
+
test("trims whitespace from env var value", () => {
|
|
706
|
+
expect(
|
|
707
|
+
expandAliasFromEnv("sonnet", {
|
|
708
|
+
ANTHROPIC_DEFAULT_SONNET_MODEL: " bedrock-sonnet-id ",
|
|
709
|
+
}),
|
|
710
|
+
).toBe("bedrock-sonnet-id");
|
|
711
|
+
});
|
|
712
|
+
|
|
713
|
+
test("returns alias when env var is empty string", () => {
|
|
714
|
+
expect(expandAliasFromEnv("sonnet", { ANTHROPIC_DEFAULT_SONNET_MODEL: "" })).toBe("sonnet");
|
|
715
|
+
});
|
|
716
|
+
|
|
717
|
+
test("returns alias when env var is whitespace only", () => {
|
|
718
|
+
expect(expandAliasFromEnv("sonnet", { ANTHROPIC_DEFAULT_SONNET_MODEL: " " })).toBe("sonnet");
|
|
719
|
+
});
|
|
720
|
+
|
|
721
|
+
test("returns unknown alias unchanged", () => {
|
|
722
|
+
expect(expandAliasFromEnv("gpt-4", {})).toBe("gpt-4");
|
|
723
|
+
});
|
|
724
|
+
});
|
|
725
|
+
|
|
726
|
+
describe("resolveModel env var expansion", () => {
|
|
727
|
+
const baseManifest: AgentManifest = {
|
|
728
|
+
version: "1.0",
|
|
729
|
+
agents: {
|
|
730
|
+
scout: {
|
|
731
|
+
file: "scout.md",
|
|
732
|
+
model: "haiku",
|
|
733
|
+
tools: ["Read"],
|
|
734
|
+
capabilities: ["explore"],
|
|
735
|
+
canSpawn: false,
|
|
736
|
+
constraints: [],
|
|
737
|
+
},
|
|
738
|
+
builder: {
|
|
739
|
+
file: "builder.md",
|
|
740
|
+
model: "sonnet",
|
|
741
|
+
tools: ["Read", "Write"],
|
|
742
|
+
capabilities: ["implement"],
|
|
743
|
+
canSpawn: false,
|
|
744
|
+
constraints: [],
|
|
745
|
+
},
|
|
746
|
+
},
|
|
747
|
+
capabilityIndex: { explore: ["scout"], implement: ["builder"] },
|
|
748
|
+
};
|
|
749
|
+
|
|
750
|
+
function makeConfig(models: OverstoryConfig["models"] = {}): OverstoryConfig {
|
|
751
|
+
return {
|
|
752
|
+
project: { name: "test", root: "/tmp/test", canonicalBranch: "main" },
|
|
753
|
+
agents: {
|
|
754
|
+
manifestPath: ".overstory/agent-manifest.json",
|
|
755
|
+
baseDir: ".overstory/agent-defs",
|
|
756
|
+
maxConcurrent: 5,
|
|
757
|
+
staggerDelayMs: 1000,
|
|
758
|
+
maxDepth: 2,
|
|
759
|
+
maxSessionsPerRun: 0,
|
|
760
|
+
maxAgentsPerLead: 5,
|
|
761
|
+
},
|
|
762
|
+
worktrees: { baseDir: ".overstory/worktrees" },
|
|
763
|
+
taskTracker: { backend: "auto", enabled: false },
|
|
764
|
+
mulch: { enabled: false, domains: [], primeFormat: "markdown" },
|
|
765
|
+
merge: { aiResolveEnabled: false, reimagineEnabled: false },
|
|
766
|
+
providers: { anthropic: { type: "native" } },
|
|
767
|
+
watchdog: {
|
|
768
|
+
tier0Enabled: false,
|
|
769
|
+
tier0IntervalMs: 30000,
|
|
770
|
+
tier1Enabled: false,
|
|
771
|
+
tier2Enabled: false,
|
|
772
|
+
staleThresholdMs: 300000,
|
|
773
|
+
zombieThresholdMs: 600000,
|
|
774
|
+
nudgeIntervalMs: 60000,
|
|
775
|
+
},
|
|
776
|
+
models,
|
|
777
|
+
logging: { verbose: false, redactSecrets: true },
|
|
778
|
+
};
|
|
779
|
+
}
|
|
780
|
+
|
|
781
|
+
test("expands alias when env var is set", () => {
|
|
782
|
+
const saved = process.env.ANTHROPIC_DEFAULT_HAIKU_MODEL;
|
|
783
|
+
process.env.ANTHROPIC_DEFAULT_HAIKU_MODEL = "us.anthropic.claude-3-5-haiku-20241022-v1:0";
|
|
784
|
+
try {
|
|
785
|
+
const result = resolveModel(makeConfig(), baseManifest, "scout", "sonnet");
|
|
786
|
+
expect(result).toEqual({ model: "us.anthropic.claude-3-5-haiku-20241022-v1:0" });
|
|
787
|
+
} finally {
|
|
788
|
+
if (saved === undefined) {
|
|
789
|
+
delete process.env.ANTHROPIC_DEFAULT_HAIKU_MODEL;
|
|
790
|
+
} else {
|
|
791
|
+
process.env.ANTHROPIC_DEFAULT_HAIKU_MODEL = saved;
|
|
792
|
+
}
|
|
793
|
+
}
|
|
794
|
+
});
|
|
795
|
+
|
|
796
|
+
test("passes alias through when env var is unset", () => {
|
|
797
|
+
const saved = process.env.ANTHROPIC_DEFAULT_HAIKU_MODEL;
|
|
798
|
+
delete process.env.ANTHROPIC_DEFAULT_HAIKU_MODEL;
|
|
799
|
+
try {
|
|
800
|
+
const result = resolveModel(makeConfig(), baseManifest, "scout", "sonnet");
|
|
801
|
+
expect(result).toEqual({ model: "haiku" });
|
|
802
|
+
} finally {
|
|
803
|
+
if (saved !== undefined) {
|
|
804
|
+
process.env.ANTHROPIC_DEFAULT_HAIKU_MODEL = saved;
|
|
805
|
+
}
|
|
806
|
+
}
|
|
807
|
+
});
|
|
808
|
+
|
|
809
|
+
test("config override to full model ID is not affected by env vars", () => {
|
|
810
|
+
const saved = process.env.ANTHROPIC_DEFAULT_SONNET_MODEL;
|
|
811
|
+
process.env.ANTHROPIC_DEFAULT_SONNET_MODEL = "bedrock-sonnet";
|
|
812
|
+
try {
|
|
813
|
+
// Config overrides to a direct model string (not an alias)
|
|
814
|
+
const config = makeConfig({ builder: "claude-3-5-sonnet-20241022" });
|
|
815
|
+
const result = resolveModel(config, baseManifest, "builder", "haiku");
|
|
816
|
+
expect(result).toEqual({ model: "claude-3-5-sonnet-20241022" });
|
|
817
|
+
} finally {
|
|
818
|
+
if (saved === undefined) {
|
|
819
|
+
delete process.env.ANTHROPIC_DEFAULT_SONNET_MODEL;
|
|
820
|
+
} else {
|
|
821
|
+
process.env.ANTHROPIC_DEFAULT_SONNET_MODEL = saved;
|
|
822
|
+
}
|
|
823
|
+
}
|
|
824
|
+
});
|
|
825
|
+
|
|
826
|
+
test("config override to alias also expands via env var", () => {
|
|
827
|
+
const saved = process.env.ANTHROPIC_DEFAULT_OPUS_MODEL;
|
|
828
|
+
process.env.ANTHROPIC_DEFAULT_OPUS_MODEL = "bedrock-opus-id";
|
|
829
|
+
try {
|
|
830
|
+
const config = makeConfig({ scout: "opus" });
|
|
831
|
+
const result = resolveModel(config, baseManifest, "scout", "haiku");
|
|
832
|
+
expect(result).toEqual({ model: "bedrock-opus-id" });
|
|
833
|
+
} finally {
|
|
834
|
+
if (saved === undefined) {
|
|
835
|
+
delete process.env.ANTHROPIC_DEFAULT_OPUS_MODEL;
|
|
836
|
+
} else {
|
|
837
|
+
process.env.ANTHROPIC_DEFAULT_OPUS_MODEL = saved;
|
|
838
|
+
}
|
|
839
|
+
}
|
|
840
|
+
});
|
|
841
|
+
});
|
|
842
|
+
|
|
676
843
|
describe("resolveProviderEnv", () => {
|
|
677
844
|
test("returns null for unknown provider", () => {
|
|
678
845
|
const result = resolveProviderEnv("unknown", "some/model", {});
|
package/src/agents/manifest.ts
CHANGED
|
@@ -34,6 +34,27 @@ interface RawManifest {
|
|
|
34
34
|
|
|
35
35
|
const MODEL_ALIASES = new Set(["sonnet", "opus", "haiku"]);
|
|
36
36
|
|
|
37
|
+
// Env var mapping: alias → ANTHROPIC_DEFAULT_{ALIAS}_MODEL
|
|
38
|
+
const ALIAS_ENV_VARS: Record<string, string> = {
|
|
39
|
+
haiku: "ANTHROPIC_DEFAULT_HAIKU_MODEL",
|
|
40
|
+
sonnet: "ANTHROPIC_DEFAULT_SONNET_MODEL",
|
|
41
|
+
opus: "ANTHROPIC_DEFAULT_OPUS_MODEL",
|
|
42
|
+
};
|
|
43
|
+
|
|
44
|
+
/**
|
|
45
|
+
* Expand a model alias via its corresponding ANTHROPIC_DEFAULT_{ALIAS}_MODEL env var.
|
|
46
|
+
* Returns the env var value if set, otherwise the original alias.
|
|
47
|
+
*/
|
|
48
|
+
export function expandAliasFromEnv(
|
|
49
|
+
alias: string,
|
|
50
|
+
env: Record<string, string | undefined> = process.env as Record<string, string | undefined>,
|
|
51
|
+
): string {
|
|
52
|
+
const envVar = ALIAS_ENV_VARS[alias];
|
|
53
|
+
if (!envVar) return alias;
|
|
54
|
+
const value = env[envVar];
|
|
55
|
+
return value?.trim() || alias;
|
|
56
|
+
}
|
|
57
|
+
|
|
37
58
|
/**
|
|
38
59
|
* Validate that a raw parsed object conforms to the AgentDefinition shape.
|
|
39
60
|
* Returns a list of error messages for any violations.
|
|
@@ -333,9 +354,9 @@ export function resolveModel(
|
|
|
333
354
|
const configModel = config.models[role];
|
|
334
355
|
const rawModel = configModel ?? manifest.agents[role]?.model ?? fallback;
|
|
335
356
|
|
|
336
|
-
// Simple alias —
|
|
357
|
+
// Simple alias — expand via env var if set (e.g. ANTHROPIC_DEFAULT_SONNET_MODEL)
|
|
337
358
|
if (MODEL_ALIASES.has(rawModel)) {
|
|
338
|
-
return { model: rawModel };
|
|
359
|
+
return { model: expandAliasFromEnv(rawModel) };
|
|
339
360
|
}
|
|
340
361
|
|
|
341
362
|
// Provider-prefixed: split on first "/" to get provider name and model ID
|
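package/src/commands/agents.ts
CHANGED
|
(The +1 -0 hunk for this file is missing from this capture.)
package/src/commands/coordinator.test.ts
CHANGED
|
@@ -38,6 +38,7 @@ interface TmuxCallTracker {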
|
|
|
38
38
|
env?: Record<string, string>;
|
|
39
39
|
}>;
|
|
40
40
|
isSessionAlive: Array<{ name: string; result: boolean }>;
|
|
41
|
+
checkSessionState: Array<{ name: string; result: "alive" | "dead" | "no_server" }>;
|
|
41
42
|
killSession: Array<{ name: string }>;
|
|
42
43
|
sendKeys: Array<{ name: string; keys: string }>;
|
|
43
44
|
waitForTuiReady: Array<{ name: string }>;
|
|
@@ -68,6 +69,7 @@ function makeFakeTmux(
|
|
|
68
69
|
options: {
|
|
69
70
|
waitForTuiReadyResult?: boolean;
|
|
70
71
|
ensureTmuxAvailableError?: Error;
|
|
72
|
+
checkSessionStateMap?: Record<string, "alive" | "dead" | "no_server">;
|
|
71
73
|
} = {},
|
|
72
74
|
): {
|
|
73
75
|
tmux: NonNullable<CoordinatorDeps["_tmux"]>;
|
|
@@ -76,6 +78,7 @@ function makeFakeTmux(
|
|
|
76
78
|
const calls: TmuxCallTracker = {
|
|
77
79
|
createSession: [],
|
|
78
80
|
isSessionAlive: [],
|
|
81
|
+
checkSessionState: [],
|
|
79
82
|
killSession: [],
|
|
80
83
|
sendKeys: [],
|
|
81
84
|
waitForTuiReady: [],
|
|
@@ -97,6 +100,13 @@ function makeFakeTmux(
|
|
|
97
100
|
calls.isSessionAlive.push({ name, result: alive });
|
|
98
101
|
return alive;
|
|
99
102
|
},
|
|
103
|
+
checkSessionState: async (name: string): Promise<"alive" | "dead" | "no_server"> => {
|
|
104
|
+
const stateMap = options.checkSessionStateMap ?? {};
|
|
105
|
+
// Default: derive from sessionAliveMap for backwards compat
|
|
106
|
+
const state = stateMap[name] ?? (sessionAliveMap[name] ? "alive" : "dead");
|
|
107
|
+
calls.checkSessionState.push({ name, result: state });
|
|
108
|
+
return state;
|
|
109
|
+
},
|
|
100
110
|
killSession: async (name: string): Promise<void> => {
|
|
101
111
|
calls.killSession.push({ name });
|
|
102
112
|
},
|
|
@@ -325,7 +335,11 @@ function makeDeps(
|
|
|
325
335
|
sessionAliveMap: Record<string, boolean> = {},
|
|
326
336
|
watchdogConfig?: { running?: boolean; startSuccess?: boolean; stopSuccess?: boolean },
|
|
327
337
|
monitorConfig?: { running?: boolean; startSuccess?: boolean; stopSuccess?: boolean },
|
|
328
|
-
tmuxOptions?: { waitForTuiReadyResult?: boolean; ensureTmuxAvailableError?: Error },
|
|
338
|
+
tmuxOptions?: {
|
|
339
|
+
waitForTuiReadyResult?: boolean;
|
|
340
|
+
ensureTmuxAvailableError?: Error;
|
|
341
|
+
checkSessionStateMap?: Record<string, "alive" | "dead" | "no_server">;
|
|
342
|
+
},
|
|
329
343
|
): {
|
|
330
344
|
deps: CoordinatorDeps;
|
|
331
345
|
calls: TmuxCallTracker;
|
|
@@ -606,7 +620,7 @@ describe("startCoordinator", () => {
|
|
|
606
620
|
|
|
607
621
|
test("rejects duplicate when coordinator is already running", async () => {
|
|
608
622
|
// Write an existing active coordinator session
|
|
609
|
-
const existing = makeCoordinatorSession({ state: "working" });
|
|
623
|
+
const existing = makeCoordinatorSession({ state: "working", pid: process.pid });
|
|
610
624
|
saveSessionsToDb([existing]);
|
|
611
625
|
|
|
612
626
|
// Mock tmux as alive for the existing session
|
|
@@ -623,6 +637,29 @@ describe("startCoordinator", () => {
|
|
|
623
637
|
}
|
|
624
638
|
});
|
|
625
639
|
|
|
640
|
+
test("rejects duplicate when pid is null but tmux session is alive", async () => {
|
|
641
|
+
// Session has null pid (e.g. migrated from older schema) but tmux is alive.
|
|
642
|
+
// Cannot prove it's a zombie without a pid, so treat as active.
|
|
643
|
+
const existing = makeCoordinatorSession({ state: "working", pid: null });
|
|
644
|
+
saveSessionsToDb([existing]);
|
|
645
|
+
|
|
646
|
+
const { deps } = makeDeps(
|
|
647
|
+
{ "overstory-test-project-coordinator": true },
|
|
648
|
+
undefined,
|
|
649
|
+
undefined,
|
|
650
|
+
{ checkSessionStateMap: { "overstory-test-project-coordinator": "alive" } },
|
|
651
|
+
);
|
|
652
|
+
|
|
653
|
+
try {
|
|
654
|
+
await coordinatorCommand(["start"], deps);
|
|
655
|
+
expect(true).toBe(false); // Should have thrown
|
|
656
|
+
} catch (err) {
|
|
657
|
+
expect(err).toBeInstanceOf(AgentError);
|
|
658
|
+
const ae = err as AgentError;
|
|
659
|
+
expect(ae.message).toContain("already running");
|
|
660
|
+
}
|
|
661
|
+
});
|
|
662
|
+
|
|
626
663
|
test("cleans up dead session and starts new one", async () => {
|
|
627
664
|
// Write an existing session that claims to be working
|
|
628
665
|
const deadSession = makeCoordinatorSession({
|
|
@@ -656,6 +693,98 @@ describe("startCoordinator", () => {
|
|
|
656
693
|
expect(newSession?.id).not.toBe("session-dead-coordinator");
|
|
657
694
|
});
|
|
658
695
|
|
|
696
|
+
test("cleans up zombie session when tmux alive but PID dead", async () => {
|
|
697
|
+
// Session is "working" in DB, tmux session exists, but the PID is dead
|
|
698
|
+
const zombieSession = makeCoordinatorSession({
|
|
699
|
+
id: "session-zombie-coordinator",
|
|
700
|
+
state: "working",
|
|
701
|
+
pid: 999999, // Non-existent PID
|
|
702
|
+
});
|
|
703
|
+
saveSessionsToDb([zombieSession]);
|
|
704
|
+
|
|
705
|
+
// Tmux session is alive (pane exists) but PID 999999 is not running
|
|
706
|
+
const { deps } = makeDeps(
|
|
707
|
+
{ "overstory-test-project-coordinator": true },
|
|
708
|
+
undefined,
|
|
709
|
+
undefined,
|
|
710
|
+
{ checkSessionStateMap: { "overstory-test-project-coordinator": "alive" } },
|
|
711
|
+
);
|
|
712
|
+
|
|
713
|
+
const originalSleep = Bun.sleep;
|
|
714
|
+
Bun.sleep = (() => Promise.resolve()) as typeof Bun.sleep;
|
|
715
|
+
|
|
716
|
+
try {
|
|
717
|
+
await captureStdout(() => coordinatorCommand(["start"], deps));
|
|
718
|
+
} finally {
|
|
719
|
+
Bun.sleep = originalSleep;
|
|
720
|
+
}
|
|
721
|
+
|
|
722
|
+
// Zombie session should be cleaned up and new one created
|
|
723
|
+
const sessions = loadSessionsFromDb();
|
|
724
|
+
expect(sessions).toHaveLength(1);
|
|
725
|
+
const newSession = sessions[0];
|
|
726
|
+
expect(newSession?.state).toBe("booting");
|
|
727
|
+
expect(newSession?.id).not.toBe("session-zombie-coordinator");
|
|
728
|
+
});
|
|
729
|
+
|
|
730
|
+
test("cleans up stale session when tmux server is not running", async () => {
|
|
731
|
+
// Session is "booting" in DB but tmux server crashed
|
|
732
|
+
const staleSession = makeCoordinatorSession({
|
|
733
|
+
id: "session-stale-coordinator",
|
|
734
|
+
state: "booting",
|
|
735
|
+
});
|
|
736
|
+
saveSessionsToDb([staleSession]);
|
|
737
|
+
|
|
738
|
+
// checkSessionState returns no_server
|
|
739
|
+
const { deps } = makeDeps(
|
|
740
|
+
{ "overstory-test-project-coordinator": false },
|
|
741
|
+
undefined,
|
|
742
|
+
undefined,
|
|
743
|
+
{ checkSessionStateMap: { "overstory-test-project-coordinator": "no_server" } },
|
|
744
|
+
);
|
|
745
|
+
|
|
746
|
+
const originalSleep = Bun.sleep;
|
|
747
|
+
Bun.sleep = (() => Promise.resolve()) as typeof Bun.sleep;
|
|
748
|
+
|
|
749
|
+
try {
|
|
750
|
+
await captureStdout(() => coordinatorCommand(["start"], deps));
|
|
751
|
+
} finally {
|
|
752
|
+
Bun.sleep = originalSleep;
|
|
753
|
+
}
|
|
754
|
+
|
|
755
|
+
// Stale session cleaned up, new one created
|
|
756
|
+
const sessions = loadSessionsFromDb();
|
|
757
|
+
expect(sessions).toHaveLength(1);
|
|
758
|
+
const newSession = sessions[0];
|
|
759
|
+
expect(newSession?.state).toBe("booting");
|
|
760
|
+
expect(newSession?.id).not.toBe("session-stale-coordinator");
|
|
761
|
+
});
|
|
762
|
+
|
|
763
|
+
test("respects shellInitDelayMs config before polling TUI readiness", async () => {
|
|
764
|
+
// Append shellInitDelayMs to existing config (preserve tier2Enabled etc.)
|
|
765
|
+
const configPath = join(tempDir, ".overstory", "config.yaml");
|
|
766
|
+
const existing = await Bun.file(configPath).text();
|
|
767
|
+
await Bun.write(configPath, `${existing}\nruntime:\n shellInitDelayMs: 500\n`);
|
|
768
|
+
|
|
769
|
+
const { deps } = makeDeps();
|
|
770
|
+
|
|
771
|
+
const sleepCalls: number[] = [];
|
|
772
|
+
const originalSleep = Bun.sleep;
|
|
773
|
+
Bun.sleep = ((ms: number | Date) => {
|
|
774
|
+
if (typeof ms === "number") sleepCalls.push(ms);
|
|
775
|
+
return Promise.resolve();
|
|
776
|
+
}) as typeof Bun.sleep;
|
|
777
|
+
|
|
778
|
+
try {
|
|
779
|
+
await captureStdout(() => coordinatorCommand(["start"], deps));
|
|
780
|
+
} finally {
|
|
781
|
+
Bun.sleep = originalSleep;
|
|
782
|
+
}
|
|
783
|
+
|
|
784
|
+
// The 500ms shell init delay should appear in the sleep calls
|
|
785
|
+
expect(sleepCalls).toContain(500);
|
|
786
|
+
});
|
|
787
|
+
|
|
659
788
|
test("throws AgentError when tmux is not available", async () => {
|
|
660
789
|
const { deps } = makeDeps({}, undefined, undefined, {
|
|
661
790
|
ensureTmuxAvailableError: new AgentError(
|
|
package/src/commands/coordinator.ts
CHANGED
|
@@ -27,7 +27,9 @@ import { createRunStore } from "../sessions/store.ts";
|
|
|
27
27
|
import { resolveBackend, trackerCliName } from "../tracker/factory.ts";
|
|
28
28
|
import type { AgentSession } from "../types.ts";
|
|
29
29
|
import { isProcessRunning } from "../watchdog/health.ts";
|
|
30
|
+
import type { SessionState } from "../worktree/tmux.ts";
|
|
30
31
|
import {
|
|
32
|
+
checkSessionState,
|
|
31
33
|
createSession,
|
|
32
34
|
ensureTmuxAvailable,
|
|
33
35
|
isSessionAlive,
|
|
@@ -58,6 +60,7 @@ export interface CoordinatorDeps {
|
|
|
58
60
|
env?: Record<string, string>,
|
|
59
61
|
) => Promise<number>;
|
|
60
62
|
isSessionAlive: (name: string) => Promise<boolean>;
|
|
63
|
+
checkSessionState: (name: string) => Promise<SessionState>;
|
|
61
64
|
killSession: (name: string) => Promise<void>;
|
|
62
65
|
sendKeys: (name: string, keys: string) => Promise<void>;
|
|
63
66
|
waitForTuiReady: (
|
|
@@ -275,6 +278,7 @@ async function startCoordinator(
|
|
|
275
278
|
const tmux = deps._tmux ?? {
|
|
276
279
|
createSession,
|
|
277
280
|
isSessionAlive,
|
|
281
|
+
checkSessionState,
|
|
278
282
|
killSession,
|
|
279
283
|
sendKeys,
|
|
280
284
|
waitForTuiReady,
|
|
@@ -308,15 +312,29 @@ async function startCoordinator(
|
|
|
308
312
|
existing.state !== "completed" &&
|
|
309
313
|
existing.state !== "zombie"
|
|
310
314
|
) {
|
|
311
|
-
const
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
)
|
|
315
|
+
const sessionState = await tmux.checkSessionState(existing.tmuxSession);
|
|
316
|
+
|
|
317
|
+
if (sessionState === "alive") {
|
|
318
|
+
// Tmux session exists -- but is the process inside still running?
|
|
319
|
+
// A crashed Claude Code leaves a zombie tmux pane that blocks retries.
|
|
320
|
+
if (existing.pid !== null && !isProcessRunning(existing.pid)) {
|
|
321
|
+
// Zombie: tmux pane exists but agent process has exited.
|
|
322
|
+
// Kill the empty session and reclaim the slot.
|
|
323
|
+
await tmux.killSession(existing.tmuxSession);
|
|
324
|
+
store.updateState(COORDINATOR_NAME, "completed");
|
|
325
|
+
} else {
|
|
326
|
+
// Either the process is genuinely running (pid alive), or pid is null
|
|
327
|
+
// (e.g. sessions migrated from an older schema). In both cases we
|
|
328
|
+
// cannot prove the session is a zombie, so treat it as active.
|
|
329
|
+
throw new AgentError(
|
|
330
|
+
`Coordinator is already running (tmux: ${existing.tmuxSession}, since: ${existing.startedAt})`,
|
|
331
|
+
{ agentName: COORDINATOR_NAME },
|
|
332
|
+
);
|
|
333
|
+
}
|
|
334
|
+
} else {
|
|
335
|
+
// Session is dead or tmux server is not running -- clean up stale DB entry.
|
|
336
|
+
store.updateState(COORDINATOR_NAME, "completed");
|
|
317
337
|
}
|
|
318
|
-
// Session recorded but tmux is dead — mark as completed and continue
|
|
319
|
-
store.updateState(COORDINATOR_NAME, "completed");
|
|
320
338
|
}
|
|
321
339
|
|
|
322
340
|
// Resolve model and runtime early (needed for deployConfig and spawn)
|
|
@@ -413,6 +431,12 @@ async function startCoordinator(
|
|
|
413
431
|
|
|
414
432
|
store.upsert(session);
|
|
415
433
|
|
|
434
|
+
// Give slow shells time to finish initializing before polling for TUI readiness.
|
|
435
|
+
const shellDelay = config.runtime?.shellInitDelayMs ?? 0;
|
|
436
|
+
if (shellDelay > 0) {
|
|
437
|
+
await Bun.sleep(shellDelay);
|
|
438
|
+
}
|
|
439
|
+
|
|
416
440
|
// Wait for Claude Code TUI to render before sending input
|
|
417
441
|
const tuiReady = await tmux.waitForTuiReady(tmuxSession, (content) =>
|
|
418
442
|
runtime.detectReady(content),
|
|
@@ -423,8 +447,13 @@ async function startCoordinator(
|
|
|
423
447
|
if (!alive) {
|
|
424
448
|
// Clean up the stale session record
|
|
425
449
|
store.updateState(COORDINATOR_NAME, "completed");
|
|
450
|
+
const sessionState = await tmux.checkSessionState(tmuxSession);
|
|
451
|
+
const detail =
|
|
452
|
+
sessionState === "no_server"
|
|
453
|
+
? "The tmux server is no longer running. It may have crashed or been killed externally."
|
|
454
|
+
: "The Claude Code process may have crashed or exited immediately. Check tmux logs or try running the claude command manually.";
|
|
426
455
|
throw new AgentError(
|
|
427
|
-
`Coordinator tmux session "${tmuxSession}" died during startup.
|
|
456
|
+
`Coordinator tmux session "${tmuxSession}" died during startup. ${detail}`,
|
|
428
457
|
{ agentName: COORDINATOR_NAME },
|
|
429
458
|
);
|
|
430
459
|
}
|
|
@@ -512,6 +541,7 @@ async function stopCoordinator(opts: { json: boolean }, deps: CoordinatorDeps =
|
|
|
512
541
|
const tmux = deps._tmux ?? {
|
|
513
542
|
createSession,
|
|
514
543
|
isSessionAlive,
|
|
544
|
+
checkSessionState,
|
|
515
545
|
killSession,
|
|
516
546
|
sendKeys,
|
|
517
547
|
waitForTuiReady,
|
|
@@ -626,6 +656,7 @@ async function statusCoordinator(
|
|
|
626
656
|
const tmux = deps._tmux ?? {
|
|
627
657
|
createSession,
|
|
628
658
|
isSessionAlive,
|
|
659
|
+
checkSessionState,
|
|
629
660
|
killSession,
|
|
630
661
|
sendKeys,
|
|
631
662
|
waitForTuiReady,
|