@os-eco/overstory-cli 0.7.2 → 0.7.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +21 -9
- package/agents/builder.md +6 -0
- package/agents/coordinator.md +2 -2
- package/agents/lead.md +4 -1
- package/agents/merger.md +3 -2
- package/agents/monitor.md +1 -1
- package/agents/reviewer.md +1 -0
- package/agents/scout.md +1 -0
- package/package.json +2 -2
- package/src/agents/hooks-deployer.test.ts +6 -5
- package/src/agents/identity.test.ts +3 -2
- package/src/agents/manifest.test.ts +4 -3
- package/src/agents/overlay.test.ts +3 -2
- package/src/commands/agents.test.ts +5 -4
- package/src/commands/agents.ts +18 -8
- package/src/commands/completions.test.ts +8 -5
- package/src/commands/completions.ts +37 -1
- package/src/commands/costs.test.ts +4 -3
- package/src/commands/dashboard.test.ts +265 -6
- package/src/commands/dashboard.ts +367 -64
- package/src/commands/doctor.test.ts +3 -2
- package/src/commands/errors.test.ts +3 -2
- package/src/commands/feed.test.ts +3 -2
- package/src/commands/feed.ts +2 -29
- package/src/commands/inspect.test.ts +3 -2
- package/src/commands/log.test.ts +248 -8
- package/src/commands/log.ts +193 -110
- package/src/commands/logs.test.ts +3 -2
- package/src/commands/mail.test.ts +3 -2
- package/src/commands/metrics.test.ts +4 -3
- package/src/commands/nudge.test.ts +3 -2
- package/src/commands/prime.test.ts +3 -2
- package/src/commands/prime.ts +1 -16
- package/src/commands/replay.test.ts +3 -2
- package/src/commands/run.test.ts +2 -1
- package/src/commands/sling.test.ts +127 -0
- package/src/commands/sling.ts +101 -3
- package/src/commands/status.test.ts +8 -8
- package/src/commands/trace.test.ts +3 -2
- package/src/commands/watch.test.ts +3 -2
- package/src/config.test.ts +3 -3
- package/src/doctor/agents.test.ts +3 -2
- package/src/doctor/logs.test.ts +3 -2
- package/src/doctor/structure.test.ts +3 -2
- package/src/index.ts +3 -1
- package/src/logging/color.ts +1 -1
- package/src/logging/format.test.ts +110 -0
- package/src/logging/format.ts +42 -1
- package/src/logging/logger.test.ts +3 -2
- package/src/mail/client.test.ts +3 -2
- package/src/mail/store.test.ts +3 -2
- package/src/merge/queue.test.ts +3 -2
- package/src/merge/resolver.test.ts +39 -0
- package/src/merge/resolver.ts +1 -1
- package/src/metrics/pricing.ts +80 -0
- package/src/metrics/transcript.test.ts +58 -1
- package/src/metrics/transcript.ts +9 -68
- package/src/mulch/client.test.ts +63 -2
- package/src/mulch/client.ts +62 -1
- package/src/runtimes/claude.test.ts +4 -3
- package/src/runtimes/pi-guards.test.ts +55 -2
- package/src/runtimes/pi-guards.ts +26 -9
- package/src/schema-consistency.test.ts +4 -2
- package/src/sessions/compat.test.ts +3 -2
- package/src/sessions/store.test.ts +3 -2
- package/src/test-helpers.ts +20 -1
- package/src/tracker/beads.test.ts +454 -0
- package/src/tracker/seeds.test.ts +461 -0
- package/src/watchdog/daemon.test.ts +4 -3
- package/src/watchdog/triage.test.ts +3 -2
package/README.md
CHANGED
|
@@ -1,18 +1,21 @@
|
|
|
1
1
|
# Overstory
|
|
2
2
|
|
|
3
|
-
Multi-agent orchestration for
|
|
3
|
+
Multi-agent orchestration for AI coding agents.
|
|
4
4
|
|
|
5
5
|
[npm](https://www.npmjs.com/package/@os-eco/overstory-cli)
|
|
6
6
|
[CI](https://github.com/jayminwest/overstory/actions/workflows/ci.yml)
|
|
7
7
|
[License: MIT](LICENSE)
|
|
8
8
|
|
|
9
|
-
Overstory turns a single
|
|
9
|
+
Overstory turns a single coding session into a multi-agent team by spawning worker agents in git worktrees via tmux, coordinating them through a custom SQLite mail system, and merging their work back with tiered conflict resolution. A pluggable `AgentRuntime` interface lets you swap between runtimes — Claude Code, [Pi](https://github.com/nichochar/pi-coding-agent), or your own adapter.
|
|
10
10
|
|
|
11
11
|
> **Warning: Agent swarms are not a universal solution.** Do not deploy Overstory without understanding the risks of multi-agent orchestration — compounding error rates, cost amplification, debugging complexity, and merge conflicts are the normal case, not edge cases. Read [STEELMAN.md](STEELMAN.md) for a full risk analysis and the [Agentic Engineering Book](https://github.com/jayminwest/agentic-engineering-book) ([web version](https://jayminwest.com/agentic-engineering-book)) before using this tool in production.
|
|
12
12
|
|
|
13
13
|
## Install
|
|
14
14
|
|
|
15
|
-
Requires [Bun](https://bun.sh) v1.0+,
|
|
15
|
+
Requires [Bun](https://bun.sh) v1.0+, git, and tmux. At least one supported agent runtime must be installed:
|
|
16
|
+
|
|
17
|
+
- [Claude Code](https://docs.anthropic.com/en/docs/claude-code) (`claude` CLI)
|
|
18
|
+
- [Pi](https://github.com/nichochar/pi-coding-agent) (`pi` CLI)
|
|
16
19
|
|
|
17
20
|
```bash
|
|
18
21
|
bun install -g @os-eco/overstory-cli
|
|
@@ -75,7 +78,7 @@ Every command supports `--json` where noted. Global flags: `-q`/`--quiet`, `--ti
|
|
|
75
78
|
| Command | Description |
|
|
76
79
|
|---------|-------------|
|
|
77
80
|
| `ov init` | Initialize `.overstory/` in current project (`--yes`, `--name`) |
|
|
78
|
-
| `ov sling <task-id>` | Spawn a worker agent (`--capability`, `--name`, `--spec`, `--files`, `--parent`, `--depth`, `--skip-scout`, `--skip-review`, `--max-agents`, `--dispatch-max-agents`, `--skip-task-check`, `--runtime`, `--json`) |
|
|
81
|
+
| `ov sling <task-id>` | Spawn a worker agent (`--capability`, `--name`, `--spec`, `--files`, `--parent`, `--depth`, `--skip-scout`, `--skip-review`, `--max-agents`, `--dispatch-max-agents`, `--skip-task-check`, `--no-scout-check`, `--runtime`, `--json`) |
|
|
79
82
|
| `ov stop <agent-name>` | Terminate a running agent (`--clean-worktree`, `--json`) |
|
|
80
83
|
| `ov prime` | Load context for orchestrator/agent (`--agent`, `--compact`) |
|
|
81
84
|
| `ov spec write <task-id>` | Write a task specification (`--body`) |
|
|
@@ -158,11 +161,20 @@ Every command supports `--json` where noted. Global flags: `-q`/`--quiet`, `--ti
|
|
|
158
161
|
|
|
159
162
|
## Architecture
|
|
160
163
|
|
|
161
|
-
Overstory uses
|
|
164
|
+
Overstory uses instruction overlays and tool-call guards to turn agent sessions into orchestrated workers. Each agent runs in an isolated git worktree via tmux. Inter-agent messaging is handled by a custom SQLite mail system (WAL mode, ~1-5ms per query) with typed protocol messages and broadcast support. A FIFO merge queue with 4-tier conflict resolution merges agent branches back to canonical. A tiered watchdog system (Tier 0 mechanical daemon, Tier 1 AI-assisted triage, Tier 2 monitor agent) ensures fleet health. See [CLAUDE.md](CLAUDE.md) for full technical details.
|
|
165
|
+
|
|
166
|
+
### Runtime Adapters
|
|
167
|
+
|
|
168
|
+
Overstory is runtime-agnostic. The `AgentRuntime` interface (`src/runtimes/types.ts`) defines the contract — each adapter handles spawning, config deployment, guard enforcement, readiness detection, and transcript parsing for its runtime. Set the default in `config.yaml` or override per-agent with `ov sling --runtime <name>`.
|
|
169
|
+
|
|
170
|
+
| Runtime | CLI | Guard Mechanism | Status |
|
|
171
|
+
|---------|-----|-----------------|--------|
|
|
172
|
+
| Claude Code | `claude` | `settings.local.json` hooks | Stable |
|
|
173
|
+
| Pi | `pi` | `.pi/extensions/` guard extension | Active development |
|
|
162
174
|
|
|
163
175
|
## How It Works
|
|
164
176
|
|
|
165
|
-
|
|
177
|
+
Instruction overlays + tool-call guards + the `ov` CLI turn your coding session into a multi-agent orchestrator. A persistent coordinator agent manages task decomposition and dispatch, while a mechanical watchdog daemon monitors agent health in the background.
|
|
166
178
|
|
|
167
179
|
```
|
|
168
180
|
Coordinator (persistent orchestrator at project root)
|
|
@@ -190,10 +202,10 @@ Coordinator (persistent orchestrator at project root)
|
|
|
190
202
|
- **Worktrees**: Each agent gets an isolated git worktree — no file conflicts between agents
|
|
191
203
|
- **Merge**: FIFO merge queue (SQLite-backed) with 4-tier conflict resolution
|
|
192
204
|
- **Watchdog**: Tiered health monitoring — Tier 0 mechanical daemon (tmux/pid liveness), Tier 1 AI-assisted failure triage, Tier 2 monitor agent for continuous fleet patrol
|
|
193
|
-
- **Tool Enforcement**:
|
|
205
|
+
- **Tool Enforcement**: Runtime-specific guards (hooks for Claude Code, extensions for Pi) mechanically block file modifications for non-implementation agents and dangerous git operations for all agents
|
|
194
206
|
- **Task Groups**: Batch coordination with auto-close when all member issues complete
|
|
195
207
|
- **Session Lifecycle**: Checkpoint save/restore for compaction survivability, handoff orchestration for crash recovery
|
|
196
|
-
- **Token Instrumentation**: Session metrics extracted from
|
|
208
|
+
- **Token Instrumentation**: Session metrics extracted from runtime transcript files (JSONL)
|
|
197
209
|
|
|
198
210
|
## Project Structure
|
|
199
211
|
|
|
@@ -252,7 +264,7 @@ overstory/
|
|
|
252
264
|
merge/ FIFO queue + conflict resolution
|
|
253
265
|
watchdog/ Tiered health monitoring (daemon, triage, health)
|
|
254
266
|
logging/ Multi-format logger + sanitizer + reporter + color control + shared theme/format
|
|
255
|
-
metrics/ SQLite metrics + transcript parsing
|
|
267
|
+
metrics/ SQLite metrics + pricing + transcript parsing
|
|
256
268
|
doctor/ Health check modules (10 checks)
|
|
257
269
|
insights/ Session insight analyzer for auto-expertise
|
|
258
270
|
runtimes/ AgentRuntime abstraction (registry + adapters: Claude, Pi)
|
package/agents/builder.md
CHANGED
|
@@ -54,8 +54,10 @@ Your task-specific context (task ID, file scope, spec path, branch name, parent
|
|
|
54
54
|
5. **Record mulch learnings** -- review your work for insights worth preserving (conventions discovered, patterns applied, failures encountered, decisions made) and record them with outcome data:
|
|
55
55
|
```bash
|
|
56
56
|
ml record <domain> --type <convention|pattern|failure|decision> --description "..." \
|
|
57
|
+
--classification <foundational|tactical|observational> \
|
|
57
58
|
--outcome-status success --outcome-agent $OVERSTORY_AGENT_NAME
|
|
58
59
|
```
|
|
60
|
+
Classification guide: use `foundational` for stable conventions confirmed across sessions, `tactical` for session-specific patterns (default), `observational` for unverified one-off findings.
|
|
59
61
|
This is a required gate, not optional. Every implementation session produces learnings. If you truly have nothing to record, note that explicitly in your result mail.
|
|
60
62
|
6. Send `worker_done` mail to your parent with structured payload:
|
|
61
63
|
```bash
|
|
@@ -99,6 +101,10 @@ You are an implementation specialist. Given a spec and a set of files you own, y
|
|
|
99
101
|
### Expertise
|
|
100
102
|
- **Load context:** `ml prime [domain]` to load domain expertise before implementing
|
|
101
103
|
- **Record patterns:** `ml record <domain>` to capture useful patterns you discover
|
|
104
|
+
- **Classify records:** Always pass `--classification` when recording:
|
|
105
|
+
- `foundational` — core conventions confirmed across multiple sessions (e.g., "all SQLite DBs use WAL mode")
|
|
106
|
+
- `tactical` — session-specific patterns useful for similar tasks (default if omitted)
|
|
107
|
+
- `observational` — one-off findings or unverified hypotheses worth noting
|
|
102
108
|
|
|
103
109
|
## workflow
|
|
104
110
|
|
package/agents/coordinator.md
CHANGED
|
@@ -145,7 +145,7 @@ Coordinator (you, depth 0)
|
|
|
145
145
|
|
|
146
146
|
### Expertise
|
|
147
147
|
- **Load context:** `ml prime [domain]` to understand the problem space before planning
|
|
148
|
-
- **Record insights:** `ml record <domain> --type <type> --description "<insight>"` to capture orchestration patterns, dispatch decisions, and failure learnings
|
|
148
|
+
- **Record insights:** `ml record <domain> --type <type> --classification <foundational|tactical|observational> --description "<insight>"` to capture orchestration patterns, dispatch decisions, and failure learnings. Use `foundational` for stable conventions, `tactical` for session-specific patterns, `observational` for unverified findings.
|
|
149
149
|
- **Search knowledge:** `ml search <query>` to find relevant past decisions
|
|
150
150
|
|
|
151
151
|
## workflow
|
|
@@ -243,7 +243,7 @@ When a batch is complete (task group auto-closed, all issues resolved):
|
|
|
243
243
|
1. Verify all issues are closed: run `{{TRACKER_CLI}} show <id>` for each issue in the group.
|
|
244
244
|
2. Verify all branches are merged: check `ov status` for unmerged branches.
|
|
245
245
|
3. Clean up worktrees: `ov worktree clean --completed`.
|
|
246
|
-
4. Record orchestration insights: `ml record <domain> --type <type> --description "<insight>"`.
|
|
246
|
+
4. Record orchestration insights: `ml record <domain> --type <type> --classification <foundational|tactical|observational> --description "<insight>"`.
|
|
247
247
|
5. Report to the human operator: summarize what was accomplished, what was merged, any issues encountered.
|
|
248
248
|
6. Check for follow-up work: `{{TRACKER_CLI}} ready` to see if new issues surfaced during the batch.
|
|
249
249
|
|
package/agents/lead.md
CHANGED
|
@@ -121,6 +121,7 @@ ov sling <task-id> \
|
|
|
121
121
|
- **Load domain context:** `ml prime [domain]` to understand the problem space before decomposing
|
|
122
122
|
- **Record patterns:** `ml record <domain>` to capture orchestration insights
|
|
123
123
|
- **Record worker insights:** When worker result mails contain notable findings, record them via `ml record` if they represent reusable patterns or conventions.
|
|
124
|
+
- **Classify records:** Always pass `--classification` when recording. Use `foundational` for core conventions confirmed across sessions, `tactical` for session-specific patterns (default), `observational` for one-off findings.
|
|
124
125
|
|
|
125
126
|
## task-complexity-assessment
|
|
126
127
|
|
|
@@ -297,8 +298,10 @@ Good decomposition follows these principles:
|
|
|
297
298
|
3. Run integration tests if applicable: {{QUALITY_GATE_INLINE}}.
|
|
298
299
|
4. **Record mulch learnings** -- review your orchestration work for insights (decomposition strategies, worker coordination patterns, failures encountered, decisions made) and record them:
|
|
299
300
|
```bash
|
|
300
|
-
ml record <domain> --type <convention|pattern|failure|decision> --description "..."
|
|
301
|
+
ml record <domain> --type <convention|pattern|failure|decision> --description "..." \
|
|
302
|
+
--classification <foundational|tactical|observational>
|
|
301
303
|
```
|
|
304
|
+
Classification guide: use `foundational` for stable conventions confirmed across sessions, `tactical` for session-specific patterns (default), `observational` for unverified one-off findings.
|
|
302
305
|
This is required. Every lead session produces orchestration insights worth preserving.
|
|
303
306
|
5. Run `{{TRACKER_CLI}} close <task-id> --reason "<summary of what was accomplished>"`.
|
|
304
307
|
6. Send a `status` mail to the coordinator confirming all subtasks are complete.
|
package/agents/merger.md
CHANGED
|
@@ -51,7 +51,8 @@ Your task-specific context (task ID, branches to merge, target branch, merge ord
|
|
|
51
51
|
{{QUALITY_GATE_STEPS}}
|
|
52
52
|
4. **Record mulch learnings** -- capture merge resolution insights (conflict patterns, resolution strategies, branch integration issues):
|
|
53
53
|
```bash
|
|
54
|
-
ml record <domain> --type <convention|pattern|failure> --description "..."
|
|
54
|
+
ml record <domain> --type <convention|pattern|failure> --description "..." \
|
|
55
|
+
--classification <foundational|tactical|observational>
|
|
55
56
|
```
|
|
56
57
|
This is required for non-trivial merges (Tier 2+). Merge resolution patterns are highly reusable knowledge for future mergers. Skip for clean Tier 1 merges with no conflicts.
|
|
57
58
|
5. Send a `result` mail to your parent with: tier used, conflicts resolved (if any), test status.
|
|
@@ -92,7 +93,7 @@ You are a branch integration specialist. When workers complete their tasks on se
|
|
|
92
93
|
|
|
93
94
|
### Expertise
|
|
94
95
|
- **Load context:** `ml prime [domain]` to understand the code being merged
|
|
95
|
-
- **Record patterns:** `ml record <domain>` to capture merge resolution insights
|
|
96
|
+
- **Record patterns:** `ml record <domain> --classification <foundational|tactical|observational>` to capture merge resolution insights. Use `foundational` for stable merge conventions, `tactical` for resolution strategies, `observational` for one-off conflict patterns.
|
|
96
97
|
|
|
97
98
|
## workflow
|
|
98
99
|
|
package/agents/monitor.md
CHANGED
|
@@ -72,7 +72,7 @@ You are the watchdog's brain. While Tier 0 (mechanical daemon) checks tmux/pid l
|
|
|
72
72
|
|
|
73
73
|
### Expertise
|
|
74
74
|
- **Load context:** `ml prime [domain]` to understand project patterns
|
|
75
|
-
- **Record insights:** `ml record <domain> --type <type> --description "<insight>"` to capture monitoring patterns, failure signatures, and recovery strategies
|
|
75
|
+
- **Record insights:** `ml record <domain> --type <type> --classification <foundational|tactical|observational> --description "<insight>"` to capture monitoring patterns, failure signatures, and recovery strategies. Use `foundational` for stable monitoring conventions, `tactical` for incident-specific patterns, `observational` for unverified anomaly observations.
|
|
76
76
|
- **Search knowledge:** `ml search <query>` to find relevant past incidents
|
|
77
77
|
|
|
78
78
|
## workflow
|
package/agents/reviewer.md
CHANGED
|
@@ -91,6 +91,7 @@ You are a validation specialist. Given code to review, you check it for correctn
|
|
|
91
91
|
### Expertise
|
|
92
92
|
- **Load conventions:** `ml prime [domain]` to understand project standards
|
|
93
93
|
- **Surface insights:** Include notable findings (convention violations, code quality patterns) in your result mail so your parent has full context.
|
|
94
|
+
- **Classification guidance for parents:** When including notable findings in your result mail, indicate suggested classification: `foundational` (confirmed stable convention), `tactical` (task-specific pattern), or `observational` (unverified finding). This helps your parent record accurately.
|
|
94
95
|
|
|
95
96
|
## workflow
|
|
96
97
|
|
package/agents/scout.md
CHANGED
|
@@ -93,6 +93,7 @@ You perform reconnaissance. Given a research question, exploration target, or an
|
|
|
93
93
|
### Expertise
|
|
94
94
|
- **Query expertise:** `ml prime [domain]` to load relevant context
|
|
95
95
|
- **Surface insights:** Include notable findings (patterns, conventions, gotchas) in your result mail so your parent has full context for spec writing.
|
|
96
|
+
- **Classification guidance for parents:** When including notable findings in your result mail, indicate suggested classification: `foundational` (confirmed stable convention), `tactical` (task-specific pattern), or `observational` (unverified finding). This helps your parent record accurately.
|
|
96
97
|
|
|
97
98
|
## workflow
|
|
98
99
|
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@os-eco/overstory-cli",
|
|
3
|
-
"version": "0.7.2",
|
|
4
|
-
"description": "Multi-agent orchestration for
|
|
3
|
+
"version": "0.7.4",
|
|
4
|
+
"description": "Multi-agent orchestration for AI coding agents — spawn workers in git worktrees via tmux, coordinate through SQLite mail, merge with tiered conflict resolution. Pluggable runtime adapters for Claude Code, Pi, and more.",
|
|
5
5
|
"author": "Jaymin West",
|
|
6
6
|
"license": "MIT",
|
|
7
7
|
"type": "module",
|
package/src/agents/hooks-deployer.test.ts
CHANGED
|
@@ -1,8 +1,9 @@
|
|
|
1
1
|
import { afterEach, beforeEach, describe, expect, test } from "bun:test";
|
|
2
|
-
import { mkdtemp
|
|
2
|
+
import { mkdtemp } from "node:fs/promises";
|
|
3
3
|
import { tmpdir } from "node:os";
|
|
4
4
|
import { join } from "node:path";
|
|
5
5
|
import { AgentError } from "../errors.ts";
|
|
6
|
+
import { cleanupTempDir } from "../test-helpers.ts";
|
|
6
7
|
import {
|
|
7
8
|
buildBashFileGuardScript,
|
|
8
9
|
buildBashPathBoundaryScript,
|
|
@@ -26,7 +27,7 @@ describe("deployHooks", () => {
|
|
|
26
27
|
});
|
|
27
28
|
|
|
28
29
|
afterEach(async () => {
|
|
29
|
-
await
|
|
30
|
+
await cleanupTempDir(tempDir);
|
|
30
31
|
});
|
|
31
32
|
|
|
32
33
|
test("creates .claude/settings.local.json in worktree directory", async () => {
|
|
@@ -1482,7 +1483,7 @@ describe("structural enforcement integration", () => {
|
|
|
1482
1483
|
});
|
|
1483
1484
|
|
|
1484
1485
|
afterEach(async () => {
|
|
1485
|
-
await
|
|
1486
|
+
await cleanupTempDir(tempDir);
|
|
1486
1487
|
});
|
|
1487
1488
|
|
|
1488
1489
|
test("non-implementation agents have more guards than implementation agents", async () => {
|
|
@@ -2055,7 +2056,7 @@ describe("bash path boundary integration", () => {
|
|
|
2055
2056
|
});
|
|
2056
2057
|
|
|
2057
2058
|
afterEach(async () => {
|
|
2058
|
-
await
|
|
2059
|
+
await cleanupTempDir(tempDir);
|
|
2059
2060
|
});
|
|
2060
2061
|
|
|
2061
2062
|
test("builder gets Bash path boundary guard in deployed hooks", async () => {
|
|
@@ -2249,7 +2250,7 @@ describe("PATH prefix in deployed hooks", () => {
|
|
|
2249
2250
|
});
|
|
2250
2251
|
|
|
2251
2252
|
afterEach(async () => {
|
|
2252
|
-
await
|
|
2253
|
+
await cleanupTempDir(tempDir);
|
|
2253
2254
|
});
|
|
2254
2255
|
|
|
2255
2256
|
test("SessionStart hook commands include PATH prefix", async () => {
|
package/src/agents/identity.test.ts
CHANGED
|
@@ -1,8 +1,9 @@
|
|
|
1
1
|
import { afterEach, beforeEach, describe, expect, test } from "bun:test";
|
|
2
|
-
import { mkdir, mkdtemp
|
|
2
|
+
import { mkdir, mkdtemp } from "node:fs/promises";
|
|
3
3
|
import { tmpdir } from "node:os";
|
|
4
4
|
import { join } from "node:path";
|
|
5
5
|
import { AgentError } from "../errors.ts";
|
|
6
|
+
import { cleanupTempDir } from "../test-helpers.ts";
|
|
6
7
|
import type { AgentIdentity } from "../types.ts";
|
|
7
8
|
import { createIdentity, loadIdentity, updateIdentity } from "./identity.ts";
|
|
8
9
|
|
|
@@ -14,7 +15,7 @@ describe("identity", () => {
|
|
|
14
15
|
});
|
|
15
16
|
|
|
16
17
|
afterEach(async () => {
|
|
17
|
-
await
|
|
18
|
+
await cleanupTempDir(tempDir);
|
|
18
19
|
});
|
|
19
20
|
|
|
20
21
|
describe("createIdentity", () => {
|
package/src/agents/manifest.test.ts
CHANGED
|
@@ -1,8 +1,9 @@
|
|
|
1
1
|
import { afterEach, beforeEach, describe, expect, test } from "bun:test";
|
|
2
|
-
import { mkdir, mkdtemp
|
|
2
|
+
import { mkdir, mkdtemp } from "node:fs/promises";
|
|
3
3
|
import { tmpdir } from "node:os";
|
|
4
4
|
import { join } from "node:path";
|
|
5
5
|
import { AgentError } from "../errors.ts";
|
|
6
|
+
import { cleanupTempDir } from "../test-helpers.ts";
|
|
6
7
|
import type { AgentManifest, OverstoryConfig } from "../types.ts";
|
|
7
8
|
import { createManifestLoader, resolveModel, resolveProviderEnv } from "./manifest.ts";
|
|
8
9
|
|
|
@@ -41,7 +42,7 @@ describe("createManifestLoader", () => {
|
|
|
41
42
|
});
|
|
42
43
|
|
|
43
44
|
afterEach(async () => {
|
|
44
|
-
await
|
|
45
|
+
await cleanupTempDir(tempDir);
|
|
45
46
|
});
|
|
46
47
|
|
|
47
48
|
/** Write the manifest JSON and create matching .md files. */
|
|
@@ -806,7 +807,7 @@ describe("manifest validation accepts arbitrary model strings", () => {
|
|
|
806
807
|
});
|
|
807
808
|
|
|
808
809
|
afterEach(async () => {
|
|
809
|
-
await
|
|
810
|
+
await cleanupTempDir(tempDir);
|
|
810
811
|
});
|
|
811
812
|
|
|
812
813
|
test("accepts provider-prefixed model string", async () => {
|
package/src/agents/overlay.test.ts
CHANGED
|
@@ -1,8 +1,9 @@
|
|
|
1
1
|
import { afterEach, beforeEach, describe, expect, test } from "bun:test";
|
|
2
|
-
import { mkdir, mkdtemp
|
|
2
|
+
import { mkdir, mkdtemp } from "node:fs/promises";
|
|
3
3
|
import { tmpdir } from "node:os";
|
|
4
4
|
import { join } from "node:path";
|
|
5
5
|
import { AgentError } from "../errors.ts";
|
|
6
|
+
import { cleanupTempDir } from "../test-helpers.ts";
|
|
6
7
|
import type { OverlayConfig, QualityGate } from "../types.ts";
|
|
7
8
|
import {
|
|
8
9
|
formatQualityGatesBash,
|
|
@@ -584,7 +585,7 @@ describe("writeOverlay", () => {
|
|
|
584
585
|
});
|
|
585
586
|
|
|
586
587
|
afterEach(async () => {
|
|
587
|
-
await
|
|
588
|
+
await cleanupTempDir(tempDir);
|
|
588
589
|
});
|
|
589
590
|
|
|
590
591
|
test("creates .claude/CLAUDE.md in worktree directory", async () => {
|
package/src/commands/agents.test.ts
CHANGED
|
@@ -3,10 +3,11 @@
|
|
|
3
3
|
*/
|
|
4
4
|
|
|
5
5
|
import { afterEach, beforeEach, describe, expect, it, mock } from "bun:test";
|
|
6
|
-
import { mkdtemp
|
|
6
|
+
import { mkdtemp } from "node:fs/promises";
|
|
7
7
|
import { tmpdir } from "node:os";
|
|
8
8
|
import { join } from "node:path";
|
|
9
9
|
import { createSessionStore } from "../sessions/store.ts";
|
|
10
|
+
import { cleanupTempDir } from "../test-helpers.ts";
|
|
10
11
|
import type { AgentSession } from "../types.ts";
|
|
11
12
|
import { agentsCommand, discoverAgents, extractFileScope } from "./agents.ts";
|
|
12
13
|
|
|
@@ -65,7 +66,7 @@ Some expertise here.
|
|
|
65
66
|
});
|
|
66
67
|
|
|
67
68
|
afterEach(async () => {
|
|
68
|
-
await
|
|
69
|
+
await cleanupTempDir(tempDir);
|
|
69
70
|
});
|
|
70
71
|
});
|
|
71
72
|
|
|
@@ -235,7 +236,7 @@ describe("discoverAgents", () => {
|
|
|
235
236
|
});
|
|
236
237
|
|
|
237
238
|
afterEach(async () => {
|
|
238
|
-
await
|
|
239
|
+
await cleanupTempDir(tempDir);
|
|
239
240
|
});
|
|
240
241
|
});
|
|
241
242
|
|
|
@@ -322,6 +323,6 @@ logging:
|
|
|
322
323
|
afterEach(async () => {
|
|
323
324
|
process.stdout.write = originalStdoutWrite;
|
|
324
325
|
process.chdir(originalCwd);
|
|
325
|
-
await
|
|
326
|
+
await cleanupTempDir(tempDir);
|
|
326
327
|
});
|
|
327
328
|
});
|
package/src/commands/agents.ts
CHANGED
|
@@ -29,9 +29,15 @@ export interface DiscoveredAgent {
|
|
|
29
29
|
lastActivity: string;
|
|
30
30
|
}
|
|
31
31
|
|
|
32
|
+
/** Known instruction file paths, tried in order until one exists. */
|
|
33
|
+
const KNOWN_INSTRUCTION_PATHS = [
|
|
34
|
+
join(".claude", "CLAUDE.md"), // Claude Code, Pi
|
|
35
|
+
"AGENTS.md", // Codex (future)
|
|
36
|
+
];
|
|
37
|
+
|
|
32
38
|
/**
|
|
33
|
-
* Extract file scope from an agent's overlay
|
|
34
|
-
* Returns empty array if overlay
|
|
39
|
+
* Extract file scope from an agent's overlay instruction file.
|
|
40
|
+
* Returns empty array if no overlay exists, has no file scope restrictions,
|
|
35
41
|
* or can't be read.
|
|
36
42
|
*
|
|
37
43
|
* @param worktreePath - Absolute path to the agent's worktree
|
|
@@ -39,15 +45,19 @@ export interface DiscoveredAgent {
|
|
|
39
45
|
*/
|
|
40
46
|
export async function extractFileScope(worktreePath: string): Promise<string[]> {
|
|
41
47
|
try {
|
|
42
|
-
|
|
43
|
-
const
|
|
44
|
-
|
|
45
|
-
|
|
48
|
+
let content: string | null = null;
|
|
49
|
+
for (const relPath of KNOWN_INSTRUCTION_PATHS) {
|
|
50
|
+
const overlayPath = join(worktreePath, relPath);
|
|
51
|
+
const overlayFile = Bun.file(overlayPath);
|
|
52
|
+
if (await overlayFile.exists()) {
|
|
53
|
+
content = await overlayFile.text();
|
|
54
|
+
break;
|
|
55
|
+
}
|
|
56
|
+
}
|
|
57
|
+
if (content === null) {
|
|
46
58
|
return [];
|
|
47
59
|
}
|
|
48
60
|
|
|
49
|
-
const content = await overlayFile.text();
|
|
50
|
-
|
|
51
61
|
// Find the section between "## File Scope (exclusive ownership)" and "## Expertise"
|
|
52
62
|
const startMarker = "## File Scope (exclusive ownership)";
|
|
53
63
|
const endMarker = "## Expertise";
|
package/src/commands/completions.test.ts
CHANGED
|
@@ -12,8 +12,8 @@ import {
|
|
|
12
12
|
} from "./completions.ts";
|
|
13
13
|
|
|
14
14
|
describe("COMMANDS array", () => {
|
|
15
|
-
it("should have exactly
|
|
16
|
-
expect(COMMANDS).toHaveLength(
|
|
15
|
+
it("should have exactly 33 commands", () => {
|
|
16
|
+
expect(COMMANDS).toHaveLength(33);
|
|
17
17
|
});
|
|
18
18
|
|
|
19
19
|
it("should include all expected command names", () => {
|
|
@@ -48,6 +48,9 @@ describe("COMMANDS array", () => {
|
|
|
48
48
|
expect(names).toContain("feed");
|
|
49
49
|
expect(names).toContain("logs");
|
|
50
50
|
expect(names).toContain("stop");
|
|
51
|
+
expect(names).toContain("ecosystem");
|
|
52
|
+
expect(names).toContain("upgrade");
|
|
53
|
+
expect(names).toContain("completions");
|
|
51
54
|
});
|
|
52
55
|
});
|
|
53
56
|
|
|
@@ -59,7 +62,7 @@ describe("generateBash", () => {
|
|
|
59
62
|
expect(script).toContain("_init_completion");
|
|
60
63
|
});
|
|
61
64
|
|
|
62
|
-
it("should include all
|
|
65
|
+
it("should include all 33 command names", () => {
|
|
63
66
|
const script = generateBash();
|
|
64
67
|
for (const cmd of COMMANDS) {
|
|
65
68
|
expect(script).toContain(cmd.name);
|
|
@@ -93,7 +96,7 @@ describe("generateZsh", () => {
|
|
|
93
96
|
expect(script).toContain("_arguments");
|
|
94
97
|
});
|
|
95
98
|
|
|
96
|
-
it("should include all
|
|
99
|
+
it("should include all 33 command names", () => {
|
|
97
100
|
const script = generateZsh();
|
|
98
101
|
for (const cmd of COMMANDS) {
|
|
99
102
|
expect(script).toContain(cmd.name);
|
|
@@ -123,7 +126,7 @@ describe("generateFish", () => {
|
|
|
123
126
|
expect(script).toContain("__fish_use_subcommand");
|
|
124
127
|
});
|
|
125
128
|
|
|
126
|
-
it("should include all
|
|
129
|
+
it("should include all 33 command names", () => {
|
|
127
130
|
const script = generateFish();
|
|
128
131
|
for (const cmd of COMMANDS) {
|
|
129
132
|
expect(script).toContain(cmd.name);
|
package/src/commands/completions.ts
CHANGED
|
@@ -58,6 +58,9 @@ export const COMMANDS: readonly CommandDef[] = [
|
|
|
58
58
|
desc: "Initialize .overstory/ in current project",
|
|
59
59
|
flags: [
|
|
60
60
|
{ name: "--force", desc: "Overwrite existing configuration" },
|
|
61
|
+
{ name: "--yes", desc: "Accept all defaults without prompting" },
|
|
62
|
+
{ name: "-y", desc: "Alias for --yes" },
|
|
63
|
+
{ name: "--name", desc: "Project name", takesValue: true },
|
|
61
64
|
{ name: "--help", desc: "Show help" },
|
|
62
65
|
],
|
|
63
66
|
},
|
|
@@ -76,7 +79,13 @@ export const COMMANDS: readonly CommandDef[] = [
|
|
|
76
79
|
{ name: "--files", desc: "Exclusive file scope (comma-separated)", takesValue: true },
|
|
77
80
|
{ name: "--parent", desc: "Parent agent name", takesValue: true },
|
|
78
81
|
{ name: "--depth", desc: "Current hierarchy depth", takesValue: true },
|
|
82
|
+
{ name: "--skip-scout", desc: "Skip scout phase for lead agents" },
|
|
83
|
+
{ name: "--skip-review", desc: "Skip review phase for lead agents" },
|
|
84
|
+
{ name: "--skip-task-check", desc: "Skip task existence validation" },
|
|
79
85
|
{ name: "--force-hierarchy", desc: "Bypass hierarchy validation" },
|
|
86
|
+
{ name: "--max-agents", desc: "Max children per lead", takesValue: true },
|
|
87
|
+
{ name: "--dispatch-max-agents", desc: "Per-lead max agents ceiling", takesValue: true },
|
|
88
|
+
{ name: "--runtime", desc: "Runtime adapter", takesValue: true },
|
|
80
89
|
{ name: "--json", desc: "JSON output" },
|
|
81
90
|
{ name: "--help", desc: "Show help" },
|
|
82
91
|
],
|
|
@@ -107,6 +116,7 @@ export const COMMANDS: readonly CommandDef[] = [
|
|
|
107
116
|
{ name: "--json", desc: "JSON output" },
|
|
108
117
|
{ name: "--verbose", desc: "Extra per-agent detail" },
|
|
109
118
|
{ name: "--agent", desc: "Filter by agent", takesValue: true },
|
|
119
|
+
{ name: "--all", desc: "Show sessions from all runs" },
|
|
110
120
|
{ name: "--watch", desc: "Watch mode" },
|
|
111
121
|
{ name: "--interval", desc: "Poll interval in ms", takesValue: true },
|
|
112
122
|
{ name: "--help", desc: "Show help" },
|
|
@@ -138,6 +148,7 @@ export const COMMANDS: readonly CommandDef[] = [
|
|
|
138
148
|
flags: [
|
|
139
149
|
{ name: "--branch", desc: "Specific branch to merge", takesValue: true },
|
|
140
150
|
{ name: "--all", desc: "All completed branches" },
|
|
151
|
+
{ name: "--into", desc: "Target branch to merge into", takesValue: true },
|
|
141
152
|
{ name: "--dry-run", desc: "Check for conflicts only" },
|
|
142
153
|
{ name: "--json", desc: "JSON output" },
|
|
143
154
|
{ name: "--help", desc: "Show help" },
|
|
@@ -190,8 +201,10 @@ export const COMMANDS: readonly CommandDef[] = [
|
|
|
190
201
|
"merge",
|
|
191
202
|
"logs",
|
|
192
203
|
"version",
|
|
204
|
+
"ecosystem",
|
|
193
205
|
],
|
|
194
206
|
},
|
|
207
|
+
{ name: "--fix", desc: "Attempt to auto-fix issues" },
|
|
195
208
|
{ name: "--help", desc: "Show help" },
|
|
196
209
|
],
|
|
197
210
|
},
|
|
@@ -606,6 +619,29 @@ export const COMMANDS: readonly CommandDef[] = [
|
|
|
606
619
|
},
|
|
607
620
|
],
|
|
608
621
|
},
|
|
622
|
+
{
|
|
623
|
+
name: "ecosystem",
|
|
624
|
+
desc: "Show a summary dashboard of all installed os-eco tools",
|
|
625
|
+
flags: [
|
|
626
|
+
{ name: "--json", desc: "JSON output" },
|
|
627
|
+
{ name: "--help", desc: "Show help" },
|
|
628
|
+
],
|
|
629
|
+
},
|
|
630
|
+
{
|
|
631
|
+
name: "upgrade",
|
|
632
|
+
desc: "Upgrade overstory to the latest version",
|
|
633
|
+
flags: [
|
|
634
|
+
{ name: "--check", desc: "Compare current vs latest without installing" },
|
|
635
|
+
{ name: "--all", desc: "Upgrade all os-eco tools" },
|
|
636
|
+
{ name: "--json", desc: "JSON output" },
|
|
637
|
+
{ name: "--help", desc: "Show help" },
|
|
638
|
+
],
|
|
639
|
+
},
|
|
640
|
+
{
|
|
641
|
+
name: "completions",
|
|
642
|
+
desc: "Generate shell completions",
|
|
643
|
+
flags: [{ name: "--help", desc: "Show help" }],
|
|
644
|
+
},
|
|
609
645
|
] as const;
|
|
610
646
|
|
|
611
647
|
export function generateBash(): string {
|
|
@@ -618,7 +654,7 @@ export function generateBash(): string {
|
|
|
618
654
|
" local cur prev words cword",
|
|
619
655
|
" _init_completion || return",
|
|
620
656
|
"",
|
|
621
|
-
" local commands='init sling prime stop status dashboard inspect merge nudge clean doctor log logs watch trace errors feed replay costs metrics spec coordinator supervisor hooks monitor mail group worktree run'",
|
|
657
|
+
" local commands='agents init sling prime stop status dashboard inspect merge nudge clean doctor log logs watch trace errors feed replay costs metrics spec coordinator supervisor hooks monitor mail group worktree run ecosystem upgrade completions'",
|
|
622
658
|
"",
|
|
623
659
|
" # Top-level completion",
|
|
624
660
|
" if [[ $cword -eq 1 ]]; then",
|
package/src/commands/costs.test.ts
CHANGED
|
@@ -9,12 +9,13 @@
|
|
|
9
9
|
*/
|
|
10
10
|
|
|
11
11
|
import { afterEach, beforeEach, describe, expect, test } from "bun:test";
|
|
12
|
-
import { mkdir, mkdtemp
|
|
12
|
+
import { mkdir, mkdtemp } from "node:fs/promises";
|
|
13
13
|
import { tmpdir } from "node:os";
|
|
14
14
|
import { join } from "node:path";
|
|
15
15
|
import { ValidationError } from "../errors.ts";
|
|
16
16
|
import { createMetricsStore } from "../metrics/store.ts";
|
|
17
17
|
import { createSessionStore } from "../sessions/store.ts";
|
|
18
|
+
import { cleanupTempDir } from "../test-helpers.ts";
|
|
18
19
|
import type { SessionMetrics } from "../types.ts";
|
|
19
20
|
import { costsCommand } from "./costs.ts";
|
|
20
21
|
|
|
@@ -72,7 +73,7 @@ describe("costsCommand", () => {
|
|
|
72
73
|
afterEach(async () => {
|
|
73
74
|
process.stdout.write = originalWrite;
|
|
74
75
|
process.chdir(originalCwd);
|
|
75
|
-
await
|
|
76
|
+
await cleanupTempDir(tempDir);
|
|
76
77
|
});
|
|
77
78
|
|
|
78
79
|
function output(): string {
|
|
@@ -1052,7 +1053,7 @@ describe("costsCommand", () => {
|
|
|
1052
1053
|
|
|
1053
1054
|
afterEach(async () => {
|
|
1054
1055
|
process.env.HOME = originalHome;
|
|
1055
|
-
await
|
|
1056
|
+
await cleanupTempDir(tempHome);
|
|
1056
1057
|
});
|
|
1057
1058
|
|
|
1058
1059
|
test("--self shows orchestrator cost when transcript exists", async () => {
|