@agentplate/cli 1.1.0 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +46 -0
- package/agents/coordinator.md +6 -0
- package/agents/lead.md +3 -1
- package/package.json +1 -1
- package/src/agents/capacity.test.ts +55 -0
- package/src/agents/capacity.ts +50 -0
- package/src/agents/drive.test.ts +155 -0
- package/src/agents/drive.ts +270 -0
- package/src/agents/turn-runner.test.ts +67 -0
- package/src/agents/turn-runner.ts +18 -1
- package/src/commands/sling.ts +46 -117
- package/src/commands/turn.test.ts +101 -0
- package/src/commands/turn.ts +88 -0
- package/src/commands/watch.test.ts +136 -0
- package/src/commands/watch.ts +151 -0
- package/src/config.test.ts +32 -0
- package/src/config.ts +16 -1
- package/src/errors.ts +11 -0
- package/src/index.ts +4 -0
- package/src/insights/quality-gates.test.ts +43 -0
- package/src/insights/quality-gates.ts +30 -31
- package/src/merge/auto.test.ts +157 -0
- package/src/merge/auto.ts +118 -0
- package/src/runtimes/registry.test.ts +16 -2
- package/src/runtimes/registry.ts +13 -0
- package/src/runtimes/resolve.test.ts +49 -0
- package/src/runtimes/resolve.ts +11 -7
- package/src/sessions/store.test.ts +13 -0
- package/src/sessions/store.ts +20 -0
- package/src/types.ts +30 -1
- package/src/version.ts +1 -1
- package/src/wizard/setup.test.ts +45 -0
- package/src/wizard/setup.ts +181 -2
package/CHANGELOG.md
CHANGED
|
@@ -4,6 +4,52 @@ All notable changes to Agentplate are documented here. The format follows
|
|
|
4
4
|
[Keep a Changelog](https://keepachangelog.com/), and the project aims to adhere to
|
|
5
5
|
[Semantic Versioning](https://semver.org/).
|
|
6
6
|
|
|
7
|
+
## [1.3.0] — 2026-06-02
|
|
8
|
+
|
|
9
|
+
### Added
|
|
10
|
+
|
|
11
|
+
- **`agentplate watch`** — the mail pump that makes warm-start automatic: it
|
|
12
|
+
advances every **idle** agent with unread mail to its next (resumed) turn,
|
|
13
|
+
driving eligible agents concurrently up to `agents.maxConcurrent`. Modes:
|
|
14
|
+
`--once`, `--until-idle`, or loop until Ctrl-C.
|
|
15
|
+
- **Hard per-turn timeout** (`agents.turnTimeoutMinutes`, 0 = off) — kills a turn
|
|
16
|
+
that runs past the cap even while still streaming (idle reaping only catches
|
|
17
|
+
inactivity).
|
|
18
|
+
- **Per-capability runtime** — `runtime.capabilities[capability]` now selects the
|
|
19
|
+
runtime adapter per role (previously defined but unused).
|
|
20
|
+
- **Speed shortcuts** — `agents.skipScout` / `skipReview` (surfaced as lead overlay
|
|
21
|
+
constraints) and `agents.skipGates` / `skipSkills` (honored on the turn path).
|
|
22
|
+
- **Wizard** — a gated "advanced limits" step (concurrency, turn-timeout, skips).
|
|
23
|
+
|
|
24
|
+
### Changed
|
|
25
|
+
|
|
26
|
+
- The turn path is shared via `driveTurn` / `driveAgentTurn`, used by `sling`
|
|
27
|
+
(turn 1), `agentplate turn` (single follow-up), and `agentplate watch`.
|
|
28
|
+
|
|
29
|
+
## [1.2.0] — 2026-06-02
|
|
30
|
+
|
|
31
|
+
### Added
|
|
32
|
+
|
|
33
|
+
- **Auto-merge** (`merge.autoMerge`: `off` / `on-gates-pass` / `on-complete`,
|
|
34
|
+
default `off`). When enabled, a completed worker's branch lands on the canonical
|
|
35
|
+
branch automatically (queue + lock + tiered resolve), reporting `merged` /
|
|
36
|
+
`merge_failed` mail. Configured in `ap setup`.
|
|
37
|
+
- **`agentplate turn <agent>`** — runs the next turn for an idle agent, **resuming**
|
|
38
|
+
the runtime session (warm start) instead of cold-starting. The shared `driveTurn`
|
|
39
|
+
core backs both the first turn (`sling`) and follow-ups.
|
|
40
|
+
- **Per-capability model tiering** — `providers[id].models` lets a faster/cheaper
|
|
41
|
+
model drive read-only roles (scout, reviewer) while the strong model handles the
|
|
42
|
+
rest. Optional prompt in `ap setup`.
|
|
43
|
+
- **Quality-gates prompt in `ap setup`** — detected from `package.json` scripts.
|
|
44
|
+
|
|
45
|
+
### Changed
|
|
46
|
+
|
|
47
|
+
- **Quality gates run concurrently** (was sequential); the outcome is reused for
|
|
48
|
+
both skill distillation and auto-merge.
|
|
49
|
+
- **Orchestration limits are now enforced.** `agents.maxConcurrent`,
|
|
50
|
+
`maxAgentsPerLead`, and `maxDepth` were validated but ignored; `sling` now
|
|
51
|
+
refuses a spawn that would exceed them with a typed `CapacityError`.
|
|
52
|
+
|
|
7
53
|
## [1.1.0] — 2026-06-02
|
|
8
54
|
|
|
9
55
|
### Added
|
package/agents/coordinator.md
CHANGED
|
@@ -90,6 +90,12 @@ Discipline when dispatching:
|
|
|
90
90
|
if integration itself is non-trivial.
|
|
91
91
|
- Re-dispatch on failure: if a lead escalates something it cannot finish, decide
|
|
92
92
|
whether to re-scope and re-dispatch, or escalate to the operator.
|
|
93
|
+
- Auto-merge (when `merge.autoMerge` is enabled in config): a worker's branch
|
|
94
|
+
lands on the canonical branch automatically when it finishes, and you receive a
|
|
95
|
+
`merged` or `merge_failed` mail per landing. You do **not** run `agentplate merge`
|
|
96
|
+
for those slices — just act on `merge_failed` (re-dispatch a merger or resolve),
|
|
97
|
+
and still own cross-slice integration. When auto-merge is `off` (the default),
|
|
98
|
+
you drive merges yourself as below.
|
|
93
99
|
|
|
94
100
|
## Communication Protocol
|
|
95
101
|
|
package/agents/lead.md
CHANGED
|
@@ -50,7 +50,9 @@ Discipline when delegating:
|
|
|
50
50
|
merge — unless your overlay says `--skip-review`.
|
|
51
51
|
- **Respect the budget.** Do not exceed your `max-agents` ceiling or the
|
|
52
52
|
configured depth limit. You are an internal node; your children are leaves and
|
|
53
|
-
cannot spawn further.
|
|
53
|
+
cannot spawn further. These limits are now **enforced**: a `sling` that would
|
|
54
|
+
exceed `agents.maxConcurrent`, `agents.maxAgentsPerLead`, or `agents.maxDepth`
|
|
55
|
+
is refused with a capacity error — wait for a child to finish, then retry.
|
|
54
56
|
|
|
55
57
|
## Coordinating Children
|
|
56
58
|
|
package/package.json
CHANGED
|
package/src/agents/capacity.test.ts
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tests for assertCapacity — the spawn-time orchestration limit gate.
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import { describe, expect, test } from "bun:test";
|
|
6
|
+
import { CapacityError } from "../errors.ts";
|
|
7
|
+
import { assertCapacity, type CapacityCheck } from "./capacity.ts";
|
|
8
|
+
|
|
9
|
+
const base: CapacityCheck = {
|
|
10
|
+
depth: 1,
|
|
11
|
+
active: 0,
|
|
12
|
+
parentAgent: "lead-1",
|
|
13
|
+
parentActiveChildren: 0,
|
|
14
|
+
limits: { maxDepth: 2, maxConcurrent: 10, maxAgentsPerLead: 5 },
|
|
15
|
+
};
|
|
16
|
+
|
|
17
|
+
describe("assertCapacity", () => {
|
|
18
|
+
test("passes when under every limit", () => {
|
|
19
|
+
expect(() => assertCapacity(base)).not.toThrow();
|
|
20
|
+
});
|
|
21
|
+
|
|
22
|
+
test("refuses when depth exceeds maxDepth", () => {
|
|
23
|
+
expect(() => assertCapacity({ ...base, depth: 3 })).toThrow(CapacityError);
|
|
24
|
+
});
|
|
25
|
+
|
|
26
|
+
test("allows depth exactly at maxDepth", () => {
|
|
27
|
+
expect(() => assertCapacity({ ...base, depth: 2 })).not.toThrow();
|
|
28
|
+
});
|
|
29
|
+
|
|
30
|
+
test("refuses when active is at maxConcurrent", () => {
|
|
31
|
+
expect(() => assertCapacity({ ...base, active: 10 })).toThrow(CapacityError);
|
|
32
|
+
// one below the cap is still allowed
|
|
33
|
+
expect(() => assertCapacity({ ...base, active: 9 })).not.toThrow();
|
|
34
|
+
});
|
|
35
|
+
|
|
36
|
+
test("refuses when the parent is at maxAgentsPerLead", () => {
|
|
37
|
+
expect(() => assertCapacity({ ...base, parentActiveChildren: 5 })).toThrow(CapacityError);
|
|
38
|
+
});
|
|
39
|
+
|
|
40
|
+
test("ignores the per-lead cap for a top-level spawn (no parent)", () => {
|
|
41
|
+
expect(() =>
|
|
42
|
+
assertCapacity({ ...base, parentAgent: null, parentActiveChildren: 99 }),
|
|
43
|
+
).not.toThrow();
|
|
44
|
+
});
|
|
45
|
+
|
|
46
|
+
test("the error is a CapacityError with the CAPACITY_EXCEEDED code", () => {
|
|
47
|
+
try {
|
|
48
|
+
assertCapacity({ ...base, active: 10 });
|
|
49
|
+
throw new Error("expected to throw");
|
|
50
|
+
} catch (e) {
|
|
51
|
+
expect(e).toBeInstanceOf(CapacityError);
|
|
52
|
+
expect((e as CapacityError).code).toBe("CAPACITY_EXCEEDED");
|
|
53
|
+
}
|
|
54
|
+
});
|
|
55
|
+
});
|
|
package/src/agents/capacity.ts
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Orchestration capacity limits — enforced at spawn time.
|
|
3
|
+
*
|
|
4
|
+
* `agents.maxConcurrent`, `agents.maxAgentsPerLead`, and `agents.maxDepth` are
|
|
5
|
+
* configured (and validated) but were previously decorative — nothing consulted
|
|
6
|
+
* them. {@link assertCapacity} is the single gate `sling` calls before creating a
|
|
7
|
+
* worktree, so a runaway fan-out is refused with a typed {@link CapacityError}
|
|
8
|
+
* rather than spawning unbounded agents.
|
|
9
|
+
*
|
|
10
|
+
* Pure (counts are passed in) so it is unit-tested without a session store.
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
import { CapacityError } from "../errors.ts";
|
|
14
|
+
|
|
15
|
+
export interface CapacityLimits {
|
|
16
|
+
maxDepth: number;
|
|
17
|
+
maxConcurrent: number;
|
|
18
|
+
maxAgentsPerLead: number;
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
export interface CapacityCheck {
|
|
22
|
+
/** Depth the new agent would occupy. */
|
|
23
|
+
depth: number;
|
|
24
|
+
/** Active agents in the run right now (excluding the one being spawned). */
|
|
25
|
+
active: number;
|
|
26
|
+
/** Spawning parent, or null for a top-level spawn. */
|
|
27
|
+
parentAgent: string | null;
|
|
28
|
+
/** Active children the parent already has (ignored when parentAgent is null). */
|
|
29
|
+
parentActiveChildren: number;
|
|
30
|
+
limits: CapacityLimits;
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
/** Throw {@link CapacityError} if spawning would exceed any configured limit. */
|
|
34
|
+
export function assertCapacity(c: CapacityCheck): void {
|
|
35
|
+
if (c.depth > c.limits.maxDepth) {
|
|
36
|
+
throw new CapacityError(
|
|
37
|
+
`Cannot spawn at depth ${c.depth}: exceeds agents.maxDepth (${c.limits.maxDepth}).`,
|
|
38
|
+
);
|
|
39
|
+
}
|
|
40
|
+
if (c.active >= c.limits.maxConcurrent) {
|
|
41
|
+
throw new CapacityError(
|
|
42
|
+
`Cannot spawn: ${c.active} agent(s) already active, at agents.maxConcurrent (${c.limits.maxConcurrent}). Wait for some to finish.`,
|
|
43
|
+
);
|
|
44
|
+
}
|
|
45
|
+
if (c.parentAgent && c.parentActiveChildren >= c.limits.maxAgentsPerLead) {
|
|
46
|
+
throw new CapacityError(
|
|
47
|
+
`Cannot spawn: ${c.parentAgent} already has ${c.parentActiveChildren} active child(ren), at agents.maxAgentsPerLead (${c.limits.maxAgentsPerLead}).`,
|
|
48
|
+
);
|
|
49
|
+
}
|
|
50
|
+
}
|
|
package/src/agents/drive.test.ts
ADDED
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tests for driveTurn — the shared turn core. Real stores + a real (mock) runtime
|
|
3
|
+
* subprocess. A SpyRuntime records the DirectSpawnOpts so we can prove the warm
|
|
4
|
+
* start: a follow-up turn threads `resumeSessionId` through to the runtime.
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import { afterEach, beforeEach, describe, expect, test } from "bun:test";
|
|
8
|
+
import { mkdirSync, mkdtempSync, rmSync } from "node:fs";
|
|
9
|
+
import { tmpdir } from "node:os";
|
|
10
|
+
import { join } from "node:path";
|
|
11
|
+
import { DEFAULT_CONFIG } from "../config.ts";
|
|
12
|
+
import { createEventStore, type EventStore } from "../events/store.ts";
|
|
13
|
+
import { createMailClient, type MailClient } from "../mail/client.ts";
|
|
14
|
+
import { eventsDbPath, sessionsDbPath } from "../paths.ts";
|
|
15
|
+
import { MockRuntime } from "../runtimes/mock.ts";
|
|
16
|
+
import type { DirectSpawnOpts } from "../runtimes/types.ts";
|
|
17
|
+
import { createSessionStore, type SessionStore } from "../sessions/store.ts";
|
|
18
|
+
import type { AgentplateConfig, AgentSession } from "../types.ts";
|
|
19
|
+
import { driveTurn } from "./drive.ts";
|
|
20
|
+
|
|
21
|
+
/** Mock runtime that records the spawn opts (so we can assert the resume id). */
|
|
22
|
+
class SpyRuntime extends MockRuntime {
|
|
23
|
+
lastOpts: DirectSpawnOpts | null = null;
|
|
24
|
+
override buildDirectSpawn(opts: DirectSpawnOpts): string[] {
|
|
25
|
+
this.lastOpts = opts;
|
|
26
|
+
return super.buildDirectSpawn(opts);
|
|
27
|
+
}
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
let root: string;
|
|
31
|
+
let worktree: string;
|
|
32
|
+
let store: SessionStore;
|
|
33
|
+
let events: EventStore;
|
|
34
|
+
let mail: MailClient;
|
|
35
|
+
|
|
36
|
+
function cfg(): AgentplateConfig {
|
|
37
|
+
const c = structuredClone(DEFAULT_CONFIG);
|
|
38
|
+
c.project.root = root;
|
|
39
|
+
c.project.canonicalBranch = "main";
|
|
40
|
+
return c;
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
function makeSession(over: Partial<AgentSession> = {}): AgentSession {
|
|
44
|
+
const now = new Date().toISOString();
|
|
45
|
+
return {
|
|
46
|
+
id: `session-${crypto.randomUUID()}`,
|
|
47
|
+
agentName: "builder-1",
|
|
48
|
+
capability: "builder",
|
|
49
|
+
taskId: "task-1",
|
|
50
|
+
runId: "run-1",
|
|
51
|
+
worktreePath: worktree,
|
|
52
|
+
branchName: "agentplate/builder-1",
|
|
53
|
+
state: "idle",
|
|
54
|
+
parentAgent: "lead-1",
|
|
55
|
+
depth: 1,
|
|
56
|
+
pid: null,
|
|
57
|
+
runtimeSessionId: null,
|
|
58
|
+
startedAt: now,
|
|
59
|
+
lastActivity: now,
|
|
60
|
+
...over,
|
|
61
|
+
};
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
beforeEach(() => {
|
|
65
|
+
root = mkdtempSync(join(tmpdir(), "agentplate-drive-"));
|
|
66
|
+
mkdirSync(join(root, ".agentplate"), { recursive: true });
|
|
67
|
+
worktree = mkdtempSync(join(tmpdir(), "agentplate-drive-wt-"));
|
|
68
|
+
store = createSessionStore(sessionsDbPath(root));
|
|
69
|
+
events = createEventStore(eventsDbPath(root));
|
|
70
|
+
mail = createMailClient(root);
|
|
71
|
+
process.env.AGENTPLATE_MOCK_CMD = "true"; // no-op turn, exits 0
|
|
72
|
+
});
|
|
73
|
+
|
|
74
|
+
afterEach(() => {
|
|
75
|
+
store.close();
|
|
76
|
+
events.close();
|
|
77
|
+
mail.close();
|
|
78
|
+
rmSync(root, { recursive: true, force: true });
|
|
79
|
+
rmSync(worktree, { recursive: true, force: true });
|
|
80
|
+
process.env.AGENTPLATE_MOCK_CMD = undefined;
|
|
81
|
+
});
|
|
82
|
+
|
|
83
|
+
describe("driveTurn — warm start", () => {
|
|
84
|
+
test("threads resumeSessionId through to the runtime spawn (follow-up turn)", async () => {
|
|
85
|
+
const session = makeSession();
|
|
86
|
+
store.upsertSession(session);
|
|
87
|
+
const runtime = new SpyRuntime();
|
|
88
|
+
|
|
89
|
+
const out = await driveTurn({
|
|
90
|
+
root,
|
|
91
|
+
config: cfg(),
|
|
92
|
+
runtime,
|
|
93
|
+
store,
|
|
94
|
+
events,
|
|
95
|
+
mail,
|
|
96
|
+
session,
|
|
97
|
+
model: { model: "m", env: {} },
|
|
98
|
+
prompt: "continue",
|
|
99
|
+
resumeSessionId: "sess-abc",
|
|
100
|
+
});
|
|
101
|
+
|
|
102
|
+
expect(runtime.lastOpts?.resumeSessionId).toBe("sess-abc"); // warm start
|
|
103
|
+
expect(out.finalState).toBe("idle"); // no terminal mail emitted → paused
|
|
104
|
+
expect(store.getSession(session.id)?.state).toBe("idle");
|
|
105
|
+
});
|
|
106
|
+
|
|
107
|
+
test("omits resume on the first turn (cold start)", async () => {
|
|
108
|
+
const session = makeSession();
|
|
109
|
+
store.upsertSession(session);
|
|
110
|
+
const runtime = new SpyRuntime();
|
|
111
|
+
await driveTurn({
|
|
112
|
+
root,
|
|
113
|
+
config: cfg(),
|
|
114
|
+
runtime,
|
|
115
|
+
store,
|
|
116
|
+
events,
|
|
117
|
+
mail,
|
|
118
|
+
session,
|
|
119
|
+
model: { model: "m", env: {} },
|
|
120
|
+
prompt: "begin",
|
|
121
|
+
});
|
|
122
|
+
expect(runtime.lastOpts?.resumeSessionId).toBeUndefined();
|
|
123
|
+
});
|
|
124
|
+
});
|
|
125
|
+
|
|
126
|
+
describe("driveTurn — state transition", () => {
|
|
127
|
+
test("becomes 'completed' when the agent has emitted its terminal mail", async () => {
|
|
128
|
+
const session = makeSession();
|
|
129
|
+
store.upsertSession(session);
|
|
130
|
+
// The agent's own worker_done mail marks the task complete.
|
|
131
|
+
mail.send({
|
|
132
|
+
from: session.agentName,
|
|
133
|
+
to: "lead-1",
|
|
134
|
+
subject: "done",
|
|
135
|
+
body: "",
|
|
136
|
+
type: "worker_done",
|
|
137
|
+
});
|
|
138
|
+
|
|
139
|
+
const config = cfg();
|
|
140
|
+
config.skills.enabled = false; // keep the completed path free of distillation work
|
|
141
|
+
const out = await driveTurn({
|
|
142
|
+
root,
|
|
143
|
+
config,
|
|
144
|
+
runtime: new SpyRuntime(),
|
|
145
|
+
store,
|
|
146
|
+
events,
|
|
147
|
+
mail,
|
|
148
|
+
session,
|
|
149
|
+
model: { model: "m", env: {} },
|
|
150
|
+
prompt: "finish",
|
|
151
|
+
});
|
|
152
|
+
expect(out.finalState).toBe("completed");
|
|
153
|
+
expect(store.getSession(session.id)?.state).toBe("completed");
|
|
154
|
+
});
|
|
155
|
+
});
|
|
package/src/agents/drive.ts
ADDED
|
@@ -0,0 +1,270 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* driveTurn — run ONE headless turn for an agent and handle its aftermath.
|
|
3
|
+
*
|
|
4
|
+
* This is the shared core behind both the first turn (`sling`, which opens a
|
|
5
|
+
* fresh runtime session) and every follow-up turn (`agentplate turn`, which
|
|
6
|
+
* **resumes** the session via `runtimeSessionId` so turns 2+ do not pay the
|
|
7
|
+
* runtime's cold-start cost — the "warm start"). Keeping it in one place means
|
|
8
|
+
* the post-turn handling (state transition, the self-improving skills loop, and
|
|
9
|
+
* auto-merge) is identical no matter which turn it is.
|
|
10
|
+
*
|
|
11
|
+
* Spawn-per-turn is preserved: each call spawns a fresh runtime subprocess
|
|
12
|
+
* (resumed when `resumeSessionId` is given) — there is no long-lived agent.
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
import { existsSync } from "node:fs";
|
|
16
|
+
import type { EventStore } from "../events/store.ts";
|
|
17
|
+
import { runQualityGates } from "../insights/quality-gates.ts";
|
|
18
|
+
import type { MailClient } from "../mail/client.ts";
|
|
19
|
+
import { createMailStore } from "../mail/store.ts";
|
|
20
|
+
import { maybeAutoMerge } from "../merge/auto.ts";
|
|
21
|
+
import { mailDbPath, manifestFilePath } from "../paths.ts";
|
|
22
|
+
import { getRuntime, runtimeNameForCapability } from "../runtimes/registry.ts";
|
|
23
|
+
import { resolveModel } from "../runtimes/resolve.ts";
|
|
24
|
+
import type { AgentRuntime } from "../runtimes/types.ts";
|
|
25
|
+
import type { SessionStore } from "../sessions/store.ts";
|
|
26
|
+
import { runSkillFeedbackAndDistill } from "../skills/lifecycle.ts";
|
|
27
|
+
import type {
|
|
28
|
+
AgentplateConfig,
|
|
29
|
+
AgentSession,
|
|
30
|
+
Capability,
|
|
31
|
+
OutcomeStatus,
|
|
32
|
+
ResolvedModel,
|
|
33
|
+
SessionState,
|
|
34
|
+
} from "../types.ts";
|
|
35
|
+
import { updateIdentity } from "./identity.ts";
|
|
36
|
+
import { buildDefaultManifest, getDefinition, loadManifest } from "./manifest.ts";
|
|
37
|
+
import { runTurn } from "./turn-runner.ts";
|
|
38
|
+
|
|
39
|
+
/** Terminal mail types whose presence marks a capability's work complete. */
|
|
40
|
+
export function terminalTypesFor(capability: Capability): string[] {
|
|
41
|
+
return capability === "merger" ? ["merged", "merge_failed"] : ["worker_done"];
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
/**
|
|
45
|
+
* Resolve a turn's end state from the agent's own mail + exit code:
|
|
46
|
+
* - emitted terminal mail → `completed`
|
|
47
|
+
* - clean exit, no terminal mail → `idle` (paused, awaiting its next turn)
|
|
48
|
+
* - non-zero exit → `failed`
|
|
49
|
+
*/
|
|
50
|
+
export function resolveFinalState(
|
|
51
|
+
root: string,
|
|
52
|
+
name: string,
|
|
53
|
+
capability: Capability,
|
|
54
|
+
exitCode: number,
|
|
55
|
+
): SessionState {
|
|
56
|
+
const terminal = terminalTypesFor(capability);
|
|
57
|
+
const store = createMailStore(mailDbPath(root));
|
|
58
|
+
try {
|
|
59
|
+
const sent = store.list({ from: name });
|
|
60
|
+
if (sent.some((m) => terminal.includes(m.type))) return "completed";
|
|
61
|
+
} finally {
|
|
62
|
+
store.close();
|
|
63
|
+
}
|
|
64
|
+
return exitCode === 0 ? "idle" : "failed";
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
export interface DriveTurnCtx {
|
|
68
|
+
root: string;
|
|
69
|
+
config: AgentplateConfig;
|
|
70
|
+
runtime: AgentRuntime;
|
|
71
|
+
store: SessionStore;
|
|
72
|
+
events: EventStore;
|
|
73
|
+
mail: MailClient;
|
|
74
|
+
/** The session this turn runs for (existing or just-created). */
|
|
75
|
+
session: AgentSession;
|
|
76
|
+
/** Resolved concrete model + provider env for this capability. */
|
|
77
|
+
model: ResolvedModel;
|
|
78
|
+
/** The user-turn text (dispatch / injected mail / nudge). */
|
|
79
|
+
prompt: string;
|
|
80
|
+
/** Prior runtime session id to resume — omit on the first turn (warm start). */
|
|
81
|
+
resumeSessionId?: string;
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
export interface DriveTurnResult {
|
|
85
|
+
finalState: SessionState;
|
|
86
|
+
exitCode: number;
|
|
87
|
+
gateStatus: OutcomeStatus | null;
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
/** Run one turn for `ctx.session` and apply the post-turn lifecycle. */
|
|
91
|
+
export async function driveTurn(ctx: DriveTurnCtx): Promise<DriveTurnResult> {
|
|
92
|
+
const { root, config, runtime, store, events, mail, session, model } = ctx;
|
|
93
|
+
const {
|
|
94
|
+
id: sessionId,
|
|
95
|
+
agentName: name,
|
|
96
|
+
capability,
|
|
97
|
+
taskId,
|
|
98
|
+
runId,
|
|
99
|
+
worktreePath,
|
|
100
|
+
branchName,
|
|
101
|
+
} = session;
|
|
102
|
+
|
|
103
|
+
store.updateSessionState(sessionId, "working");
|
|
104
|
+
|
|
105
|
+
let sawError = false;
|
|
106
|
+
const turn = await runTurn({
|
|
107
|
+
runtime,
|
|
108
|
+
worktreePath,
|
|
109
|
+
model: model.model,
|
|
110
|
+
prompt: ctx.prompt,
|
|
111
|
+
env: model.env,
|
|
112
|
+
resumeSessionId: ctx.resumeSessionId,
|
|
113
|
+
timeoutMs:
|
|
114
|
+
config.agents.turnTimeoutMinutes > 0 ? config.agents.turnTimeoutMinutes * 60_000 : undefined,
|
|
115
|
+
onEvent: (event) => {
|
|
116
|
+
if (event.error || event.type === "error") sawError = true;
|
|
117
|
+
// Prefer the error message (so a failed agent's reason is visible in the
|
|
118
|
+
// feed/logs), else the token/cost JSON the Costs page aggregates.
|
|
119
|
+
const detail = event.error
|
|
120
|
+
? event.error
|
|
121
|
+
: event.usage
|
|
122
|
+
? JSON.stringify({ tokens: event.usage.tokens, cost: event.usage.costUsd })
|
|
123
|
+
: null;
|
|
124
|
+
events.record({ agentName: name, runId, type: event.type, tool: event.tool ?? null, detail });
|
|
125
|
+
// Bump last_activity on every streamed event so a long but active turn
|
|
126
|
+
// keeps itself fresh and is never reaped as "idle".
|
|
127
|
+
store.touch(sessionId);
|
|
128
|
+
},
|
|
129
|
+
});
|
|
130
|
+
if (turn.runtimeSessionId) store.setRuntimeSessionId(sessionId, turn.runtimeSessionId);
|
|
131
|
+
|
|
132
|
+
// Record a clear reason when the wall-clock cap killed the turn.
|
|
133
|
+
if (turn.timedOut) {
|
|
134
|
+
events.record({
|
|
135
|
+
agentName: name,
|
|
136
|
+
runId,
|
|
137
|
+
type: "error",
|
|
138
|
+
tool: null,
|
|
139
|
+
detail: `Turn killed: exceeded agents.turnTimeoutMinutes (${config.agents.turnTimeoutMinutes}m).`,
|
|
140
|
+
});
|
|
141
|
+
}
|
|
142
|
+
|
|
143
|
+
// A non-zero exit with no error event means the runtime failed via stderr;
|
|
144
|
+
// record it so the failure reason is visible instead of a blank "failed".
|
|
145
|
+
if (turn.exitCode !== 0 && !sawError) {
|
|
146
|
+
const reason = turn.stderr.trim();
|
|
147
|
+
if (reason) {
|
|
148
|
+
events.record({
|
|
149
|
+
agentName: name,
|
|
150
|
+
runId,
|
|
151
|
+
type: "error",
|
|
152
|
+
tool: null,
|
|
153
|
+
detail: reason.length > 1000 ? `${reason.slice(0, 1000)}…` : reason,
|
|
154
|
+
});
|
|
155
|
+
}
|
|
156
|
+
}
|
|
157
|
+
|
|
158
|
+
const finalState = resolveFinalState(root, name, capability, turn.exitCode);
|
|
159
|
+
store.updateSessionState(sessionId, finalState);
|
|
160
|
+
store.touch(sessionId);
|
|
161
|
+
updateIdentity(root, name, {
|
|
162
|
+
taskId,
|
|
163
|
+
summary: `${capability} ran a turn for ${taskId} → ${finalState}`,
|
|
164
|
+
});
|
|
165
|
+
|
|
166
|
+
// Quality gates run once when EITHER the self-improving loop or auto-merge
|
|
167
|
+
// needs them (and gates aren't skipped); the outcome feeds both. Best-effort.
|
|
168
|
+
const runSkills = config.skills.enabled && !config.agents.skipSkills;
|
|
169
|
+
const autoMergeWants =
|
|
170
|
+
config.merge.autoMerge !== "off" && capability !== "scout" && capability !== "merger";
|
|
171
|
+
const wantGates = !config.agents.skipGates && (runSkills || autoMergeWants);
|
|
172
|
+
let gateStatus: OutcomeStatus | null = null;
|
|
173
|
+
if (finalState === "completed" && (wantGates || runSkills)) {
|
|
174
|
+
try {
|
|
175
|
+
if (wantGates) {
|
|
176
|
+
const gateOutcome = await runQualityGates(config.project.qualityGates ?? [], worktreePath);
|
|
177
|
+
gateStatus = gateOutcome?.status ?? null;
|
|
178
|
+
}
|
|
179
|
+
if (runSkills) {
|
|
180
|
+
await runSkillFeedbackAndDistill({
|
|
181
|
+
root,
|
|
182
|
+
agentName: name,
|
|
183
|
+
capability,
|
|
184
|
+
taskId,
|
|
185
|
+
worktreePath,
|
|
186
|
+
baseRef: config.project.canonicalBranch,
|
|
187
|
+
runtime,
|
|
188
|
+
outcomeStatus: gateStatus,
|
|
189
|
+
skills: config.skills,
|
|
190
|
+
model: model.model,
|
|
191
|
+
});
|
|
192
|
+
}
|
|
193
|
+
} catch {
|
|
194
|
+
// Skill loop is advisory; a failure here must not fail the turn.
|
|
195
|
+
}
|
|
196
|
+
}
|
|
197
|
+
|
|
198
|
+
// Auto-merge the branch onto the canonical branch when configured (off by
|
|
199
|
+
// default). Best-effort — a landing must never fail the turn.
|
|
200
|
+
if (finalState === "completed") {
|
|
201
|
+
try {
|
|
202
|
+
await maybeAutoMerge({
|
|
203
|
+
root,
|
|
204
|
+
branchName,
|
|
205
|
+
targetBranch: config.project.canonicalBranch,
|
|
206
|
+
capability,
|
|
207
|
+
agentName: name,
|
|
208
|
+
taskId,
|
|
209
|
+
parent: session.parentAgent,
|
|
210
|
+
mode: config.merge.autoMerge,
|
|
211
|
+
aiResolveEnabled: config.merge.aiResolveEnabled,
|
|
212
|
+
gateStatus,
|
|
213
|
+
mail,
|
|
214
|
+
});
|
|
215
|
+
} catch {
|
|
216
|
+
// Auto-merge is best-effort; never fail the turn over a landing.
|
|
217
|
+
}
|
|
218
|
+
}
|
|
219
|
+
|
|
220
|
+
return { finalState, exitCode: turn.exitCode, gateStatus };
|
|
221
|
+
}
|
|
222
|
+
|
|
223
|
+
export interface DriveAgentTurnCtx {
|
|
224
|
+
root: string;
|
|
225
|
+
config: AgentplateConfig;
|
|
226
|
+
session: AgentSession;
|
|
227
|
+
store: SessionStore;
|
|
228
|
+
events: EventStore;
|
|
229
|
+
mail: MailClient;
|
|
230
|
+
}
|
|
231
|
+
|
|
232
|
+
/**
|
|
233
|
+
* Run the next (resumed) turn for an existing session: resolve its runtime + model
|
|
234
|
+
* + manifest def, inject its unread mail as the prompt, and {@link driveTurn} with
|
|
235
|
+
* the stored `runtimeSessionId` (warm start). Shared by `agentplate turn` (single)
|
|
236
|
+
* and `agentplate watch` (the mail pump). Assumes the caller has already decided
|
|
237
|
+
* the session is drivable.
|
|
238
|
+
*/
|
|
239
|
+
export async function driveAgentTurn(ctx: DriveAgentTurnCtx): Promise<DriveTurnResult> {
|
|
240
|
+
const { root, config, session } = ctx;
|
|
241
|
+
const manifestPath = manifestFilePath(root);
|
|
242
|
+
const manifest = existsSync(manifestPath) ? loadManifest(manifestPath) : buildDefaultManifest();
|
|
243
|
+
const def = getDefinition(manifest, session.capability);
|
|
244
|
+
const runtime = getRuntime(
|
|
245
|
+
runtimeNameForCapability(config.runtime, session.capability),
|
|
246
|
+
config.runtime.default,
|
|
247
|
+
);
|
|
248
|
+
const model = resolveModel(config, root, def.model, session.capability);
|
|
249
|
+
|
|
250
|
+
// The turn's user text is the agent's unread mail (a child's reply / operator
|
|
251
|
+
// direction); fall back to a continue nudge. checkInject marks it read.
|
|
252
|
+
const injected = ctx.mail.checkInject(session.agentName);
|
|
253
|
+
const prompt =
|
|
254
|
+
injected.trim().length > 0
|
|
255
|
+
? injected
|
|
256
|
+
: "Continue your task. If it is complete, send your terminal mail.";
|
|
257
|
+
|
|
258
|
+
return driveTurn({
|
|
259
|
+
root,
|
|
260
|
+
config,
|
|
261
|
+
runtime,
|
|
262
|
+
store: ctx.store,
|
|
263
|
+
events: ctx.events,
|
|
264
|
+
mail: ctx.mail,
|
|
265
|
+
session,
|
|
266
|
+
model,
|
|
267
|
+
prompt,
|
|
268
|
+
resumeSessionId: session.runtimeSessionId ?? undefined,
|
|
269
|
+
});
|
|
270
|
+
}
|
|
package/src/agents/turn-runner.test.ts
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tests for runTurn — focused on the hard wall-clock cap. Real subprocesses via
|
|
3
|
+
* the mock runtime (a `bash -lc` snippet), so we exercise true kill behavior.
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { afterEach, beforeEach, describe, expect, test } from "bun:test";
|
|
7
|
+
import { mkdtempSync, rmSync } from "node:fs";
|
|
8
|
+
import { tmpdir } from "node:os";
|
|
9
|
+
import { join } from "node:path";
|
|
10
|
+
import { MockRuntime } from "../runtimes/mock.ts";
|
|
11
|
+
import { runTurn } from "./turn-runner.ts";
|
|
12
|
+
|
|
13
|
+
let cwd: string;
|
|
14
|
+
const runtime = new MockRuntime();
|
|
15
|
+
|
|
16
|
+
beforeEach(() => {
|
|
17
|
+
cwd = mkdtempSync(join(tmpdir(), "agentplate-turnrunner-"));
|
|
18
|
+
});
|
|
19
|
+
afterEach(() => {
|
|
20
|
+
rmSync(cwd, { recursive: true, force: true });
|
|
21
|
+
process.env.AGENTPLATE_MOCK_CMD = undefined;
|
|
22
|
+
});
|
|
23
|
+
|
|
24
|
+
describe("runTurn — turn timeout", () => {
|
|
25
|
+
test("kills a turn that exceeds timeoutMs and flags timedOut", async () => {
|
|
26
|
+
process.env.AGENTPLATE_MOCK_CMD = "sleep 10"; // would hang well past the cap
|
|
27
|
+
const started = performance.now();
|
|
28
|
+
const result = await runTurn({
|
|
29
|
+
runtime,
|
|
30
|
+
worktreePath: cwd,
|
|
31
|
+
model: "m",
|
|
32
|
+
prompt: "",
|
|
33
|
+
timeoutMs: 200,
|
|
34
|
+
});
|
|
35
|
+
const elapsed = performance.now() - started;
|
|
36
|
+
|
|
37
|
+
expect(result.timedOut).toBe(true);
|
|
38
|
+
expect(result.exitCode).not.toBe(0); // killed → non-zero
|
|
39
|
+
expect(elapsed).toBeLessThan(3000); // resolved at the cap, not after 10s
|
|
40
|
+
});
|
|
41
|
+
|
|
42
|
+
test("does not flag timedOut when the turn finishes within the cap", async () => {
|
|
43
|
+
process.env.AGENTPLATE_MOCK_CMD = "true"; // instant exit 0
|
|
44
|
+
const result = await runTurn({
|
|
45
|
+
runtime,
|
|
46
|
+
worktreePath: cwd,
|
|
47
|
+
model: "m",
|
|
48
|
+
prompt: "",
|
|
49
|
+
timeoutMs: 5000,
|
|
50
|
+
});
|
|
51
|
+
expect(result.timedOut).toBe(false);
|
|
52
|
+
expect(result.exitCode).toBe(0);
|
|
53
|
+
});
|
|
54
|
+
|
|
55
|
+
test("no cap when timeoutMs is omitted/zero", async () => {
|
|
56
|
+
process.env.AGENTPLATE_MOCK_CMD = "true";
|
|
57
|
+
const result = await runTurn({
|
|
58
|
+
runtime,
|
|
59
|
+
worktreePath: cwd,
|
|
60
|
+
model: "m",
|
|
61
|
+
prompt: "",
|
|
62
|
+
timeoutMs: 0,
|
|
63
|
+
});
|
|
64
|
+
expect(result.timedOut).toBe(false);
|
|
65
|
+
expect(result.exitCode).toBe(0);
|
|
66
|
+
});
|
|
67
|
+
});
|