@agentplate/cli 1.1.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -4,6 +4,30 @@ All notable changes to Agentplate are documented here. The format follows
4
4
  [Keep a Changelog](https://keepachangelog.com/), and the project aims to adhere to
5
5
  [Semantic Versioning](https://semver.org/).
6
6
 
7
+ ## [1.2.0] — 2026-06-02
8
+
9
+ ### Added
10
+
11
+ - **Auto-merge** (`merge.autoMerge`: `off` / `on-gates-pass` / `on-complete`,
12
+ default `off`). When enabled, a completed worker's branch lands on the canonical
13
+ branch automatically (queue + lock + tiered resolve), reporting `merged` /
14
+ `merge_failed` mail. Configured in `ap setup`.
15
+ - **`agentplate turn <agent>`** — runs the next turn for an idle agent, **resuming**
16
+ the runtime session (warm start) instead of cold-starting. The shared `driveTurn`
17
+ core backs both the first turn (`sling`) and follow-ups.
18
+ - **Per-capability model tiering** — `providers[id].models` lets a faster/cheaper
19
+ model drive read-only roles (scout, reviewer) while the strong model handles the
20
+ rest. Optional prompt in `ap setup`.
21
+ - **Quality-gates prompt in `ap setup`** — detected from `package.json` scripts.
22
+
23
+ ### Changed
24
+
25
+ - **Quality gates run concurrently** (was sequential); the outcome is reused for
26
+ both skill distillation and auto-merge.
27
+ - **Orchestration limits are now enforced.** `agents.maxConcurrent`,
28
+ `agents.maxAgentsPerLead`, and `agents.maxDepth` were validated but ignored; `sling` now
29
+ refuses a spawn that would exceed them with a typed `CapacityError`.
30
+
7
31
  ## [1.1.0] — 2026-06-02
8
32
 
9
33
  ### Added
@@ -90,6 +90,12 @@ Discipline when dispatching:
90
90
  if integration itself is non-trivial.
91
91
  - Re-dispatch on failure: if a lead escalates something it cannot finish, decide
92
92
  whether to re-scope and re-dispatch, or escalate to the operator.
93
+ - Auto-merge (when `merge.autoMerge` is enabled in config): a worker's branch
94
+ lands on the canonical branch automatically when it finishes, and you receive a
95
+ `merged` or `merge_failed` mail per landing. You do **not** run `agentplate merge`
96
+ for those slices — just act on `merge_failed` (re-dispatch a merger or resolve),
97
+ and still own cross-slice integration. When auto-merge is `off` (the default),
98
+ you drive merges yourself as below.
93
99
 
94
100
  ## Communication Protocol
95
101
 
package/agents/lead.md CHANGED
@@ -50,7 +50,9 @@ Discipline when delegating:
50
50
  merge — unless your overlay says `--skip-review`.
51
51
  - **Respect the budget.** Do not exceed your `max-agents` ceiling or the
52
52
  configured depth limit. You are an internal node; your children are leaves and
53
- cannot spawn further.
53
+ cannot spawn further. These limits are now **enforced**: a `sling` that would
54
+ exceed `agents.maxConcurrent`, `agents.maxAgentsPerLead`, or `agents.maxDepth`
55
+ is refused with a capacity error — wait for a child to finish, then retry.
54
56
 
55
57
  ## Coordinating Children
56
58
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@agentplate/cli",
3
- "version": "1.1.0",
3
+ "version": "1.2.0",
4
4
  "publishConfig": {
5
5
  "access": "public"
6
6
  },
@@ -0,0 +1,55 @@
1
+ /**
2
+ * Tests for assertCapacity — the spawn-time orchestration limit gate.
3
+ */
4
+
5
+ import { describe, expect, test } from "bun:test";
6
+ import { CapacityError } from "../errors.ts";
7
+ import { assertCapacity, type CapacityCheck } from "./capacity.ts";
8
+
9
/**
 * Baseline spawn request that sits under every limit. Each test overrides a
 * single field so the limit being exercised is the only thing that varies.
 */
const base: CapacityCheck = {
  limits: { maxDepth: 2, maxConcurrent: 10, maxAgentsPerLead: 5 },
  parentAgent: "lead-1",
  parentActiveChildren: 0,
  active: 0,
  depth: 1,
};
16
+
17
describe("assertCapacity", () => {
  /** Build a thunk that runs the gate against `base` with the given overrides. */
  const spawnWith =
    (overrides: Partial<CapacityCheck> = {}) =>
    () =>
      assertCapacity({ ...base, ...overrides });

  test("passes when under every limit", () => {
    expect(spawnWith()).not.toThrow();
  });

  test("refuses when depth exceeds maxDepth", () => {
    expect(spawnWith({ depth: 3 })).toThrow(CapacityError);
  });

  test("allows depth exactly at maxDepth", () => {
    expect(spawnWith({ depth: 2 })).not.toThrow();
  });

  test("refuses when active is at maxConcurrent", () => {
    expect(spawnWith({ active: 10 })).toThrow(CapacityError);
    // One below the cap is still allowed.
    expect(spawnWith({ active: 9 })).not.toThrow();
  });

  test("refuses when the parent is at maxAgentsPerLead", () => {
    expect(spawnWith({ parentActiveChildren: 5 })).toThrow(CapacityError);
  });

  test("ignores the per-lead cap for a top-level spawn (no parent)", () => {
    expect(spawnWith({ parentAgent: null, parentActiveChildren: 99 })).not.toThrow();
  });

  test("the error is a CapacityError with the CAPACITY_EXCEEDED code", () => {
    try {
      spawnWith({ active: 10 })();
      // Reaching this line means the gate let the spawn through; the sentinel
      // error below then fails the instanceOf assertion in the catch.
      throw new Error("expected to throw");
    } catch (err) {
      expect(err).toBeInstanceOf(CapacityError);
      expect((err as CapacityError).code).toBe("CAPACITY_EXCEEDED");
    }
  });
});
@@ -0,0 +1,50 @@
1
+ /**
2
+ * Orchestration capacity limits — enforced at spawn time.
3
+ *
4
+ * `agents.maxConcurrent`, `agents.maxAgentsPerLead`, and `agents.maxDepth` are
5
+ * configured (and validated) but were previously decorative — nothing consulted
6
+ * them. {@link assertCapacity} is the single gate `sling` calls before creating a
7
+ * worktree, so a runaway fan-out is refused with a typed {@link CapacityError}
8
+ * rather than spawning unbounded agents.
9
+ *
10
+ * Pure (counts are passed in) so it is unit-tested without a session store.
11
+ */
12
+
13
+ import { CapacityError } from "../errors.ts";
14
+
15
/** The three configured orchestration ceilings checked by `assertCapacity`. */
export interface CapacityLimits {
  /** Deepest depth a new agent may occupy; a spawn at exactly this depth is allowed. */
  maxDepth: number;
  /** Spawning is refused once the active-agent count has reached this value. */
  maxConcurrent: number;
  /** Spawning is refused once a parent's active-child count has reached this value. */
  maxAgentsPerLead: number;
}
20
+
21
/** Snapshot of the counts a prospective spawn is judged against. */
export interface CapacityCheck {
  /** Depth at which the agent being spawned would sit. */
  depth: number;
  /** Number of agents currently active in the run, not counting the new one. */
  active: number;
  /** Name of the agent doing the spawning; `null` when the spawn is top-level. */
  parentAgent: string | null;
  /** How many active children the parent already has (unused when `parentAgent` is `null`). */
  parentActiveChildren: number;
  /** Configured ceilings to compare the counts above against. */
  limits: CapacityLimits;
}
32
+
33
/**
 * Spawn-time gate: refuse a spawn that would break a configured limit.
 *
 * Limits are checked in a fixed order — depth, then total concurrency, then the
 * parent's child count — and only the first violation is reported. The per-lead
 * check is skipped when there is no parent.
 *
 * @param c - Counts for the prospective spawn plus the limits to check them against.
 * @throws {CapacityError} When the depth is above `maxDepth`, or the active /
 *   per-parent counts have already reached their cap.
 */
export function assertCapacity(c: CapacityCheck): void {
  const { depth, active, parentAgent, parentActiveChildren, limits } = c;

  let refusal: string | null = null;
  if (depth > limits.maxDepth) {
    // Depth is compared with `>`: occupying exactly maxDepth is permitted.
    refusal = `Cannot spawn at depth ${depth}: exceeds agents.maxDepth (${limits.maxDepth}).`;
  } else if (active >= limits.maxConcurrent) {
    // Counts exclude the agent being spawned, so reaching the cap already refuses.
    refusal = `Cannot spawn: ${active} agent(s) already active, at agents.maxConcurrent (${limits.maxConcurrent}). Wait for some to finish.`;
  } else if (parentAgent && parentActiveChildren >= limits.maxAgentsPerLead) {
    refusal = `Cannot spawn: ${parentAgent} already has ${parentActiveChildren} active child(ren), at agents.maxAgentsPerLead (${limits.maxAgentsPerLead}).`;
  }

  if (refusal !== null) {
    throw new CapacityError(refusal);
  }
}
@@ -0,0 +1,155 @@
1
+ /**
2
+ * Tests for driveTurn — the shared turn core. Real stores + a real (mock) runtime
3
+ * subprocess. A SpyRuntime records the DirectSpawnOpts so we can prove the warm
4
+ * start: a follow-up turn threads `resumeSessionId` through to the runtime.
5
+ */
6
+
7
+ import { afterEach, beforeEach, describe, expect, test } from "bun:test";
8
+ import { mkdirSync, mkdtempSync, rmSync } from "node:fs";
9
+ import { tmpdir } from "node:os";
10
+ import { join } from "node:path";
11
+ import { DEFAULT_CONFIG } from "../config.ts";
12
+ import { createEventStore, type EventStore } from "../events/store.ts";
13
+ import { createMailClient, type MailClient } from "../mail/client.ts";
14
+ import { eventsDbPath, sessionsDbPath } from "../paths.ts";
15
+ import { MockRuntime } from "../runtimes/mock.ts";
16
+ import type { DirectSpawnOpts } from "../runtimes/types.ts";
17
+ import { createSessionStore, type SessionStore } from "../sessions/store.ts";
18
+ import type { AgentplateConfig, AgentSession } from "../types.ts";
19
+ import { driveTurn } from "./drive.ts";
20
+
21
/**
 * MockRuntime that remembers the options of the most recent direct spawn, so a
 * test can inspect what was passed (notably `resumeSessionId`).
 */
class SpyRuntime extends MockRuntime {
  /** Options from the latest `buildDirectSpawn` call; `null` until one happens. */
  lastOpts: DirectSpawnOpts | null = null;

  override buildDirectSpawn(opts: DirectSpawnOpts): string[] {
    // Capture before delegating so the opts are recorded even if the base throws.
    this.lastOpts = opts;
    const argv = super.buildDirectSpawn(opts);
    return argv;
  }
}
29
+
30
// Per-test fixtures: assigned in beforeEach, released in afterEach.
/** Temporary project root holding the `.agentplate` directory. */
let root: string;
/** Temporary directory used as the agent's worktree. */
let worktree: string;
/** Session store opened at the sessions DB path under `root`. */
let store: SessionStore;
/** Event store opened at the events DB path under `root`. */
let events: EventStore;
/** Mail client created for `root`. */
let mail: MailClient;
35
+
36
/**
 * Fresh config for one test: a deep copy of the defaults pointed at the
 * temporary `root`, with `main` as the canonical branch.
 */
function cfg(): AgentplateConfig {
  const config = structuredClone(DEFAULT_CONFIG);
  Object.assign(config.project, { root, canonicalBranch: "main" });
  return config;
}
42
+
43
/**
 * Build an idle `builder-1` session rooted at the test worktree.
 *
 * @param over - Fields to override on top of the defaults.
 * @returns A session with a unique id and both timestamps set to "now".
 */
function makeSession(over: Partial<AgentSession> = {}): AgentSession {
  const timestamp = new Date().toISOString();
  const defaults: AgentSession = {
    id: `session-${crypto.randomUUID()}`,
    agentName: "builder-1",
    capability: "builder",
    taskId: "task-1",
    runId: "run-1",
    worktreePath: worktree,
    branchName: "agentplate/builder-1",
    state: "idle",
    parentAgent: "lead-1",
    depth: 1,
    pid: null,
    runtimeSessionId: null,
    startedAt: timestamp,
    lastActivity: timestamp,
  };
  // Caller-supplied fields win over the defaults.
  return { ...defaults, ...over };
}
63
+
64
// Give every test its own temp project root, worktree and freshly opened stores.
beforeEach(() => {
  const tmp = tmpdir();

  root = mkdtempSync(join(tmp, "agentplate-drive-"));
  mkdirSync(join(root, ".agentplate"), { recursive: true });
  worktree = mkdtempSync(join(tmp, "agentplate-drive-wt-"));

  store = createSessionStore(sessionsDbPath(root));
  events = createEventStore(eventsDbPath(root));
  mail = createMailClient(root);

  // The mock runtime's turn becomes a no-op command that exits 0.
  process.env.AGENTPLATE_MOCK_CMD = "true";
});
73
+
74
// Release everything beforeEach created: close the stores, remove the temp
// directories, and unset the mock-runtime command.
afterEach(() => {
  store.close();
  events.close();
  mail.close();
  rmSync(root, { recursive: true, force: true });
  rmSync(worktree, { recursive: true, force: true });
  // Remove the variable rather than assigning `undefined`: in Node.js a value
  // assigned to process.env is coerced to a string, so `= undefined` would leave
  // the literal "undefined" behind for later tests. Reflect.deleteProperty is
  // used so a "no delete" lint autofix cannot turn this back into an assignment.
  Reflect.deleteProperty(process.env, "AGENTPLATE_MOCK_CMD");
});
82
+
83
describe("driveTurn — warm start", () => {
  /** Persist a fresh idle session and pair it with a recording runtime. */
  const arrange = () => {
    const session = makeSession();
    store.upsertSession(session);
    return { session, runtime: new SpyRuntime() };
  };

  test("threads resumeSessionId through to the runtime spawn (follow-up turn)", async () => {
    const { session, runtime } = arrange();

    const result = await driveTurn({
      root,
      config: cfg(),
      runtime,
      store,
      events,
      mail,
      session,
      model: { model: "m", env: {} },
      prompt: "continue",
      resumeSessionId: "sess-abc",
    });

    // Warm start: the resume id reached the runtime's spawn options.
    expect(runtime.lastOpts?.resumeSessionId).toBe("sess-abc");
    // No terminal mail was emitted, so the agent is paused rather than done.
    expect(result.finalState).toBe("idle");
    expect(store.getSession(session.id)?.state).toBe("idle");
  });

  test("omits resume on the first turn (cold start)", async () => {
    const { session, runtime } = arrange();

    await driveTurn({
      root,
      config: cfg(),
      runtime,
      store,
      events,
      mail,
      session,
      model: { model: "m", env: {} },
      prompt: "begin",
    });

    expect(runtime.lastOpts?.resumeSessionId).toBeUndefined();
  });
});
125
+
126
describe("driveTurn — state transition", () => {
  test("becomes 'completed' when the agent has emitted its terminal mail", async () => {
    const session = makeSession();
    store.upsertSession(session);

    // A worker_done mail sent by the agent itself is what marks the task complete.
    mail.send({
      from: session.agentName,
      to: "lead-1",
      subject: "done",
      body: "",
      type: "worker_done",
    });

    // Skills off: keeps the completed path free of distillation work.
    const config = cfg();
    config.skills.enabled = false;

    const result = await driveTurn({
      root,
      config,
      runtime: new SpyRuntime(),
      store,
      events,
      mail,
      session,
      model: { model: "m", env: {} },
      prompt: "finish",
    });

    expect(result.finalState).toBe("completed");
    const persisted = store.getSession(session.id);
    expect(persisted?.state).toBe("completed");
  });
});
@@ -0,0 +1,200 @@
1
+ /**
2
+ * driveTurn — run ONE headless turn for an agent and handle its aftermath.
3
+ *
4
+ * This is the shared core behind both the first turn (`sling`, which opens a
5
+ * fresh runtime session) and every follow-up turn (`agentplate turn`, which
6
+ * **resumes** the session via `runtimeSessionId` so turns 2+ do not pay the
7
+ * runtime's cold-start cost — the "warm start"). Keeping it in one place means
8
+ * the post-turn handling (state transition, the self-improving skills loop, and
9
+ * auto-merge) is identical no matter which turn it is.
10
+ *
11
+ * Spawn-per-turn is preserved: each call spawns a fresh runtime subprocess
12
+ * (resumed when `resumeSessionId` is given) — there is no long-lived agent.
13
+ */
14
+
15
+ import type { EventStore } from "../events/store.ts";
16
+ import { runQualityGates } from "../insights/quality-gates.ts";
17
+ import type { MailClient } from "../mail/client.ts";
18
+ import { createMailStore } from "../mail/store.ts";
19
+ import { maybeAutoMerge } from "../merge/auto.ts";
20
+ import { mailDbPath } from "../paths.ts";
21
+ import type { AgentRuntime } from "../runtimes/types.ts";
22
+ import type { SessionStore } from "../sessions/store.ts";
23
+ import { runSkillFeedbackAndDistill } from "../skills/lifecycle.ts";
24
+ import type {
25
+ AgentplateConfig,
26
+ AgentSession,
27
+ Capability,
28
+ OutcomeStatus,
29
+ ResolvedModel,
30
+ SessionState,
31
+ } from "../types.ts";
32
+ import { updateIdentity } from "./identity.ts";
33
+ import { runTurn } from "./turn-runner.ts";
34
+
35
/**
 * Mail types that signal a capability has finished its work.
 *
 * @param capability - The agent's capability.
 * @returns `merged` / `merge_failed` for a merger; `worker_done` for everything else.
 */
export function terminalTypesFor(capability: Capability): string[] {
  if (capability === "merger") {
    return ["merged", "merge_failed"];
  }
  return ["worker_done"];
}
39
+
40
/**
 * Decide the state a session ends a turn in, from the mail the agent has sent
 * and the runtime's exit code:
 *  - any sent mail of a terminal type for the capability → `completed`
 *  - otherwise, exit code 0 → `idle` (paused until its next turn)
 *  - otherwise → `failed`
 *
 * Terminal mail takes precedence over the exit code. The mail store is opened
 * for this lookup only and is always closed again.
 *
 * @param root - Project root used to locate the mail database.
 * @param name - Agent name whose sent mail is inspected.
 * @param capability - Determines which mail types count as terminal.
 * @param exitCode - Exit code of the turn's runtime process.
 */
export function resolveFinalState(
  root: string,
  name: string,
  capability: Capability,
  exitCode: number,
): SessionState {
  const terminalTypes = new Set(terminalTypesFor(capability));
  const mailStore = createMailStore(mailDbPath(root));

  let emittedTerminal: boolean;
  try {
    emittedTerminal = mailStore.list({ from: name }).some((msg) => terminalTypes.has(msg.type));
  } finally {
    mailStore.close();
  }

  if (emittedTerminal) {
    return "completed";
  }
  return exitCode === 0 ? "idle" : "failed";
}
62
+
63
/** Everything `driveTurn` needs to run one turn and apply its aftermath. */
export interface DriveTurnCtx {
  /** Project root. */
  root: string;
  /** Loaded configuration (skills, merge and project settings are read from it). */
  config: AgentplateConfig;
  /** Runtime that executes the turn. */
  runtime: AgentRuntime;
  /** Session store updated with state, activity and the runtime session id. */
  store: SessionStore;
  /** Event store that receives every streamed event. */
  events: EventStore;
  /** Mail client handed to auto-merge for its notifications. */
  mail: MailClient;
  /** Session the turn belongs to — either pre-existing or created just before the call. */
  session: AgentSession;
  /** Concrete model, plus provider environment, resolved for this capability. */
  model: ResolvedModel;
  /** Text of the user turn: a dispatch, injected mail, or a nudge. */
  prompt: string;
  /**
   * Runtime session id from a previous turn; passing it resumes that session
   * (warm start). Leave it out on the first turn, which cold-starts.
   */
  resumeSessionId?: string;
}
79
+
80
/** What `driveTurn` reports back once the turn and its post-processing are over. */
export interface DriveTurnResult {
  /** State the session was left in after the turn. */
  finalState: SessionState;
  /** Exit code of the turn's runtime process. */
  exitCode: number;
  /** Quality-gate outcome, or `null` when the gates did not run or produced no status. */
  gateStatus: OutcomeStatus | null;
}
85
+
86
/** Cap an error detail at 1000 characters (plus an ellipsis) so one event stays bounded. */
function clipErrorDetail(text: string): string {
  return text.length > 1000 ? `${text.slice(0, 1000)}…` : text;
}

/**
 * Run one turn for `ctx.session` and apply the post-turn lifecycle.
 *
 * In order:
 *  1. Mark the session `working` and run the turn, recording every streamed
 *     event and touching the session on each one.
 *  2. Persist the runtime session id reported by the turn, if any.
 *  3. If the runtime exited non-zero without emitting an error event, record
 *     its stderr as an `error` event.
 *  4. Resolve and persist the final state, then update the agent identity.
 *  5. When the state is `completed`: run the quality gates (only if the skills
 *     loop or auto-merge wants them), feed the outcome to the skills loop, then
 *     call `maybeAutoMerge`. Everything in this step is best-effort — a failure
 *     is swallowed and never fails the turn.
 *
 * If `runTurn` itself rejects, the session is marked `failed` (and the reason
 * recorded as an `error` event) before the error is rethrown, so a turn that
 * never ran does not leave the session stuck in `working`.
 *
 * @param ctx - Stores, runtime, config and the session/prompt for this turn.
 * @returns The final session state, the runtime exit code and the gate status
 *   (`null` when the gates did not run or produced no status).
 * @throws Whatever `runTurn` rejects with, unchanged.
 */
export async function driveTurn(ctx: DriveTurnCtx): Promise<DriveTurnResult> {
  const { root, config, runtime, store, events, mail, session, model } = ctx;
  const {
    id: sessionId,
    agentName: name,
    capability,
    taskId,
    runId,
    worktreePath,
    branchName,
  } = session;

  store.updateSessionState(sessionId, "working");

  let sawError = false;
  let turn: Awaited<ReturnType<typeof runTurn>>;
  try {
    turn = await runTurn({
      runtime,
      worktreePath,
      model: model.model,
      prompt: ctx.prompt,
      env: model.env,
      resumeSessionId: ctx.resumeSessionId,
      onEvent: (event) => {
        if (event.error || event.type === "error") sawError = true;
        // Prefer the error message (so a failed agent's reason is visible in the
        // feed/logs), else the token/cost JSON the Costs page aggregates.
        const detail = event.error
          ? event.error
          : event.usage
            ? JSON.stringify({ tokens: event.usage.tokens, cost: event.usage.costUsd })
            : null;
        events.record({ agentName: name, runId, type: event.type, tool: event.tool ?? null, detail });
        // Bump last_activity on every streamed event so a long but active turn
        // keeps itself fresh and is never reaped as "idle".
        store.touch(sessionId);
      },
    });
  } catch (err) {
    // The turn never produced a result (e.g. the runtime could not be spawned).
    // Leave a visible reason and a terminal state behind, then let the caller
    // see the original error. The bookkeeping is itself best-effort so it can
    // never mask that error.
    try {
      const reason = (err instanceof Error ? err.message : String(err)).trim();
      if (reason && !sawError) {
        events.record({
          agentName: name,
          runId,
          type: "error",
          tool: null,
          detail: clipErrorDetail(reason),
        });
      }
    } catch {
      // Recording the reason is advisory.
    }
    try {
      store.updateSessionState(sessionId, "failed");
      store.touch(sessionId);
    } catch {
      // Nothing more can be done here; the original error is rethrown below.
    }
    throw err;
  }
  if (turn.runtimeSessionId) store.setRuntimeSessionId(sessionId, turn.runtimeSessionId);

  // A non-zero exit with no error event means the runtime failed via stderr;
  // record it so the failure reason is visible instead of a blank "failed".
  if (turn.exitCode !== 0 && !sawError) {
    const reason = turn.stderr.trim();
    if (reason) {
      events.record({
        agentName: name,
        runId,
        type: "error",
        tool: null,
        detail: clipErrorDetail(reason),
      });
    }
  }

  const finalState = resolveFinalState(root, name, capability, turn.exitCode);
  store.updateSessionState(sessionId, finalState);
  store.touch(sessionId);
  updateIdentity(root, name, {
    taskId,
    summary: `${capability} ran a turn for ${taskId} → ${finalState}`,
  });

  // Quality gates run once when EITHER the self-improving loop or auto-merge
  // needs them; the outcome feeds both. Best-effort — never fails the turn.
  const autoMergeWants =
    config.merge.autoMerge !== "off" && capability !== "scout" && capability !== "merger";
  let gateStatus: OutcomeStatus | null = null;
  if (finalState === "completed" && (config.skills.enabled || autoMergeWants)) {
    try {
      const gateOutcome = await runQualityGates(config.project.qualityGates ?? [], worktreePath);
      // Assigned before the skills loop so a failure there still leaves the
      // gate outcome available to auto-merge below.
      gateStatus = gateOutcome?.status ?? null;
      if (config.skills.enabled) {
        await runSkillFeedbackAndDistill({
          root,
          agentName: name,
          capability,
          taskId,
          worktreePath,
          baseRef: config.project.canonicalBranch,
          runtime,
          outcomeStatus: gateStatus,
          skills: config.skills,
          model: model.model,
        });
      }
    } catch {
      // Skill loop is advisory; a failure here must not fail the turn.
    }
  }

  // Auto-merge the branch onto the canonical branch when configured (off by
  // default). Best-effort — a landing must never fail the turn.
  if (finalState === "completed") {
    try {
      await maybeAutoMerge({
        root,
        branchName,
        targetBranch: config.project.canonicalBranch,
        capability,
        agentName: name,
        taskId,
        parent: session.parentAgent,
        mode: config.merge.autoMerge,
        aiResolveEnabled: config.merge.aiResolveEnabled,
        gateStatus,
        mail,
      });
    } catch {
      // Auto-merge is best-effort; never fail the turn over a landing.
    }
  }

  return { finalState, exitCode: turn.exitCode, gateStatus };
}