@agentplate/cli 1.0.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +50 -0
- package/agents/coordinator.md +43 -13
- package/agents/lead.md +8 -1
- package/package.json +5 -5
- package/src/agents/capacity.test.ts +55 -0
- package/src/agents/capacity.ts +50 -0
- package/src/agents/drive.test.ts +155 -0
- package/src/agents/drive.ts +200 -0
- package/src/agents/system-prompt.ts +2 -1
- package/src/commands/sling.test.ts +84 -0
- package/src/commands/sling.ts +73 -117
- package/src/commands/spec.test.ts +142 -0
- package/src/commands/spec.ts +192 -0
- package/src/commands/turn.test.ts +101 -0
- package/src/commands/turn.ts +113 -0
- package/src/config.test.ts +18 -0
- package/src/config.ts +6 -1
- package/src/errors.ts +11 -0
- package/src/index.ts +4 -0
- package/src/insights/quality-gates.test.ts +43 -0
- package/src/insights/quality-gates.ts +30 -31
- package/src/merge/auto.test.ts +157 -0
- package/src/merge/auto.ts +118 -0
- package/src/paths.ts +2 -1
- package/src/runtimes/resolve.test.ts +49 -0
- package/src/runtimes/resolve.ts +11 -7
- package/src/sessions/store.test.ts +13 -0
- package/src/sessions/store.ts +20 -0
- package/src/types.ts +16 -1
- package/src/version.ts +1 -1
- package/src/wizard/setup.test.ts +45 -0
- package/src/wizard/setup.ts +119 -6
- package/ui/dist/assets/index-DAq3_wei.css +1 -0
- package/ui/dist/assets/index-DjRGeS6V.js +4227 -0
- package/ui/dist/index.html +2 -2
- package/ui/dist/assets/index-C7rXIMER.css +0 -1
- package/ui/dist/assets/index-W4kbr4by.js +0 -4526
package/CHANGELOG.md
CHANGED
|
@@ -4,6 +4,56 @@ All notable changes to Agentplate are documented here. The format follows
|
|
|
4
4
|
[Keep a Changelog](https://keepachangelog.com/), and the project aims to adhere to
|
|
5
5
|
[Semantic Versioning](https://semver.org/).
|
|
6
6
|
|
|
7
|
+
## [1.2.0] — 2026-06-02
|
|
8
|
+
|
|
9
|
+
### Added
|
|
10
|
+
|
|
11
|
+
- **Auto-merge** (`merge.autoMerge`: `off` / `on-gates-pass` / `on-complete`,
|
|
12
|
+
default `off`). When enabled, a completed worker's branch lands on the canonical
|
|
13
|
+
branch automatically (queue + lock + tiered resolve), reporting `merged` /
|
|
14
|
+
`merge_failed` mail. Configured in `ap setup`.
|
|
15
|
+
- **`agentplate turn <agent>`** — runs the next turn for an idle agent, **resuming**
|
|
16
|
+
the runtime session (warm start) instead of cold-starting. The shared `driveTurn`
|
|
17
|
+
core backs both the first turn (`sling`) and follow-ups.
|
|
18
|
+
- **Per-capability model tiering** — `providers[id].models` lets a faster/cheaper
|
|
19
|
+
model drive read-only roles (scout, reviewer) while the strong model handles the
|
|
20
|
+
rest. Optional prompt in `ap setup`.
|
|
21
|
+
- **Quality-gates prompt in `ap setup`** — detected from `package.json` scripts.
|
|
22
|
+
|
|
23
|
+
### Changed
|
|
24
|
+
|
|
25
|
+
- **Quality gates run concurrently** (was sequential); the outcome is reused for
|
|
26
|
+
both skill distillation and auto-merge.
|
|
27
|
+
- **Orchestration limits are now enforced.** `agents.maxConcurrent`,
|
|
28
|
+
`maxAgentsPerLead`, and `maxDepth` were validated but ignored; `sling` now
|
|
29
|
+
refuses a spawn that would exceed them with a typed `CapacityError`.
|
|
30
|
+
|
|
31
|
+
## [1.1.0] — 2026-06-02
|
|
32
|
+
|
|
33
|
+
### Added
|
|
34
|
+
|
|
35
|
+
- **`agentplate spec` command** (`write` / `show` / `list` / `path`) — a
|
|
36
|
+
first-class, role-clean way to author the dispatch **contract** a lead or worker
|
|
37
|
+
launches with, written to `.agentplate/specs/<taskId>.md`.
|
|
38
|
+
|
|
39
|
+
### Fixed
|
|
40
|
+
|
|
41
|
+
- **Coordinator→lead contract race.** A slung agent reads its inbox once at launch
|
|
42
|
+
and starts immediately, so a brief mailed *after* `sling` arrived too late and the
|
|
43
|
+
agent worked from inherited (wrong) branch content. Contracts are now delivered
|
|
44
|
+
**in-band at launch**: `sling --spec` validates the spec exists and is non-empty
|
|
45
|
+
(failing loudly otherwise) and **inlines** its content into the agent's first
|
|
46
|
+
prompt. Coordinator/lead guidance now requires authoring the spec before slinging
|
|
47
|
+
and forbids delivering a contract by mail afterward.
|
|
48
|
+
|
|
49
|
+
### Changed
|
|
50
|
+
|
|
51
|
+
- Updated root dependencies to current majors (`@clack/prompts` 1.x, `commander`
|
|
52
|
+
15, `typescript` 6, `biome` 2.4). Dependabot now batches only minor/patch updates,
|
|
53
|
+
so majors arrive as individual reviewable PRs.
|
|
54
|
+
- Repository hardening for public contributions: Code of Conduct, CODEOWNERS,
|
|
55
|
+
Dependabot config, and branch protection on `main`.
|
|
56
|
+
|
|
7
57
|
## [1.0.0] — 2026-06-01
|
|
8
58
|
|
|
9
59
|
Initial public release of Agentplate as `@agentplate/cli`.
|
package/agents/coordinator.md
CHANGED
|
@@ -6,12 +6,16 @@ orchestrator for a run. You take the overall goal, break it into major slices,
|
|
|
6
6
|
run to completion. You sit at the top of the hierarchy (depth 0): you spawn
|
|
7
7
|
**leads**, and leads spawn the leaf workers.
|
|
8
8
|
|
|
9
|
-
**You are a dispatcher, not an implementer.** Never edit
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
9
|
+
**You are a dispatcher, not an implementer.** Never edit the codebase or run the
|
|
10
|
+
build/tests to "just fix" something — every change to the **work product** is made
|
|
11
|
+
by an agent you `agentplate sling`. The one artifact you *do* author is the **spec**
|
|
12
|
+
for each slice: a spec is a dispatch input (`.agentplate/specs/<taskId>.md`), not
|
|
13
|
+
the work product, so writing it with `agentplate spec write` is dispatching, not
|
|
14
|
+
implementing — do it freely. Always **fan out**: decompose the goal into
|
|
15
|
+
independent, parallel slices and dispatch a lead per slice; for anything beyond a
|
|
16
|
+
single trivial change, dispatch **at least two leads** so work proceeds in
|
|
17
|
+
parallel. If you find yourself about to touch a file *in the codebase*, sling an
|
|
18
|
+
agent instead.
|
|
15
19
|
|
|
16
20
|
The reusable HOW lives in this file. The per-run WHAT (the goal, the task set,
|
|
17
21
|
your agent name) comes from your overlay instruction file (`CLAUDE.md`,
|
|
@@ -34,23 +38,42 @@ coordinator is the run's nerve center; do not go quiet while children work.
|
|
|
34
38
|
|
|
35
39
|
## Dispatching Leads
|
|
36
40
|
|
|
37
|
-
|
|
38
|
-
the
|
|
41
|
+
For each slice, **author the spec first, then sling against it.** The spec is the
|
|
42
|
+
contract — goal, the exact base branch/content to work from, scope/files,
|
|
43
|
+
constraints, acceptance criteria. It must exist *before* you sling, because
|
|
44
|
+
`--spec` is loaded into the lead's task **at launch**:
|
|
39
45
|
|
|
40
46
|
```bash
|
|
47
|
+
# 1. Write the contract (here from a heredoc on stdin; --body/--file also work).
|
|
48
|
+
agentplate spec write <taskId> --stdin <<'SPEC'
|
|
49
|
+
# <taskId>
|
|
50
|
+
Goal: …
|
|
51
|
+
Base branch / starting content: …
|
|
52
|
+
Scope (files this slice owns): …
|
|
53
|
+
Constraints: …
|
|
54
|
+
Acceptance criteria: …
|
|
55
|
+
SPEC
|
|
56
|
+
|
|
57
|
+
# 2. Dispatch the lead against it, naming yourself as the parent.
|
|
41
58
|
agentplate sling <taskId> --capability lead --parent <self> \
|
|
42
59
|
--spec .agentplate/specs/<taskId>.md
|
|
43
60
|
```
|
|
44
61
|
|
|
62
|
+
**Never deliver a lead's contract by mail after slinging.** A slung lead reads its
|
|
63
|
+
inbox once at launch and then starts working; a brief mailed a few seconds later
|
|
64
|
+
arrives too late, and the lead proceeds from inherited (wrong) branch content. The
|
|
65
|
+
contract goes in the **spec, at launch** — mail to a lead is only for *mid-run*
|
|
66
|
+
direction once it is already working. (`sling` refuses a missing or empty `--spec`,
|
|
67
|
+
so a contract can never be silently dropped.)
|
|
68
|
+
|
|
45
69
|
Discipline when dispatching:
|
|
46
70
|
|
|
47
71
|
- **One owner per slice.** Each lead owns a coherent, independent slice with its
|
|
48
72
|
own area of the codebase, so leads' teams do not collide.
|
|
49
73
|
- **Disjoint slices.** Carve the work so two leads are not editing the same files
|
|
50
74
|
in parallel. Cross-slice integration is your concern, not theirs.
|
|
51
|
-
- **Specs first.**
|
|
52
|
-
|
|
53
|
-
against specs.
|
|
75
|
+
- **Specs first.** Every slice gets a spec authored with `agentplate spec write`
|
|
76
|
+
*before* its lead is slung; leads delegate against that spec. No spec, no sling.
|
|
54
77
|
- **Respect depth.** You spawn leads only. Leads spawn the leaf workers
|
|
55
78
|
(scout/builder/reviewer/merger). Do not spawn leaf workers directly except for
|
|
56
79
|
a quick read-only scout when you need to scope the run yourself.
|
|
@@ -67,14 +90,21 @@ Discipline when dispatching:
|
|
|
67
90
|
if integration itself is non-trivial.
|
|
68
91
|
- Re-dispatch on failure: if a lead escalates something it cannot finish, decide
|
|
69
92
|
whether to re-scope and re-dispatch, or escalate to the operator.
|
|
93
|
+
- Auto-merge (when `merge.autoMerge` is enabled in config): a worker's branch
|
|
94
|
+
lands on the canonical branch automatically when it finishes, and you receive a
|
|
95
|
+
`merged` or `merge_failed` mail per landing. You do **not** run `agentplate merge`
|
|
96
|
+
for those slices — just act on `merge_failed` (re-dispatch a merger or resolve),
|
|
97
|
+
and still own cross-slice integration. When auto-merge is `off` (the default),
|
|
98
|
+
you drive merges yourself as below.
|
|
70
99
|
|
|
71
100
|
## Communication Protocol
|
|
72
101
|
|
|
73
102
|
- **Up to the operator (or orchestrator):** `--type status` for run-level
|
|
74
103
|
progress; `--type escalation` for decisions that need a human or a higher-level
|
|
75
104
|
call; `--type result` for the final outcome of the run.
|
|
76
|
-
- **Down to leads:** answer their questions and issue direction with
|
|
77
|
-
`agentplate mail send --to <lead>`.
|
|
105
|
+
- **Down to leads:** answer their questions and issue *mid-run* direction with
|
|
106
|
+
`agentplate mail send --to <lead>`. Never use mail to deliver the initial
|
|
107
|
+
contract — that belongs in the spec the lead launched with.
|
|
78
108
|
|
|
79
109
|
## Completion Protocol
|
|
80
110
|
|
package/agents/lead.md
CHANGED
|
@@ -33,6 +33,11 @@ agentplate sling <taskId> --capability builder --parent <self> \
|
|
|
33
33
|
--files src/foo.ts,src/foo.test.ts --spec .agentplate/specs/<taskId>.md
|
|
34
34
|
```
|
|
35
35
|
|
|
36
|
+
Author each child's spec with `agentplate spec write` *before* you sling it — the
|
|
37
|
+
spec loads at launch, so it is the only race-free way to hand a child its contract.
|
|
38
|
+
Never mail a child its task after slinging (it has already read its inbox once and
|
|
39
|
+
started); mail is for mid-run direction only.
|
|
40
|
+
|
|
36
41
|
Capabilities you may spawn: `scout`, `builder`, `reviewer`, `merger`.
|
|
37
42
|
|
|
38
43
|
Discipline when delegating:
|
|
@@ -45,7 +50,9 @@ Discipline when delegating:
|
|
|
45
50
|
merge — unless your overlay says `--skip-review`.
|
|
46
51
|
- **Respect the budget.** Do not exceed your `max-agents` ceiling or the
|
|
47
52
|
configured depth limit. You are an internal node; your children are leaves and
|
|
48
|
-
cannot spawn further.
|
|
53
|
+
cannot spawn further. These limits are now **enforced**: a `sling` that would
|
|
54
|
+
exceed `agents.maxConcurrent`, `agents.maxAgentsPerLead`, or `agents.maxDepth`
|
|
55
|
+
is refused with a capacity error. For the two concurrency caps, wait for a child to finish, then retry; a depth refusal will not clear by waiting.
|
|
49
56
|
|
|
50
57
|
## Coordinating Children
|
|
51
58
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@agentplate/cli",
|
|
3
|
-
"version": "1.0.0",
|
|
3
|
+
"version": "1.2.0",
|
|
4
4
|
"publishConfig": {
|
|
5
5
|
"access": "public"
|
|
6
6
|
},
|
|
@@ -50,15 +50,15 @@
|
|
|
50
50
|
"prepack": "bun run build:ui"
|
|
51
51
|
},
|
|
52
52
|
"dependencies": {
|
|
53
|
-
"@clack/prompts": "^
|
|
53
|
+
"@clack/prompts": "^1.5.0",
|
|
54
54
|
"chalk": "^5.6.2",
|
|
55
|
-
"commander": "^
|
|
55
|
+
"commander": "^15.0.0",
|
|
56
56
|
"js-yaml": "^4.1.1"
|
|
57
57
|
},
|
|
58
58
|
"devDependencies": {
|
|
59
|
-
"@biomejs/biome": "2.
|
|
59
|
+
"@biomejs/biome": "2.4.16",
|
|
60
60
|
"@types/bun": "latest",
|
|
61
61
|
"@types/js-yaml": "^4.0.9",
|
|
62
|
-
"typescript": "^
|
|
62
|
+
"typescript": "^6.0.3"
|
|
63
63
|
}
|
|
64
64
|
}
|
|
package/src/agents/capacity.test.ts
@@ -0,0 +1,55 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tests for assertCapacity — the spawn-time orchestration limit gate.
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import { describe, expect, test } from "bun:test";
|
|
6
|
+
import { CapacityError } from "../errors.ts";
|
|
7
|
+
import { assertCapacity, type CapacityCheck } from "./capacity.ts";
|
|
8
|
+
|
|
9
|
+
/**
 * Baseline spawn request that sits under every limit. Each test spreads this
 * and overrides a single field to probe one boundary at a time.
 */
const base: CapacityCheck = {
  depth: 1,
  active: 0,
  parentAgent: "lead-1",
  parentActiveChildren: 0,
  limits: { maxDepth: 2, maxConcurrent: 10, maxAgentsPerLead: 5 },
};
|
|
16
|
+
|
|
17
|
+
describe("assertCapacity", () => {
  test("passes when under every limit", () => {
    expect(() => assertCapacity(base)).not.toThrow();
  });

  test("refuses when depth exceeds maxDepth", () => {
    expect(() => assertCapacity({ ...base, depth: 3 })).toThrow(CapacityError);
  });

  test("allows depth exactly at maxDepth", () => {
    expect(() => assertCapacity({ ...base, depth: 2 })).not.toThrow();
  });

  test("refuses when active is at maxConcurrent", () => {
    expect(() => assertCapacity({ ...base, active: 10 })).toThrow(CapacityError);
    // one below the cap is still allowed
    expect(() => assertCapacity({ ...base, active: 9 })).not.toThrow();
  });

  test("refuses when the parent is at maxAgentsPerLead", () => {
    expect(() => assertCapacity({ ...base, parentActiveChildren: 5 })).toThrow(CapacityError);
  });

  test("ignores the per-lead cap for a top-level spawn (no parent)", () => {
    expect(() =>
      assertCapacity({ ...base, parentAgent: null, parentActiveChildren: 99 }),
    ).not.toThrow();
  });

  test("the error is a CapacityError with the CAPACITY_EXCEEDED code", () => {
    // Capture the thrown value explicitly so a missing throw fails on the
    // first expectation with `undefined`, rather than on a sentinel Error.
    let caught: unknown;
    try {
      assertCapacity({ ...base, active: 10 });
    } catch (e) {
      caught = e;
    }
    expect(caught).toBeInstanceOf(CapacityError);
    expect((caught as CapacityError).code).toBe("CAPACITY_EXCEEDED");
  });
});
|
|
package/src/agents/capacity.ts
@@ -0,0 +1,50 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Orchestration capacity limits — enforced at spawn time.
|
|
3
|
+
*
|
|
4
|
+
* `agents.maxConcurrent`, `agents.maxAgentsPerLead`, and `agents.maxDepth` are
|
|
5
|
+
* configured (and validated) but were previously decorative — nothing consulted
|
|
6
|
+
* them. {@link assertCapacity} is the single gate `sling` calls before creating a
|
|
7
|
+
* worktree, so a runaway fan-out is refused with a typed {@link CapacityError}
|
|
8
|
+
* rather than spawning unbounded agents.
|
|
9
|
+
*
|
|
10
|
+
* Pure (counts are passed in) so it is unit-tested without a session store.
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
import { CapacityError } from "../errors.ts";
|
|
14
|
+
|
|
15
|
+
/** The three configured orchestration ceilings consulted at spawn time. */
export interface CapacityLimits {
  /** Deepest hierarchy level a new agent may occupy (a spawn exactly at this depth is allowed). */
  maxDepth: number;
  /** Run-wide cap on simultaneously active agents. */
  maxConcurrent: number;
  /** Cap on active children under a single parent. */
  maxAgentsPerLead: number;
}
|
|
20
|
+
|
|
21
|
+
/** Everything the capacity gate needs to decide on one prospective spawn. */
export interface CapacityCheck {
  /** Depth the new agent would occupy. */
  depth: number;
  /** Active agents in the run right now (excluding the one being spawned). */
  active: number;
  /** Spawning parent, or null for a top-level spawn. */
  parentAgent: string | null;
  /** Active children the parent already has (ignored when parentAgent is null). */
  parentActiveChildren: number;
  /** The configured ceilings to check against. */
  limits: CapacityLimits;
}
|
|
32
|
+
|
|
33
|
+
/**
 * Throw {@link CapacityError} if spawning would exceed any configured limit.
 *
 * Checks run in a fixed order and the first violation wins:
 *  1. depth — refused only when `depth` is strictly greater than `maxDepth`;
 *  2. run-wide concurrency — refused once `active` has reached `maxConcurrent`;
 *  3. per-parent children — refused once `parentActiveChildren` has reached
 *     `maxAgentsPerLead`; skipped when `parentAgent` is falsy (top-level spawn).
 *
 * @param c - Counts and limits describing the prospective spawn.
 * @throws {CapacityError} When any of the three limits would be exceeded.
 */
export function assertCapacity(c: CapacityCheck): void {
  const { depth, active, parentAgent, parentActiveChildren, limits } = c;

  if (depth > limits.maxDepth) {
    throw new CapacityError(
      `Cannot spawn at depth ${depth}: exceeds agents.maxDepth (${limits.maxDepth}).`,
    );
  }

  if (active >= limits.maxConcurrent) {
    throw new CapacityError(
      `Cannot spawn: ${active} agent(s) already active, at agents.maxConcurrent (${limits.maxConcurrent}). Wait for some to finish.`,
    );
  }

  if (parentAgent && parentActiveChildren >= limits.maxAgentsPerLead) {
    throw new CapacityError(
      `Cannot spawn: ${parentAgent} already has ${parentActiveChildren} active child(ren), at agents.maxAgentsPerLead (${limits.maxAgentsPerLead}).`,
    );
  }
}
|
|
package/src/agents/drive.test.ts
@@ -0,0 +1,155 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tests for driveTurn — the shared turn core. Real stores + a real (mock) runtime
|
|
3
|
+
* subprocess. A SpyRuntime records the DirectSpawnOpts so we can prove the warm
|
|
4
|
+
* start: a follow-up turn threads `resumeSessionId` through to the runtime.
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import { afterEach, beforeEach, describe, expect, test } from "bun:test";
|
|
8
|
+
import { mkdirSync, mkdtempSync, rmSync } from "node:fs";
|
|
9
|
+
import { tmpdir } from "node:os";
|
|
10
|
+
import { join } from "node:path";
|
|
11
|
+
import { DEFAULT_CONFIG } from "../config.ts";
|
|
12
|
+
import { createEventStore, type EventStore } from "../events/store.ts";
|
|
13
|
+
import { createMailClient, type MailClient } from "../mail/client.ts";
|
|
14
|
+
import { eventsDbPath, sessionsDbPath } from "../paths.ts";
|
|
15
|
+
import { MockRuntime } from "../runtimes/mock.ts";
|
|
16
|
+
import type { DirectSpawnOpts } from "../runtimes/types.ts";
|
|
17
|
+
import { createSessionStore, type SessionStore } from "../sessions/store.ts";
|
|
18
|
+
import type { AgentplateConfig, AgentSession } from "../types.ts";
|
|
19
|
+
import { driveTurn } from "./drive.ts";
|
|
20
|
+
|
|
21
|
+
/**
 * Mock runtime that records the most recent spawn opts (so tests can assert the
 * resume id) and otherwise defers to {@link MockRuntime}.
 */
class SpyRuntime extends MockRuntime {
  /** Opts passed to the latest `buildDirectSpawn` call; null until the first call. */
  lastOpts: DirectSpawnOpts | null = null;

  override buildDirectSpawn(opts: DirectSpawnOpts): string[] {
    this.lastOpts = opts;
    return super.buildDirectSpawn(opts);
  }
}
|
|
29
|
+
|
|
30
|
+
let root: string;
|
|
31
|
+
let worktree: string;
|
|
32
|
+
let store: SessionStore;
|
|
33
|
+
let events: EventStore;
|
|
34
|
+
let mail: MailClient;
|
|
35
|
+
|
|
36
|
+
/**
 * Fresh config for one test: a deep copy of the defaults (so mutations never
 * leak between tests) pointed at the current temp project root.
 */
function cfg(): AgentplateConfig {
  const c = structuredClone(DEFAULT_CONFIG);
  c.project.root = root;
  c.project.canonicalBranch = "main";
  return c;
}
|
|
42
|
+
|
|
43
|
+
/**
 * Build an idle builder session under `lead-1` rooted at the current temp
 * worktree. Any field can be replaced through `over`, which is spread last.
 */
function makeSession(over: Partial<AgentSession> = {}): AgentSession {
  const now = new Date().toISOString();
  return {
    id: `session-${crypto.randomUUID()}`,
    agentName: "builder-1",
    capability: "builder",
    taskId: "task-1",
    runId: "run-1",
    worktreePath: worktree,
    branchName: "agentplate/builder-1",
    state: "idle",
    parentAgent: "lead-1",
    depth: 1,
    pid: null,
    runtimeSessionId: null,
    startedAt: now,
    lastActivity: now,
    ...over,
  };
}
|
|
63
|
+
|
|
64
|
+
// Each test gets its own temp project root and worktree, real stores opened
// against that root, and a mock runtime command that does nothing and exits 0.
beforeEach(() => {
  root = mkdtempSync(join(tmpdir(), "agentplate-drive-"));
  mkdirSync(join(root, ".agentplate"), { recursive: true });
  worktree = mkdtempSync(join(tmpdir(), "agentplate-drive-wt-"));
  store = createSessionStore(sessionsDbPath(root));
  events = createEventStore(eventsDbPath(root));
  mail = createMailClient(root);
  process.env.AGENTPLATE_MOCK_CMD = "true"; // no-op turn, exits 0
});
|
|
73
|
+
|
|
74
|
+
// Close every store before removing the directories that hold their files,
// then unset the mock command so it cannot leak into other test files.
afterEach(() => {
  store.close();
  events.close();
  mail.close();
  rmSync(root, { recursive: true, force: true });
  rmSync(worktree, { recursive: true, force: true });
  // `process.env.X = undefined` does not unset the variable under Node
  // semantics (the value is coerced to the string "undefined"); `delete` does.
  delete process.env.AGENTPLATE_MOCK_CMD;
});
|
|
82
|
+
|
|
83
|
+
describe("driveTurn — warm start", () => {
  test("threads resumeSessionId through to the runtime spawn (follow-up turn)", async () => {
    const session = makeSession();
    store.upsertSession(session);
    const runtime = new SpyRuntime();

    const out = await driveTurn({
      root,
      config: cfg(),
      runtime,
      store,
      events,
      mail,
      session,
      model: { model: "m", env: {} },
      prompt: "continue",
      resumeSessionId: "sess-abc",
    });

    expect(runtime.lastOpts?.resumeSessionId).toBe("sess-abc"); // warm start
    expect(out.finalState).toBe("idle"); // no terminal mail emitted → paused
    expect(store.getSession(session.id)?.state).toBe("idle");
  });

  test("omits resume on the first turn (cold start)", async () => {
    const session = makeSession();
    store.upsertSession(session);
    const runtime = new SpyRuntime();

    await driveTurn({
      root,
      config: cfg(),
      runtime,
      store,
      events,
      mail,
      session,
      model: { model: "m", env: {} },
      prompt: "begin",
    });

    expect(runtime.lastOpts?.resumeSessionId).toBeUndefined();
  });
});
|
|
125
|
+
|
|
126
|
+
describe("driveTurn — state transition", () => {
  test("becomes 'completed' when the agent has emitted its terminal mail", async () => {
    const session = makeSession();
    store.upsertSession(session);
    // The agent's own worker_done mail marks the task complete.
    mail.send({
      from: session.agentName,
      to: "lead-1",
      subject: "done",
      body: "",
      type: "worker_done",
    });

    const config = cfg();
    config.skills.enabled = false; // keep the completed path free of distillation work

    const out = await driveTurn({
      root,
      config,
      runtime: new SpyRuntime(),
      store,
      events,
      mail,
      session,
      model: { model: "m", env: {} },
      prompt: "finish",
    });

    expect(out.finalState).toBe("completed");
    expect(store.getSession(session.id)?.state).toBe("completed");
  });
});
|
|
package/src/agents/drive.ts
@@ -0,0 +1,200 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* driveTurn — run ONE headless turn for an agent and handle its aftermath.
|
|
3
|
+
*
|
|
4
|
+
* This is the shared core behind both the first turn (`sling`, which opens a
|
|
5
|
+
* fresh runtime session) and every follow-up turn (`agentplate turn`, which
|
|
6
|
+
* **resumes** the session via `runtimeSessionId` so turns 2+ do not pay the
|
|
7
|
+
* runtime's cold-start cost — the "warm start"). Keeping it in one place means
|
|
8
|
+
* the post-turn handling (state transition, the self-improving skills loop, and
|
|
9
|
+
* auto-merge) is identical no matter which turn it is.
|
|
10
|
+
*
|
|
11
|
+
* Spawn-per-turn is preserved: each call spawns a fresh runtime subprocess
|
|
12
|
+
* (resumed when `resumeSessionId` is given) — there is no long-lived agent.
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
import type { EventStore } from "../events/store.ts";
|
|
16
|
+
import { runQualityGates } from "../insights/quality-gates.ts";
|
|
17
|
+
import type { MailClient } from "../mail/client.ts";
|
|
18
|
+
import { createMailStore } from "../mail/store.ts";
|
|
19
|
+
import { maybeAutoMerge } from "../merge/auto.ts";
|
|
20
|
+
import { mailDbPath } from "../paths.ts";
|
|
21
|
+
import type { AgentRuntime } from "../runtimes/types.ts";
|
|
22
|
+
import type { SessionStore } from "../sessions/store.ts";
|
|
23
|
+
import { runSkillFeedbackAndDistill } from "../skills/lifecycle.ts";
|
|
24
|
+
import type {
|
|
25
|
+
AgentplateConfig,
|
|
26
|
+
AgentSession,
|
|
27
|
+
Capability,
|
|
28
|
+
OutcomeStatus,
|
|
29
|
+
ResolvedModel,
|
|
30
|
+
SessionState,
|
|
31
|
+
} from "../types.ts";
|
|
32
|
+
import { updateIdentity } from "./identity.ts";
|
|
33
|
+
import { runTurn } from "./turn-runner.ts";
|
|
34
|
+
|
|
35
|
+
/**
 * Terminal mail types whose presence marks a capability's work complete.
 *
 * @param capability - The agent's capability.
 * @returns `["merged", "merge_failed"]` for a merger; `["worker_done"]` for
 *   every other capability.
 */
export function terminalTypesFor(capability: Capability): string[] {
  if (capability === "merger") {
    return ["merged", "merge_failed"];
  }
  return ["worker_done"];
}
|
|
39
|
+
|
|
40
|
+
/**
 * Resolve a turn's end state from the agent's own mail + exit code:
 *  - emitted terminal mail → `completed`
 *  - clean exit, no terminal mail → `idle` (paused, awaiting its next turn)
 *  - non-zero exit → `failed`
 *
 * The mail check runs first, so terminal mail yields `completed` even when the
 * exit code is non-zero. The mail store is opened for this lookup only and is
 * always closed again.
 *
 * NOTE(review): the lookup is filtered by sender name only — presumably agent
 * names are unique per task, otherwise an earlier task's terminal mail would
 * also match; confirm against the mail store.
 *
 * @param root - Project root used to locate the mail database.
 * @param name - Agent name whose sent mail is inspected.
 * @param capability - Selects which mail types count as terminal.
 * @param exitCode - Exit code of the turn's runtime subprocess.
 * @returns The session state the turn ended in.
 */
export function resolveFinalState(
  root: string,
  name: string,
  capability: Capability,
  exitCode: number,
): SessionState {
  const terminalTypes = terminalTypesFor(capability);
  const mailStore = createMailStore(mailDbPath(root));
  try {
    const sent = mailStore.list({ from: name });
    if (sent.some((m) => terminalTypes.includes(m.type))) {
      return "completed";
    }
  } finally {
    mailStore.close();
  }
  return exitCode === 0 ? "idle" : "failed";
}
|
|
62
|
+
|
|
63
|
+
/** Everything one call to `driveTurn` needs: collaborators plus the turn's inputs. */
export interface DriveTurnCtx {
  root: string;
  config: AgentplateConfig;
  runtime: AgentRuntime;
  store: SessionStore;
  events: EventStore;
  mail: MailClient;
  /** The session this turn runs for (existing or just-created). */
  session: AgentSession;
  /** Resolved concrete model + provider env for this capability. */
  model: ResolvedModel;
  /** The user-turn text (dispatch / injected mail / nudge). */
  prompt: string;
  /**
   * Prior runtime session id to resume (the warm start). Omit on the first
   * turn, which opens a fresh runtime session.
   */
  resumeSessionId?: string;
}
|
|
79
|
+
|
|
80
|
+
/** What one driven turn produced. */
export interface DriveTurnResult {
  /** State the session was left in after the turn. */
  finalState: SessionState;
  /** Exit code of the turn's runtime subprocess. */
  exitCode: number;
  /** Quality-gate outcome, or null when gates were not run or produced no status. */
  gateStatus: OutcomeStatus | null;
}
|
|
85
|
+
|
|
86
|
+
/**
 * Run one turn for `ctx.session` and apply the post-turn lifecycle.
 *
 * Sequence:
 *  1. Mark the session `working` and run the turn, mirroring every streamed
 *     runtime event into the event store and touching the session each time.
 *  2. Persist the runtime session id the turn reports, if any.
 *  3. On a non-zero exit with no error event, record the trimmed stderr
 *     (capped at 1000 characters) as an `error` event.
 *  4. Resolve and store the final state, then update the agent's identity
 *     summary.
 *  5. When the final state is `completed`: run the quality gates once if the
 *     skills loop or auto-merge needs them, feed the skills loop, then hand the
 *     branch to `maybeAutoMerge`. Both steps are best-effort.
 *
 * @param ctx - Collaborators and inputs for this turn.
 * @returns The final state, the runtime exit code, and the gate status (null
 *   when gates did not run or failed).
 * @throws Rethrows whatever `runTurn` rejects with; the session is marked
 *   `failed` first so it is not left in `working`.
 */
export async function driveTurn(ctx: DriveTurnCtx): Promise<DriveTurnResult> {
  const { root, config, runtime, store, events, mail, session, model } = ctx;
  const {
    id: sessionId,
    agentName: name,
    capability,
    taskId,
    runId,
    worktreePath,
    branchName,
  } = session;

  store.updateSessionState(sessionId, "working");

  let sawError = false;
  let turn: Awaited<ReturnType<typeof runTurn>>;
  try {
    turn = await runTurn({
      runtime,
      worktreePath,
      model: model.model,
      prompt: ctx.prompt,
      env: model.env,
      resumeSessionId: ctx.resumeSessionId,
      onEvent: (event) => {
        if (event.error || event.type === "error") sawError = true;
        // Prefer the error message (so a failed agent's reason is visible in the
        // feed/logs), else the token/cost JSON the Costs page aggregates.
        const detail = event.error
          ? event.error
          : event.usage
            ? JSON.stringify({ tokens: event.usage.tokens, cost: event.usage.costUsd })
            : null;
        events.record({ agentName: name, runId, type: event.type, tool: event.tool ?? null, detail });
        // Bump last_activity on every streamed event so a long but active turn
        // keeps itself fresh and is never reaped as "idle".
        store.touch(sessionId);
      },
    });
  } catch (err) {
    // The turn never produced a result. Do not strand the session in
    // "working"; mark it failed, then let the original error propagate.
    try {
      store.updateSessionState(sessionId, "failed");
      store.touch(sessionId);
    } catch {
      // Keep the original failure as the one the caller sees.
    }
    throw err;
  }
  if (turn.runtimeSessionId) store.setRuntimeSessionId(sessionId, turn.runtimeSessionId);

  // A non-zero exit with no error event means the runtime failed via stderr;
  // record it so the failure reason is visible instead of a blank "failed".
  if (turn.exitCode !== 0 && !sawError) {
    const reason = turn.stderr.trim();
    if (reason) {
      events.record({
        agentName: name,
        runId,
        type: "error",
        tool: null,
        detail: reason.length > 1000 ? `${reason.slice(0, 1000)}…` : reason,
      });
    }
  }

  const finalState = resolveFinalState(root, name, capability, turn.exitCode);
  store.updateSessionState(sessionId, finalState);
  store.touch(sessionId);
  updateIdentity(root, name, {
    taskId,
    summary: `${capability} ran a turn for ${taskId} → ${finalState}`,
  });

  // Quality gates run once when EITHER the self-improving loop or auto-merge
  // needs them; the outcome feeds both. Best-effort — never fails the turn.
  const autoMergeWants =
    config.merge.autoMerge !== "off" && capability !== "scout" && capability !== "merger";
  let gateStatus: OutcomeStatus | null = null;
  if (finalState === "completed" && (config.skills.enabled || autoMergeWants)) {
    try {
      const gateOutcome = await runQualityGates(config.project.qualityGates ?? [], worktreePath);
      gateStatus = gateOutcome?.status ?? null;
      if (config.skills.enabled) {
        await runSkillFeedbackAndDistill({
          root,
          agentName: name,
          capability,
          taskId,
          worktreePath,
          baseRef: config.project.canonicalBranch,
          runtime,
          outcomeStatus: gateStatus,
          skills: config.skills,
          model: model.model,
        });
      }
    } catch {
      // Gates and the skill loop are both advisory here: a failure in either
      // must not fail the turn. If the gates themselves threw, gateStatus
      // stays null and that is what auto-merge receives below.
    }
  }

  // Auto-merge the branch onto the canonical branch when configured (off by
  // default). Best-effort — a landing must never fail the turn.
  if (finalState === "completed") {
    try {
      await maybeAutoMerge({
        root,
        branchName,
        targetBranch: config.project.canonicalBranch,
        capability,
        agentName: name,
        taskId,
        parent: session.parentAgent,
        mode: config.merge.autoMerge,
        aiResolveEnabled: config.merge.aiResolveEnabled,
        gateStatus,
        mail,
      });
    } catch {
      // Auto-merge is best-effort; never fail the turn over a landing.
    }
  }

  return { finalState, exitCode: turn.exitCode, gateStatus };
}
|
|
package/src/agents/system-prompt.ts
@@ -62,7 +62,8 @@ export function buildCoordinatorSystemPrompt(ctx: CoordinatorPromptContext): str
|
|
|
62
62
|
"Key commands:",
|
|
63
63
|
"",
|
|
64
64
|
`- Check mail: \`agentplate mail check --agent ${ctx.agentName}\``,
|
|
65
|
-
|
|
65
|
+
"- Author a task's spec FIRST (the contract; loaded at launch — never mail it after): `agentplate spec write <task-id> --stdin`",
|
|
66
|
+
`- Dispatch a lead against it: \`agentplate sling <task-id> --capability lead --parent ${ctx.agentName} --spec .agentplate/specs/<task-id>.md\``,
|
|
66
67
|
"- Fleet status: `agentplate status`",
|
|
67
68
|
"- Merge completed work: `agentplate merge --all`",
|
|
68
69
|
"",
|