@forwardimpact/libeval 0.1.47 → 0.1.49
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +28 -16
- package/bin/fit-benchmark.js +1 -1
- package/package.json +1 -1
- package/src/commands/discuss.js +1 -1
- package/src/discuss-tools.js +51 -59
- package/src/discusser.js +57 -28
- package/src/facilitator.js +41 -45
- package/src/index.js +6 -1
- package/src/orchestration-toolkit.js +86 -16
- package/src/profile-prompt.js +53 -8
- package/src/render/tool-hints.js +2 -1
- package/src/render/turn-renderer.js +1 -2
- package/src/supervisor.js +38 -38
- package/src/tee-writer.js +8 -8
- package/src/trace-collector.js +2 -2
package/README.md
CHANGED
|
@@ -69,6 +69,20 @@ Inbox lines on resume:
|
|
|
69
69
|
Async means the lead can issue Asks, end its turn, and plan in the gap
|
|
70
70
|
while participants work in parallel — nothing blocks the LLM thread.
|
|
71
71
|
|
|
72
|
+
### Discuss-mode replies
|
|
73
|
+
|
|
74
|
+
In discussion mode, Answer calls routed to the lead are captured as
|
|
75
|
+
thread replies delivered via the bridge callback. The lead delegates work
|
|
76
|
+
via Ask; each agent's Answer becomes a separate reply posted to the
|
|
77
|
+
discussion thread. No explicit reply tool is needed on the lead surface —
|
|
78
|
+
the message bus intercepts answers and appends them to `ctx.replies[]`.
|
|
79
|
+
|
|
80
|
+
`RequestForComment` is a separate coordination tool available on agent
|
|
81
|
+
roles (facilitated agents and discuss agents). It queues an intent to
|
|
82
|
+
open a new Discussion thread for long-horizon coordination on open
|
|
83
|
+
questions; these are accumulated in `ctx.rfcs[]`, separate from the
|
|
84
|
+
thread replies in `ctx.replies[]`.
|
|
85
|
+
|
|
72
86
|
## Orchestration loop
|
|
73
87
|
|
|
74
88
|
Each participant drains the bus (or waits), runs/resumes the LLM with
|
|
@@ -84,15 +98,15 @@ only feeds the summary's `success`/`verdict`.
|
|
|
84
98
|
|
|
85
99
|
## Tool surface, by role
|
|
86
100
|
|
|
87
|
-
| Role | Ask | Answer | Announce | RollCall | Conclude | Other
|
|
88
|
-
| ------------ | --- | ------ | -------- | -------- | -------- |
|
|
89
|
-
| Facilitator | ✓ | ✓ | ✓ | ✓ | ✓ |
|
|
90
|
-
| Fac. agent | ✓ | ✓ | ✓ | ✓ | |
|
|
91
|
-
| Supervisor | ✓ | ✓ | ✓ | ✓ | ✓ |
|
|
92
|
-
| Sup. agent | ✓ | ✓ | ✓ | ✓ | |
|
|
93
|
-
| Discuss lead | ✓ | ✓ | ✓ | ✓ | | `
|
|
94
|
-
| Discuss agt | ✓ | ✓ | ✓ | ✓ | |
|
|
95
|
-
| Judge | | | | | ✓ |
|
|
101
|
+
| Role | Ask | Answer | Announce | RollCall | Conclude | Other |
|
|
102
|
+
| ------------ | --- | ------ | -------- | -------- | -------- | ------------------------------ |
|
|
103
|
+
| Facilitator | ✓ | ✓ | ✓ | ✓ | ✓ | |
|
|
104
|
+
| Fac. agent | ✓ | ✓ | ✓ | ✓ | | `RequestForComment` |
|
|
105
|
+
| Supervisor | ✓ | ✓ | ✓ | ✓ | ✓ | |
|
|
106
|
+
| Sup. agent | ✓ | ✓ | ✓ | ✓ | | |
|
|
107
|
+
| Discuss lead | ✓ | ✓ | ✓ | ✓ | | `Recess`, `Adjourn` |
|
|
108
|
+
| Discuss agt | ✓ | ✓ | ✓ | ✓ | | `RequestForComment` |
|
|
109
|
+
| Judge | | | | | ✓ | |
|
|
96
110
|
|
|
97
111
|
Ask's `to` accepts a participant name on multi-participant roles
|
|
98
112
|
(facilitator, discuss lead, all participants). The supervise pair has
|
|
@@ -152,21 +166,19 @@ downloadable through retention.
|
|
|
152
166
|
| ----------------------------------------------------------- | -------------------------------------------------------------------- |
|
|
153
167
|
| `agent-runner.js` | One Claude Agent SDK session; emits NDJSON via the redactor. |
|
|
154
168
|
| `message-bus.js` | Per-participant queues + `waitForMessages` Promise wakeup. |
|
|
155
|
-
| `orchestration-toolkit.js` | Shared Ask/Answer/Announce/Conclude/RollCall handlers + builders.
|
|
169
|
+
| `orchestration-toolkit.js` | Shared Ask/Answer/Announce/Conclude/RollCall/RequestForComment handlers + builders. |
|
|
156
170
|
| `orchestration-loop.js` | Unified lead+participant loop; reminder/violation handling. |
|
|
157
171
|
| `facilitator.js` / `supervisor.js` / `discusser.js` / `judge.js` | Per-mode class + factory + system prompt. |
|
|
158
|
-
| `discuss-tools.js` | Discuss-only `
|
|
172
|
+
| `discuss-tools.js` | Discuss-only `Recess`/`Adjourn`. |
|
|
159
173
|
| `trace-collector.js` / `trace-query.js` / `trace-github.js` | Trace ingestion / querying / GitHub-attachment helpers. |
|
|
160
174
|
| `redaction.js` | Env-var allowlist + credential-shape pattern redaction. |
|
|
161
175
|
|
|
162
176
|
## fit-selfedit
|
|
163
177
|
|
|
164
178
|
A narrow, audited bypass for sessions where `Edit`/`Write` (and bash
|
|
165
|
-
writes) are blocked against paths the project's own allowlist permits
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
original episodes. Reads stdin, writes the target, exits 0 / 2
|
|
169
|
-
(safeguard violation) / 1 (I/O error).
|
|
179
|
+
writes) are blocked against paths the project's own allowlist permits.
|
|
180
|
+
Reads stdin, writes the target, exits 0 / 2 (safeguard violation) / 1
|
|
181
|
+
(I/O error).
|
|
170
182
|
|
|
171
183
|
```sh
|
|
172
184
|
echo "<content>" | bunx fit-selfedit <path>
|
package/bin/fit-benchmark.js
CHANGED
|
@@ -134,7 +134,7 @@ export const definition = {
|
|
|
134
134
|
"fit-benchmark run --family=./families/coding --runs=10 --agent-model=claude-sonnet-4-6",
|
|
135
135
|
"fit-benchmark score --family=./families/coding --task=todo-api --workdir=./benchmark-runs/runs/todo-api/0",
|
|
136
136
|
"fit-benchmark report --format=text",
|
|
137
|
-
"fit-benchmark report --input=./runs/
|
|
137
|
+
"fit-benchmark report --input=./runs/today --k=1,3,5 --format=text",
|
|
138
138
|
],
|
|
139
139
|
documentation: [
|
|
140
140
|
{
|
package/package.json
CHANGED
package/src/commands/discuss.js
CHANGED
|
@@ -52,7 +52,7 @@ export function parseDiscussOptions(values) {
|
|
|
52
52
|
taskContent,
|
|
53
53
|
taskAmend,
|
|
54
54
|
agentConfigs,
|
|
55
|
-
leadProfile: values["lead-profile"] ??
|
|
55
|
+
leadProfile: values["lead-profile"] ?? undefined,
|
|
56
56
|
leadModel: values["lead-model"] ?? "claude-opus-4-7[1m]",
|
|
57
57
|
agentModel: values["agent-model"] ?? "claude-opus-4-7[1m]",
|
|
58
58
|
maxTurns,
|
package/src/discuss-tools.js
CHANGED
|
@@ -1,66 +1,77 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* DiscussTools — discuss-mode tool servers. The lead's surface extends the
|
|
3
|
-
* base set with
|
|
3
|
+
* base set with two discuss-only terminal tools:
|
|
4
4
|
*
|
|
5
|
-
* - `RequestForComment` posts a fire-and-forget message to a human channel
|
|
6
|
-
* via the bridge; the reply arrives on a later workflow run.
|
|
7
5
|
* - `Recess` suspends the session with a resumption trigger.
|
|
8
6
|
* - `Adjourn` ends the discussion with a verdict.
|
|
9
7
|
*
|
|
10
|
-
* `Conclude` is absent — discuss mode ends via Adjourn or Recess.
|
|
11
|
-
*
|
|
12
|
-
*
|
|
8
|
+
* `Conclude` is absent — discuss mode ends via Adjourn or Recess.
|
|
9
|
+
*
|
|
10
|
+
* `RequestForComment` is an agent-level coordination tool — available on
|
|
11
|
+
* discuss agents and facilitated agents (not leads). It opens a new
|
|
12
|
+
* Discussion thread for long-horizon coordination on open questions.
|
|
13
|
+
*
|
|
14
|
+
* In discuss mode, each agent Answer routed to the lead is captured as a
|
|
15
|
+
* thread reply delivered via the bridge callback — no explicit reply tool
|
|
16
|
+
* is needed on the lead surface.
|
|
13
17
|
*/
|
|
14
18
|
|
|
15
19
|
import { tool } from "@anthropic-ai/claude-agent-sdk";
|
|
16
20
|
import { z } from "zod";
|
|
17
21
|
|
|
18
22
|
import {
|
|
23
|
+
ADJOURN_DESC,
|
|
19
24
|
baseTools,
|
|
20
25
|
concludeSession,
|
|
21
26
|
orchestrationServer,
|
|
27
|
+
RECESS_DESC,
|
|
28
|
+
requestForCommentTool,
|
|
29
|
+
requireNoPendingAsks,
|
|
22
30
|
} from "./orchestration-toolkit.js";
|
|
23
31
|
|
|
24
|
-
/** System prompt
|
|
32
|
+
/** System prompt for discuss-mode agent participants. L0 mechanics only per COALIGNED. */
|
|
25
33
|
export const DISCUSS_AGENT_SYSTEM_PROMPT =
|
|
26
|
-
"You
|
|
27
|
-
"Each question
|
|
28
|
-
"
|
|
29
|
-
"
|
|
30
|
-
"
|
|
31
|
-
"
|
|
34
|
+
"You are a participant in a discussion.\n" +
|
|
35
|
+
"Each question arrives as `[ask#N] <name>: <text>` in your inbox.\n" +
|
|
36
|
+
"Quote N as askId on your `Answer` to route the reply correctly.\n" +
|
|
37
|
+
"Your `Answer` is posted to the discussion thread as a separate reply.\n" +
|
|
38
|
+
"If the task already contains a completed response with no new human input after it, `Answer` that no further action is needed.\n" +
|
|
39
|
+
"Do not redo completed work.";
|
|
32
40
|
|
|
33
|
-
const RESUME_TRIGGER_SCHEMA = z
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
41
|
+
const RESUME_TRIGGER_SCHEMA = z.discriminatedUnion("kind", [
|
|
42
|
+
z
|
|
43
|
+
.object({
|
|
44
|
+
kind: z.literal("missing_input"),
|
|
45
|
+
replies: z.number().int().positive(),
|
|
46
|
+
})
|
|
47
|
+
.strict(),
|
|
48
|
+
z
|
|
49
|
+
.object({
|
|
50
|
+
kind: z.literal("escalation_needed"),
|
|
51
|
+
signal: z.string().min(1),
|
|
52
|
+
})
|
|
53
|
+
.strict(),
|
|
54
|
+
z
|
|
55
|
+
.object({
|
|
56
|
+
kind: z.literal("elapsed"),
|
|
57
|
+
elapsed: z.string().min(1),
|
|
58
|
+
})
|
|
59
|
+
.strict(),
|
|
60
|
+
]);
|
|
40
61
|
|
|
41
62
|
/** Discuss-mode lead tool server. */
|
|
42
63
|
export function createDiscussLeadToolServer(ctx) {
|
|
43
64
|
return orchestrationServer([
|
|
44
65
|
...baseTools(ctx, { from: "lead", defaultTo: undefined, broadcast: true }),
|
|
45
|
-
tool(
|
|
46
|
-
"RequestForComment",
|
|
47
|
-
"Post a fire-and-forget message to a channel via the bridge. Returns a correlation id; the reply arrives on a later workflow run.",
|
|
48
|
-
{
|
|
49
|
-
channel: z.string(),
|
|
50
|
-
body: z.string(),
|
|
51
|
-
addressees: z.array(z.string()).optional(),
|
|
52
|
-
},
|
|
53
|
-
createRequestForCommentHandler(ctx),
|
|
54
|
-
),
|
|
55
66
|
tool(
|
|
56
67
|
"Recess",
|
|
57
|
-
|
|
68
|
+
RECESS_DESC,
|
|
58
69
|
{ reason: z.string(), trigger: RESUME_TRIGGER_SCHEMA },
|
|
59
70
|
createRecessHandler(ctx),
|
|
60
71
|
),
|
|
61
72
|
tool(
|
|
62
73
|
"Adjourn",
|
|
63
|
-
|
|
74
|
+
ADJOURN_DESC,
|
|
64
75
|
{
|
|
65
76
|
verdict: z.enum(["adjourned", "failed"]),
|
|
66
77
|
summary: z.string(),
|
|
@@ -73,33 +84,10 @@ export function createDiscussLeadToolServer(ctx) {
|
|
|
73
84
|
|
|
74
85
|
/** Discuss-mode agent tool server. */
|
|
75
86
|
export function createDiscussAgentToolServer(ctx, { from }) {
|
|
76
|
-
return orchestrationServer(
|
|
77
|
-
baseTools(ctx, { from, defaultTo: "lead", broadcast: true }),
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
/** RequestForComment handler — queues structured replies on `ctx.replies[]`. */
|
|
82
|
-
export function createRequestForCommentHandler(ctx) {
|
|
83
|
-
return async ({ channel, body, addressees }) => {
|
|
84
|
-
const correlationId = `rfc_${++ctx.rfcCounter}`;
|
|
85
|
-
const addresseeList = addressees?.length ? addressees : [null];
|
|
86
|
-
for (const addressee of addresseeList) {
|
|
87
|
-
ctx.replies.push({
|
|
88
|
-
...(addressee && { addressee }),
|
|
89
|
-
body,
|
|
90
|
-
...(ctx.discussionId && { thread_id: ctx.discussionId }),
|
|
91
|
-
correlation_id: correlationId,
|
|
92
|
-
});
|
|
93
|
-
}
|
|
94
|
-
return {
|
|
95
|
-
content: [
|
|
96
|
-
{
|
|
97
|
-
type: "text",
|
|
98
|
-
text: JSON.stringify({ correlation_id: correlationId, channel }),
|
|
99
|
-
},
|
|
100
|
-
],
|
|
101
|
-
};
|
|
102
|
-
};
|
|
87
|
+
return orchestrationServer([
|
|
88
|
+
...baseTools(ctx, { from, defaultTo: "lead", broadcast: true }),
|
|
89
|
+
requestForCommentTool(ctx),
|
|
90
|
+
]);
|
|
103
91
|
}
|
|
104
92
|
|
|
105
93
|
/**
|
|
@@ -111,6 +99,8 @@ export function createRequestForCommentHandler(ctx) {
|
|
|
111
99
|
*/
|
|
112
100
|
export function createRecessHandler(ctx) {
|
|
113
101
|
return async ({ reason, trigger }) => {
|
|
102
|
+
const guard = requireNoPendingAsks(ctx);
|
|
103
|
+
if (guard) return guard;
|
|
114
104
|
ctx.recessTrigger = trigger;
|
|
115
105
|
concludeSession(ctx, {
|
|
116
106
|
verdict: "recessed",
|
|
@@ -124,6 +114,8 @@ export function createRecessHandler(ctx) {
|
|
|
124
114
|
/** Adjourn handler — ends the discussion with a verdict. */
|
|
125
115
|
export function createAdjournHandler(ctx) {
|
|
126
116
|
return async ({ verdict, summary, outcome }) => {
|
|
117
|
+
const guard = requireNoPendingAsks(ctx);
|
|
118
|
+
if (guard) return guard;
|
|
127
119
|
if (outcome !== undefined) ctx.outcome = outcome;
|
|
128
120
|
concludeSession(ctx, {
|
|
129
121
|
verdict,
|
package/src/discusser.js
CHANGED
|
@@ -1,19 +1,23 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Discusser — async, suspendable orchestration on top of a within-run
|
|
3
|
-
* `OrchestrationLoop`. The lead role uses `DiscussTools` (Adjourn / Recess
|
|
4
|
-
*
|
|
3
|
+
* `OrchestrationLoop`. The lead role uses `DiscussTools` (Adjourn / Recess)
|
|
4
|
+
* instead of the facilitator's Conclude.
|
|
5
5
|
*
|
|
6
6
|
* Discuss mode is a sibling of facilitate mode, not a subset of it. The
|
|
7
7
|
* within-run turn loop is shared via `OrchestrationLoop`, but the lead
|
|
8
8
|
* role, tool set, system prompts, and participant naming all stay
|
|
9
9
|
* mode-local.
|
|
10
|
+
*
|
|
11
|
+
* Each agent Answer routed to the lead is captured as a thread reply
|
|
12
|
+
* delivered via the bridge callback — no explicit reply tool is needed
|
|
13
|
+
* on the lead surface.
|
|
10
14
|
*/
|
|
11
15
|
|
|
12
16
|
import { Writable } from "node:stream";
|
|
13
17
|
import { resolve } from "node:path";
|
|
14
18
|
|
|
15
19
|
import { createAgentRunner } from "./agent-runner.js";
|
|
16
|
-
import {
|
|
20
|
+
import { composeSystemPrompt } from "./profile-prompt.js";
|
|
17
21
|
import { SequenceCounter } from "./sequence-counter.js";
|
|
18
22
|
import { createMessageBus } from "./message-bus.js";
|
|
19
23
|
import { createOrchestrationContext } from "./orchestration-toolkit.js";
|
|
@@ -24,18 +28,19 @@ import {
|
|
|
24
28
|
} from "./discuss-tools.js";
|
|
25
29
|
import { OrchestrationLoop } from "./orchestration-loop.js";
|
|
26
30
|
|
|
27
|
-
/** System prompt
|
|
31
|
+
/** System prompt for the discuss-mode lead. L0 mechanics only per COALIGNED. */
|
|
28
32
|
export const DISCUSS_SYSTEM_PROMPT =
|
|
29
|
-
"You lead
|
|
30
|
-
"
|
|
31
|
-
"
|
|
32
|
-
"
|
|
33
|
-
"
|
|
34
|
-
"
|
|
35
|
-
"
|
|
36
|
-
"
|
|
37
|
-
"
|
|
38
|
-
"
|
|
33
|
+
"You lead a discussion.\n" +
|
|
34
|
+
"You have no tools to perform work yourself.\n" +
|
|
35
|
+
"Use `RollCall` to list participants.\n" +
|
|
36
|
+
"Use `Ask` to delegate work to the best-suited participant.\n" +
|
|
37
|
+
"Participants are domain experts; state the task, not how to do it.\n" +
|
|
38
|
+
"Each participant's `Answer` is posted to the discussion thread as a separate reply.\n" +
|
|
39
|
+
"`Ask` is async and returns {askIds:[N,…]} immediately.\n" +
|
|
40
|
+
"Answers arrive on your next turn as `[answer#N] <participant>: <text>` in your inbox.\n" +
|
|
41
|
+
"End your turn while Asks are pending. The system resumes you when answers arrive.\n" +
|
|
42
|
+
"Multiple `Ask` calls in one turn run participants in parallel.\n" +
|
|
43
|
+
"End the discussion by calling `Adjourn` with a verdict and summary, or `Recess` only to wait on an external reply or duration.";
|
|
39
44
|
|
|
40
45
|
/**
|
|
41
46
|
* Augment a base orchestration context with discuss-mode fields.
|
|
@@ -47,6 +52,7 @@ export function augmentContextForDiscuss(ctx, discussionId) {
|
|
|
47
52
|
ctx.discussionId = discussionId;
|
|
48
53
|
ctx.recessTrigger = null;
|
|
49
54
|
ctx.replies = [];
|
|
55
|
+
ctx.rfcs = [];
|
|
50
56
|
ctx.rfcCounter = 0;
|
|
51
57
|
ctx.outcome = null;
|
|
52
58
|
return ctx;
|
|
@@ -141,6 +147,7 @@ export class Discusser {
|
|
|
141
147
|
...(this.ctx.summary && { summary: this.ctx.summary }),
|
|
142
148
|
...(this.ctx.outcome && { outcome: this.ctx.outcome }),
|
|
143
149
|
replies: this.ctx.replies,
|
|
150
|
+
...(this.ctx.rfcs?.length && { rfcs: this.ctx.rfcs }),
|
|
144
151
|
...(this.ctx.recessTrigger && { trigger: this.ctx.recessTrigger }),
|
|
145
152
|
...(this.discussionId && { discussion_id: this.discussionId }),
|
|
146
153
|
};
|
|
@@ -228,6 +235,20 @@ export function createDiscusser({
|
|
|
228
235
|
const messageBus = createMessageBus({
|
|
229
236
|
participants: ["lead", ...resolvedConfigs.map((a) => a.name)],
|
|
230
237
|
});
|
|
238
|
+
|
|
239
|
+
// Intercept answers routed to the lead — each becomes a discussion reply.
|
|
240
|
+
const originalAnswer = messageBus.answer.bind(messageBus);
|
|
241
|
+
messageBus.answer = (from, to, text, askId) => {
|
|
242
|
+
if (to === "lead" && from !== "@orchestrator") {
|
|
243
|
+
ctx.replies.push({
|
|
244
|
+
body: text,
|
|
245
|
+
agent: from,
|
|
246
|
+
...(ctx.discussionId && { thread_id: ctx.discussionId }),
|
|
247
|
+
});
|
|
248
|
+
}
|
|
249
|
+
originalAnswer(from, to, text, askId);
|
|
250
|
+
};
|
|
251
|
+
|
|
231
252
|
ctx.messageBus = messageBus;
|
|
232
253
|
if (ctx.participants.length === 0) {
|
|
233
254
|
ctx.participants = [
|
|
@@ -236,16 +257,6 @@ export function createDiscusser({
|
|
|
236
257
|
];
|
|
237
258
|
}
|
|
238
259
|
|
|
239
|
-
const systemPromptFor = (profile, trailer) => {
|
|
240
|
-
if (!trailer) throw new Error("trailer is required");
|
|
241
|
-
return profile
|
|
242
|
-
? composeProfilePrompt(profile, {
|
|
243
|
-
profilesDir: resolvedProfilesDir,
|
|
244
|
-
trailer,
|
|
245
|
-
})
|
|
246
|
-
: { type: "preset", preset: "claude_code", append: trailer };
|
|
247
|
-
};
|
|
248
|
-
|
|
249
260
|
let discusser;
|
|
250
261
|
const leadServer = createDiscussLeadToolServer(ctx);
|
|
251
262
|
|
|
@@ -268,26 +279,44 @@ export function createDiscusser({
|
|
|
268
279
|
onLine: (line) => discusser.loop.emitLine(config.name, line),
|
|
269
280
|
mcpServers: { orchestration: agentServer },
|
|
270
281
|
settingSources: ["project"],
|
|
271
|
-
systemPrompt:
|
|
282
|
+
systemPrompt: composeSystemPrompt({
|
|
283
|
+
role: "agent",
|
|
284
|
+
profile: config.agentProfile,
|
|
285
|
+
profilesDir: resolvedProfilesDir,
|
|
286
|
+
trailer: agentTrailer,
|
|
287
|
+
}),
|
|
272
288
|
redactor,
|
|
273
289
|
});
|
|
274
290
|
|
|
275
291
|
return { name: config.name, role: config.role, runner };
|
|
276
292
|
});
|
|
277
293
|
|
|
278
|
-
const defaultDisallowed = [
|
|
294
|
+
const defaultDisallowed = [
|
|
295
|
+
"Agent",
|
|
296
|
+
"Task",
|
|
297
|
+
"TaskOutput",
|
|
298
|
+
"TaskStop",
|
|
299
|
+
"Bash",
|
|
300
|
+
"Write",
|
|
301
|
+
"Edit",
|
|
302
|
+
];
|
|
279
303
|
const leadRunner = createAgentRunner({
|
|
280
304
|
cwd: resolvedLeadCwd,
|
|
281
305
|
query,
|
|
282
306
|
output: devNull,
|
|
283
307
|
model: leadModel ?? "claude-opus-4-7[1m]",
|
|
284
308
|
maxTurns: maxTurns ?? 80,
|
|
285
|
-
allowedTools: ["
|
|
309
|
+
allowedTools: ["Read", "Glob", "Grep"],
|
|
286
310
|
disallowedTools: defaultDisallowed,
|
|
287
311
|
onLine: (line) => discusser.loop.emitLine("lead", line),
|
|
288
312
|
mcpServers: { orchestration: leadServer },
|
|
289
313
|
settingSources: ["project"],
|
|
290
|
-
systemPrompt:
|
|
314
|
+
systemPrompt: composeSystemPrompt({
|
|
315
|
+
role: "lead",
|
|
316
|
+
profile: leadProfile,
|
|
317
|
+
profilesDir: resolvedProfilesDir,
|
|
318
|
+
trailer: DISCUSS_SYSTEM_PROMPT,
|
|
319
|
+
}),
|
|
291
320
|
redactor,
|
|
292
321
|
});
|
|
293
322
|
|
package/src/facilitator.js
CHANGED
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
import { Writable } from "node:stream";
|
|
10
10
|
import { resolve } from "node:path";
|
|
11
11
|
import { createAgentRunner } from "./agent-runner.js";
|
|
12
|
-
import {
|
|
12
|
+
import { composeSystemPrompt } from "./profile-prompt.js";
|
|
13
13
|
import { createMessageBus } from "./message-bus.js";
|
|
14
14
|
import {
|
|
15
15
|
createOrchestrationContext,
|
|
@@ -18,26 +18,26 @@ import {
|
|
|
18
18
|
} from "./orchestration-toolkit.js";
|
|
19
19
|
import { OrchestrationLoop } from "./orchestration-loop.js";
|
|
20
20
|
|
|
21
|
-
/** System prompt
|
|
21
|
+
/** System prompt for the facilitator lead. L0 mechanics only per COALIGNED. */
|
|
22
22
|
export const FACILITATOR_SYSTEM_PROMPT =
|
|
23
|
-
"You
|
|
24
|
-
"
|
|
25
|
-
"
|
|
26
|
-
"
|
|
27
|
-
"
|
|
28
|
-
"
|
|
29
|
-
"
|
|
30
|
-
"
|
|
31
|
-
"
|
|
32
|
-
|
|
33
|
-
|
|
23
|
+
"You are the facilitator.\n" +
|
|
24
|
+
"You have no tools to perform work yourself.\n" +
|
|
25
|
+
"Use `RollCall` to list participants.\n" +
|
|
26
|
+
"Use `Ask` to delegate work to the best-suited participant.\n" +
|
|
27
|
+
"Participants are domain experts; state the task, not how to do it.\n" +
|
|
28
|
+
"`Ask` is async and returns {askIds:[N,…]} immediately.\n" +
|
|
29
|
+
"Answers arrive on your next turn as `[answer#N] <participant>: <text>` in your inbox.\n" +
|
|
30
|
+
"End your turn while Asks are pending. The system resumes you when answers arrive.\n" +
|
|
31
|
+
"Multiple `Ask` calls in one turn run participants in parallel.\n" +
|
|
32
|
+
"End every session by calling `Conclude` with a verdict and summary.";
|
|
33
|
+
|
|
34
|
+
/** System prompt for facilitated agent participants. L0 mechanics only per COALIGNED. */
|
|
34
35
|
export const FACILITATED_AGENT_SYSTEM_PROMPT =
|
|
35
|
-
"You
|
|
36
|
-
"Each question
|
|
37
|
-
"
|
|
38
|
-
"
|
|
39
|
-
"
|
|
40
|
-
"RollCall lists participants.";
|
|
36
|
+
"You are a participant in a facilitated session.\n" +
|
|
37
|
+
"Each question arrives as `[ask#N] <name>: <text>` in your inbox.\n" +
|
|
38
|
+
"Quote N as askId on your `Answer` to route the reply correctly.\n" +
|
|
39
|
+
"If the task already contains a completed response with no new human input after it, `Answer` that no further action is needed.\n" +
|
|
40
|
+
"Do not redo completed work.";
|
|
41
41
|
|
|
42
42
|
/**
|
|
43
43
|
* Facilitate-mode wrapper around `OrchestrationLoop`. The lead is named
|
|
@@ -113,15 +113,6 @@ export function createFacilitator({
|
|
|
113
113
|
if (!redactor) throw new Error("redactor is required");
|
|
114
114
|
const resolvedProfilesDir =
|
|
115
115
|
profilesDir ?? resolve(facilitatorCwd, ".claude/agents");
|
|
116
|
-
const systemPromptFor = (profile, trailer) => {
|
|
117
|
-
if (!trailer) throw new Error("trailer is required");
|
|
118
|
-
return profile
|
|
119
|
-
? composeProfilePrompt(profile, {
|
|
120
|
-
profilesDir: resolvedProfilesDir,
|
|
121
|
-
trailer,
|
|
122
|
-
})
|
|
123
|
-
: { type: "preset", preset: "claude_code", append: trailer };
|
|
124
|
-
};
|
|
125
116
|
const ctx = createOrchestrationContext();
|
|
126
117
|
const messageBus = createMessageBus({
|
|
127
118
|
participants: ["facilitator", ...agentConfigs.map((a) => a.name)],
|
|
@@ -155,17 +146,27 @@ export function createFacilitator({
|
|
|
155
146
|
onLine: (line) => facilitator.emitLine(config.name, line),
|
|
156
147
|
mcpServers: { orchestration: agentServer },
|
|
157
148
|
settingSources: ["project"],
|
|
158
|
-
systemPrompt:
|
|
149
|
+
systemPrompt: composeSystemPrompt({
|
|
150
|
+
role: "agent",
|
|
151
|
+
profile: config.agentProfile,
|
|
152
|
+
profilesDir: resolvedProfilesDir,
|
|
153
|
+
trailer: agentTrailer,
|
|
154
|
+
}),
|
|
159
155
|
redactor,
|
|
160
156
|
});
|
|
161
157
|
|
|
162
158
|
return { name: config.name, role: config.role, runner };
|
|
163
159
|
});
|
|
164
160
|
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
161
|
+
const defaultDisallowed = [
|
|
162
|
+
"Agent",
|
|
163
|
+
"Task",
|
|
164
|
+
"TaskOutput",
|
|
165
|
+
"TaskStop",
|
|
166
|
+
"Bash",
|
|
167
|
+
"Write",
|
|
168
|
+
"Edit",
|
|
169
|
+
];
|
|
169
170
|
const disallowedTools = facilitatorDisallowedTools
|
|
170
171
|
? [...new Set([...defaultDisallowed, ...facilitatorDisallowedTools])]
|
|
171
172
|
: defaultDisallowed;
|
|
@@ -176,22 +177,17 @@ export function createFacilitator({
|
|
|
176
177
|
output: devNull,
|
|
177
178
|
model: facilitatorModel ?? model,
|
|
178
179
|
maxTurns: maxTurns ?? 80,
|
|
179
|
-
allowedTools: facilitatorAllowedTools ?? [
|
|
180
|
-
"Bash",
|
|
181
|
-
"Read",
|
|
182
|
-
"Glob",
|
|
183
|
-
"Grep",
|
|
184
|
-
"Write",
|
|
185
|
-
"Edit",
|
|
186
|
-
],
|
|
180
|
+
allowedTools: facilitatorAllowedTools ?? ["Read", "Glob", "Grep"],
|
|
187
181
|
disallowedTools,
|
|
188
182
|
onLine: (line) => facilitator.emitLine("facilitator", line),
|
|
189
183
|
mcpServers: { orchestration: facilitatorServer },
|
|
190
184
|
settingSources: ["project"],
|
|
191
|
-
systemPrompt:
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
185
|
+
systemPrompt: composeSystemPrompt({
|
|
186
|
+
role: "lead",
|
|
187
|
+
profile: facilitatorProfile,
|
|
188
|
+
profilesDir: resolvedProfilesDir,
|
|
189
|
+
trailer: FACILITATOR_SYSTEM_PROMPT,
|
|
190
|
+
}),
|
|
195
191
|
redactor,
|
|
196
192
|
});
|
|
197
193
|
|
package/src/index.js
CHANGED
|
@@ -8,7 +8,11 @@ export {
|
|
|
8
8
|
parseGitRemote,
|
|
9
9
|
} from "./trace-github.js";
|
|
10
10
|
export { AgentRunner, createAgentRunner } from "./agent-runner.js";
|
|
11
|
-
export {
|
|
11
|
+
export {
|
|
12
|
+
composeProfilePrompt,
|
|
13
|
+
composeLeadPrompt,
|
|
14
|
+
composeSystemPrompt,
|
|
15
|
+
} from "./profile-prompt.js";
|
|
12
16
|
export {
|
|
13
17
|
Supervisor,
|
|
14
18
|
createSupervisor,
|
|
@@ -19,6 +23,7 @@ export { TeeWriter, createTeeWriter } from "./tee-writer.js";
|
|
|
19
23
|
export { SequenceCounter, createSequenceCounter } from "./sequence-counter.js";
|
|
20
24
|
export {
|
|
21
25
|
createOrchestrationContext,
|
|
26
|
+
createRequestForCommentHandler,
|
|
22
27
|
createSupervisorToolServer,
|
|
23
28
|
createSupervisedAgentToolServer,
|
|
24
29
|
createFacilitatorToolServer,
|
package/src/orchestration-toolkit.js
CHANGED
|
@@ -5,15 +5,15 @@
|
|
|
5
5
|
*
|
|
6
6
|
* **Tool surface, by role:**
|
|
7
7
|
*
|
|
8
|
-
* | | Ask | Answer | Announce | RollCall | Conclude | …extras
|
|
9
|
-
*
|
|
10
|
-
* | Facilitator | ✓ | ✓ | ✓ | ✓ | ✓ |
|
|
11
|
-
* | Fac. agent | ✓ | ✓ | ✓ | ✓ | |
|
|
12
|
-
* | Supervisor | ✓ | ✓ | ✓ | ✓ | ✓ |
|
|
13
|
-
* | Sup. agent | ✓ | ✓ | ✓ | ✓ | |
|
|
14
|
-
* | Discuss lead| ✓ | ✓ | ✓ | ✓ | |
|
|
15
|
-
* | Discuss agt | ✓ | ✓ | ✓ | ✓ | |
|
|
16
|
-
* | Judge | | | | | ✓ |
|
|
8
|
+
* | | Ask | Answer | Announce | RollCall | Conclude | …extras |
|
|
9
|
+
* |-------------|-----|--------|----------|----------|----------|-----------------------|
|
|
10
|
+
* | Facilitator | ✓ | ✓ | ✓ | ✓ | ✓ | |
|
|
11
|
+
* | Fac. agent | ✓ | ✓ | ✓ | ✓ | | RFC |
|
|
12
|
+
* | Supervisor | ✓ | ✓ | ✓ | ✓ | ✓ | |
|
|
13
|
+
* | Sup. agent | ✓ | ✓ | ✓ | ✓ | | |
|
|
14
|
+
* | Discuss lead| ✓ | ✓ | ✓ | ✓ | | Recess / Adjourn |
|
|
15
|
+
* | Discuss agt | ✓ | ✓ | ✓ | ✓ | | RFC |
|
|
16
|
+
* | Judge | | | | | ✓ | |
|
|
17
17
|
*
|
|
18
18
|
* **Ask is async.** Ask returns `{askIds:[…]}` immediately and posts the
|
|
19
19
|
* question to the addressee's bus queue. The reply arrives on the asker's
|
|
@@ -46,9 +46,24 @@ export function createOrchestrationContext() {
|
|
|
46
46
|
|
|
47
47
|
// --- Handler factories ---
|
|
48
48
|
|
|
49
|
+
/**
|
|
50
|
+
* Guard for terminal tools (`Conclude`, `Adjourn`, `Recess`). Returns an
|
|
51
|
+
* error result when the caller still has Asks in flight, telling them to
|
|
52
|
+
* end the turn and wait for the auto-resume. Returns `null` when no Asks
|
|
53
|
+
* are pending and the terminal tool is free to run.
|
|
54
|
+
*/
|
|
55
|
+
export function requireNoPendingAsks(ctx) {
|
|
56
|
+
if (ctx.pendingAsks.size === 0) return null;
|
|
57
|
+
return errorResult(
|
|
58
|
+
"Asks are still pending. End your turn. You will be resumed when answers arrive.",
|
|
59
|
+
);
|
|
60
|
+
}
|
|
61
|
+
|
|
49
62
|
/** Mark the session as concluded; cancel any open Asks so askers see the synthetic null on their next turn. */
|
|
50
63
|
export function createConcludeHandler(ctx) {
|
|
51
64
|
return async ({ verdict, summary }) => {
|
|
65
|
+
const guard = requireNoPendingAsks(ctx);
|
|
66
|
+
if (guard) return guard;
|
|
52
67
|
concludeSession(ctx, { verdict, summary, reason: "session concluded" });
|
|
53
68
|
return { content: [{ type: "text", text: "Session concluded." }] };
|
|
54
69
|
};
|
|
@@ -235,8 +250,18 @@ const ANNOUNCE_DESC = "Broadcast a message with no reply expected.";
|
|
|
235
250
|
|
|
236
251
|
const ROLLCALL_DESC = "List all participants in the session.";
|
|
237
252
|
|
|
253
|
+
// Terminal-tool descriptions. Each one ends the run. Group them so the
|
|
254
|
+
// contrast is visible: Conclude (success/failure), Adjourn (settled in
|
|
255
|
+
// thread), Recess (paused for out-of-session input). Each description
|
|
256
|
+
// leads with the cost.
|
|
238
257
|
const CONCLUDE_DESC =
|
|
239
|
-
"End the session
|
|
258
|
+
"End the session. Provide a verdict ('success' or 'failure') and a summary.";
|
|
259
|
+
|
|
260
|
+
const ADJOURN_DESC =
|
|
261
|
+
"End the discussion. Provide a verdict ('adjourned' or 'failed') and a summary. Cancels any unanswered Asks.";
|
|
262
|
+
|
|
263
|
+
const RECESS_DESC =
|
|
264
|
+
"End the run and schedule an out-of-session re-dispatch. Cancels any unanswered Asks. Use only when waiting on an external reply or duration. Do not use to wait on in-flight Asks.";
|
|
240
265
|
|
|
241
266
|
// --- Tool builders ---
|
|
242
267
|
|
|
@@ -244,6 +269,7 @@ const CONCLUDE_DESC =
|
|
|
244
269
|
function textResult(text) {
|
|
245
270
|
return { content: [{ type: "text", text }] };
|
|
246
271
|
}
|
|
272
|
+
/** Build an MCP tool error result wrapping a single text message. */
|
|
247
273
|
function errorResult(text) {
|
|
248
274
|
return { content: [{ type: "text", text }], isError: true };
|
|
249
275
|
}
|
|
@@ -337,11 +363,12 @@ export function createFacilitatorToolServer(ctx) {
|
|
|
337
363
|
]);
|
|
338
364
|
}
|
|
339
365
|
|
|
340
|
-
/** Facilitated agent tools: Ask + Answer + Announce + RollCall. */
|
|
366
|
+
/** Facilitated agent tools: Ask + Answer + Announce + RollCall + RequestForComment. */
|
|
341
367
|
export function createFacilitatedAgentToolServer(ctx, { from }) {
|
|
342
|
-
return orchestrationServer(
|
|
343
|
-
baseTools(ctx, { from, defaultTo: "facilitator", broadcast: true }),
|
|
344
|
-
|
|
368
|
+
return orchestrationServer([
|
|
369
|
+
...baseTools(ctx, { from, defaultTo: "facilitator", broadcast: true }),
|
|
370
|
+
requestForCommentTool(ctx),
|
|
371
|
+
]);
|
|
345
372
|
}
|
|
346
373
|
|
|
347
374
|
/**
|
|
@@ -352,6 +379,49 @@ export function createJudgeToolServer(ctx) {
|
|
|
352
379
|
return orchestrationServer([concludeTool(ctx)]);
|
|
353
380
|
}
|
|
354
381
|
|
|
382
|
+
// --- RequestForComment (agent-level coordination tool) ---
|
|
383
|
+
|
|
384
|
+
/** RequestForComment handler — queues RFC intent on `ctx.rfcs[]`. */
|
|
385
|
+
export function createRequestForCommentHandler(ctx) {
|
|
386
|
+
return async ({ channel, body, addressees }) => {
|
|
387
|
+
if (!ctx.rfcs) ctx.rfcs = [];
|
|
388
|
+
if (typeof ctx.rfcCounter !== "number") ctx.rfcCounter = 0;
|
|
389
|
+
const correlationId = `rfc_${++ctx.rfcCounter}`;
|
|
390
|
+
const addresseeList = addressees?.length ? addressees : [null];
|
|
391
|
+
for (const addressee of addresseeList) {
|
|
392
|
+
ctx.rfcs.push({
|
|
393
|
+
...(addressee && { addressee }),
|
|
394
|
+
body,
|
|
395
|
+
channel,
|
|
396
|
+
...(ctx.discussionId && { thread_id: ctx.discussionId }),
|
|
397
|
+
correlation_id: correlationId,
|
|
398
|
+
});
|
|
399
|
+
}
|
|
400
|
+
return jsonResult({ correlation_id: correlationId, channel });
|
|
401
|
+
};
|
|
402
|
+
}
|
|
403
|
+
|
|
404
|
+
/** Build the RequestForComment tool definition. */
|
|
405
|
+
function requestForCommentTool(ctx) {
|
|
406
|
+
return tool(
|
|
407
|
+
"RequestForComment",
|
|
408
|
+
"Open a new Discussion thread for long-horizon coordination on an open question. The bridge creates the thread; replies arrive asynchronously on future runs.",
|
|
409
|
+
{
|
|
410
|
+
channel: z.string(),
|
|
411
|
+
body: z.string(),
|
|
412
|
+
addressees: z.array(z.string()).optional(),
|
|
413
|
+
},
|
|
414
|
+
createRequestForCommentHandler(ctx),
|
|
415
|
+
);
|
|
416
|
+
}
|
|
417
|
+
|
|
355
418
|
// Re-export the building blocks discuss-tools.js needs to assemble its
|
|
356
|
-
// own lead tool surface (it has
|
|
357
|
-
export {
|
|
419
|
+
// own lead tool surface (it has two extra terminal tools).
|
|
420
|
+
export {
|
|
421
|
+
ADJOURN_DESC,
|
|
422
|
+
baseTools,
|
|
423
|
+
errorResult,
|
|
424
|
+
orchestrationServer,
|
|
425
|
+
RECESS_DESC,
|
|
426
|
+
requestForCommentTool,
|
|
427
|
+
};
|
package/src/profile-prompt.js
CHANGED
|
@@ -1,22 +1,28 @@
|
|
|
1
1
|
/**
|
|
2
|
-
*
|
|
2
|
+
* System prompt composition for agent runners.
|
|
3
3
|
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
6
|
-
*
|
|
7
|
-
*
|
|
8
|
-
*
|
|
9
|
-
*
|
|
4
|
+
* Two helpers:
|
|
5
|
+
*
|
|
6
|
+
* - `composeProfilePrompt(name, opts)` — profile + `claude_code` preset.
|
|
7
|
+
* Used by agent participants that need the full Claude Code tool surface.
|
|
8
|
+
*
|
|
9
|
+
* - `composeLeadPrompt(opts)` — plain string, no preset. Used by lead
|
|
10
|
+
* roles (supervisor, facilitator, discuss lead) that should only see
|
|
11
|
+
* the orchestration instructions and optionally a profile body.
|
|
12
|
+
*
|
|
13
|
+
* - `composeSystemPrompt(opts)` — unified entry point. Delegates to one
|
|
14
|
+
* of the above based on `opts.role`.
|
|
10
15
|
*/
|
|
11
16
|
|
|
12
17
|
import { readFileSync } from "node:fs";
|
|
13
18
|
import { join } from "node:path";
|
|
14
19
|
|
|
15
20
|
/**
|
|
21
|
+
* Compose a `claude_code`-preset system prompt from a profile file.
|
|
16
22
|
* @param {string} name - Profile basename (no `.md` suffix)
|
|
17
23
|
* @param {object} opts
|
|
18
24
|
* @param {string} opts.profilesDir - Directory containing `<name>.md`
|
|
19
|
-
* @param {string} [opts.trailer] -
|
|
25
|
+
* @param {string} [opts.trailer] - Mode-specific trailer appended after a blank line
|
|
20
26
|
* @returns {{type: "preset", preset: "claude_code", append: string}}
|
|
21
27
|
*/
|
|
22
28
|
export function composeProfilePrompt(name, { profilesDir, trailer }) {
|
|
@@ -27,6 +33,45 @@ export function composeProfilePrompt(name, { profilesDir, trailer }) {
|
|
|
27
33
|
return { type: "preset", preset: "claude_code", append };
|
|
28
34
|
}
|
|
29
35
|
|
|
36
|
+
/**
|
|
37
|
+
* Compose a plain-string system prompt for a lead role (no Claude Code preset).
|
|
38
|
+
* @param {object} opts
|
|
39
|
+
* @param {string} [opts.profile] - Profile basename (no `.md` suffix)
|
|
40
|
+
* @param {string} [opts.profilesDir] - Directory containing profile files
|
|
41
|
+
* @param {string} opts.trailer - Mode-specific orchestration instructions
|
|
42
|
+
* @returns {string}
|
|
43
|
+
*/
|
|
44
|
+
export function composeLeadPrompt({ profile, profilesDir, trailer }) {
|
|
45
|
+
if (!trailer) throw new Error("trailer is required");
|
|
46
|
+
if (!profile) return trailer;
|
|
47
|
+
const path = join(profilesDir, `${profile}.md`);
|
|
48
|
+
const raw = readFileSync(path, "utf8");
|
|
49
|
+
const body = stripFrontmatter(raw).trim();
|
|
50
|
+
return `${body}\n\n${trailer}`;
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
/**
|
|
54
|
+
* Unified entry point for composing system prompts.
|
|
55
|
+
*
|
|
56
|
+
* @param {object} opts
|
|
57
|
+
* @param {"lead"|"agent"} opts.role - `"lead"` produces a plain string;
|
|
58
|
+
* `"agent"` produces a `claude_code` preset object.
|
|
59
|
+
* @param {string} [opts.profile] - Profile basename
|
|
60
|
+
* @param {string} [opts.profilesDir]
|
|
61
|
+
* @param {string} opts.trailer - Mode-specific instructions
|
|
62
|
+
* @returns {string | {type: "preset", preset: "claude_code", append: string}}
|
|
63
|
+
*/
|
|
64
|
+
export function composeSystemPrompt({ role, profile, profilesDir, trailer }) {
|
|
65
|
+
if (!trailer) throw new Error("trailer is required");
|
|
66
|
+
if (role === "lead") {
|
|
67
|
+
return composeLeadPrompt({ profile, profilesDir, trailer });
|
|
68
|
+
}
|
|
69
|
+
if (profile) {
|
|
70
|
+
return composeProfilePrompt(profile, { profilesDir, trailer });
|
|
71
|
+
}
|
|
72
|
+
return { type: "preset", preset: "claude_code", append: trailer };
|
|
73
|
+
}
|
|
74
|
+
|
|
30
75
|
/**
|
|
31
76
|
* Strip a leading YAML frontmatter fence (`---\n…\n---\n`) from a markdown
|
|
32
77
|
* string. Returns the input unchanged when no frontmatter is present.
|
package/src/render/tool-hints.js
CHANGED
|
@@ -101,7 +101,8 @@ export function simplifyToolName(name) {
|
|
|
101
101
|
*
|
|
102
102
|
* Three branches, in priority order:
|
|
103
103
|
* - A built-in tool with an entry in `HINT_HANDLERS` → sanitized hint, no
|
|
104
|
-
* `{` / `"` from the input (
|
|
104
|
+
* `{` / `"` from the input (built-in tool hints stay free of JSON
|
|
105
|
+
* punctuation so readers see clean one-liners).
|
|
105
106
|
* - An MCP-prefixed tool (`mcp__*`) → full input rendered as compact
|
|
106
107
|
* single-line JSON; `{` and `"` intentionally appear so readers see
|
|
107
108
|
* the actual MCP payload.
|
|
@@ -2,8 +2,7 @@
|
|
|
2
2
|
* Turn renderer — maps a structured turn into formatted text lines.
|
|
3
3
|
*
|
|
4
4
|
* Shared by `TeeWriter.flushTurns()` (live stream) and
|
|
5
|
-
* `TraceCollector.toText()` (offline replay) so both emit identical output
|
|
6
|
-
* (spec 540).
|
|
5
|
+
* `TraceCollector.toText()` (offline replay) so both emit identical output.
|
|
7
6
|
*/
|
|
8
7
|
|
|
9
8
|
import {
|
package/src/supervisor.js
CHANGED
|
@@ -18,7 +18,7 @@
|
|
|
18
18
|
import { Writable } from "node:stream";
|
|
19
19
|
import { resolve } from "node:path";
|
|
20
20
|
import { createAgentRunner } from "./agent-runner.js";
|
|
21
|
-
import {
|
|
21
|
+
import { composeSystemPrompt } from "./profile-prompt.js";
|
|
22
22
|
import { createMessageBus } from "./message-bus.js";
|
|
23
23
|
import {
|
|
24
24
|
createOrchestrationContext,
|
|
@@ -27,23 +27,24 @@ import {
|
|
|
27
27
|
} from "./orchestration-toolkit.js";
|
|
28
28
|
import { OrchestrationLoop } from "./orchestration-loop.js";
|
|
29
29
|
|
|
30
|
-
/** System prompt
|
|
30
|
+
/** System prompt for the supervisor lead. L0 mechanics only per COALIGNED. */
|
|
31
31
|
export const SUPERVISOR_SYSTEM_PROMPT =
|
|
32
|
-
"You supervise one agent
|
|
33
|
-
"
|
|
34
|
-
"
|
|
35
|
-
"
|
|
36
|
-
"
|
|
37
|
-
"
|
|
38
|
-
"If the agent goes off-track,
|
|
32
|
+
"You supervise one agent.\n" +
|
|
33
|
+
"You have no tools to perform work yourself.\n" +
|
|
34
|
+
"Use `Ask` to delegate work to the agent.\n" +
|
|
35
|
+
"`Ask` is async and returns {askIds:[N]} immediately.\n" +
|
|
36
|
+
"The reply arrives on your next turn as `[answer#N] agent: <text>` in your inbox.\n" +
|
|
37
|
+
"End your turn while Asks are pending. The system resumes you when an answer arrives.\n" +
|
|
38
|
+
"If the agent goes off-track, send a corrective `Ask`.\n" +
|
|
39
|
+
"End every session by calling `Conclude` with a verdict and summary.";
|
|
39
40
|
|
|
40
|
-
/** System prompt
|
|
41
|
+
/** System prompt for the supervised agent. L0 mechanics only per COALIGNED. */
|
|
41
42
|
export const AGENT_SYSTEM_PROMPT =
|
|
42
|
-
"A supervisor
|
|
43
|
-
"Each question
|
|
44
|
-
"
|
|
45
|
-
"
|
|
46
|
-
"
|
|
43
|
+
"A supervisor directs your work.\n" +
|
|
44
|
+
"Each question arrives as `[ask#N] supervisor: <text>` in your inbox.\n" +
|
|
45
|
+
"Quote N as askId on your `Answer` to route the reply correctly.\n" +
|
|
46
|
+
"If the task already contains a completed response with no new human input after it, `Answer` that no further action is needed.\n" +
|
|
47
|
+
"Do not redo completed work.";
|
|
47
48
|
|
|
48
49
|
/**
|
|
49
50
|
* Supervise-mode wrapper around `OrchestrationLoop`. The lead is
|
|
@@ -148,15 +149,6 @@ export function createSupervisor({
|
|
|
148
149
|
if (!redactor) throw new Error("redactor is required");
|
|
149
150
|
const resolvedProfilesDir =
|
|
150
151
|
profilesDir ?? resolve(supervisorCwd, ".claude/agents");
|
|
151
|
-
const systemPromptFor = (profile, trailer) => {
|
|
152
|
-
if (!trailer) throw new Error("trailer is required");
|
|
153
|
-
return profile
|
|
154
|
-
? composeProfilePrompt(profile, {
|
|
155
|
-
profilesDir: resolvedProfilesDir,
|
|
156
|
-
trailer,
|
|
157
|
-
})
|
|
158
|
-
: { type: "preset", preset: "claude_code", append: trailer };
|
|
159
|
-
};
|
|
160
152
|
|
|
161
153
|
const ctx = createOrchestrationContext();
|
|
162
154
|
const messageBus = createMessageBus({
|
|
@@ -183,15 +175,25 @@ export function createSupervisor({
|
|
|
183
175
|
allowedTools,
|
|
184
176
|
onLine: (line) => supervisor.emitLine("agent", line),
|
|
185
177
|
settingSources: ["project"],
|
|
186
|
-
systemPrompt:
|
|
178
|
+
systemPrompt: composeSystemPrompt({
|
|
179
|
+
role: "agent",
|
|
180
|
+
profile: agentProfile,
|
|
181
|
+
profilesDir: resolvedProfilesDir,
|
|
182
|
+
trailer: AGENT_SYSTEM_PROMPT,
|
|
183
|
+
}),
|
|
187
184
|
mcpServers: { orchestration: agentServer, ...agentMcpServers },
|
|
188
185
|
redactor,
|
|
189
186
|
});
|
|
190
187
|
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
188
|
+
const defaultDisallowed = [
|
|
189
|
+
"Agent",
|
|
190
|
+
"Task",
|
|
191
|
+
"TaskOutput",
|
|
192
|
+
"TaskStop",
|
|
193
|
+
"Bash",
|
|
194
|
+
"Write",
|
|
195
|
+
"Edit",
|
|
196
|
+
];
|
|
195
197
|
const disallowedTools = supervisorDisallowedTools
|
|
196
198
|
? [...new Set([...defaultDisallowed, ...supervisorDisallowedTools])]
|
|
197
199
|
: defaultDisallowed;
|
|
@@ -202,18 +204,16 @@ export function createSupervisor({
|
|
|
202
204
|
output: devNull,
|
|
203
205
|
model: supervisorModel ?? model,
|
|
204
206
|
maxTurns: perRunBudget,
|
|
205
|
-
allowedTools: supervisorAllowedTools ?? [
|
|
206
|
-
"Bash",
|
|
207
|
-
"Read",
|
|
208
|
-
"Glob",
|
|
209
|
-
"Grep",
|
|
210
|
-
"Write",
|
|
211
|
-
"Edit",
|
|
212
|
-
],
|
|
207
|
+
allowedTools: supervisorAllowedTools ?? ["Read", "Glob", "Grep"],
|
|
213
208
|
disallowedTools,
|
|
214
209
|
onLine: (line) => supervisor.emitLine("supervisor", line),
|
|
215
210
|
settingSources: ["project"],
|
|
216
|
-
systemPrompt:
|
|
211
|
+
systemPrompt: composeSystemPrompt({
|
|
212
|
+
role: "lead",
|
|
213
|
+
profile: supervisorProfile,
|
|
214
|
+
profilesDir: resolvedProfilesDir,
|
|
215
|
+
trailer: SUPERVISOR_SYSTEM_PROMPT,
|
|
216
|
+
}),
|
|
217
217
|
mcpServers: { orchestration: supervisorServer },
|
|
218
218
|
redactor,
|
|
219
219
|
});
|
package/src/tee-writer.js
CHANGED
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
*
|
|
10
10
|
* Human text rendering is delegated to the pure modules under `./render/`
|
|
11
11
|
* so the live stream and the offline `TraceCollector.toText()` replay share
|
|
12
|
-
* one formatting path
|
|
12
|
+
* one formatting path. The NDJSON going to `fileStream` is
|
|
13
13
|
* untouched — only what reaches `textStream` changes.
|
|
14
14
|
*
|
|
15
15
|
* Follows OO+DI: constructor injection, factory function, tests bypass factory.
|
|
@@ -67,10 +67,9 @@ export class TeeWriter extends Writable {
|
|
|
67
67
|
}
|
|
68
68
|
|
|
69
69
|
// Emit the trailing `--- Result: ... ---` footer — the one summary line
|
|
70
|
-
// humans want
|
|
71
|
-
// appends, so the live stream and the offline replay stay in sync
|
|
72
|
-
//
|
|
73
|
-
// footer is gone in every mode.
|
|
70
|
+
// humans want. This is the same tail TraceCollector.toText()
|
|
71
|
+
// appends, so the live stream and the offline replay stay in sync.
|
|
72
|
+
// The superseded `--- Evaluation ... ---` footer is gone in every mode.
|
|
74
73
|
if (this.collector.result) {
|
|
75
74
|
const text = this.collector.toText();
|
|
76
75
|
const idx = text.lastIndexOf("\n---");
|
|
@@ -78,7 +77,7 @@ export class TeeWriter extends Writable {
|
|
|
78
77
|
// Slice past the leading `\n` — the previously-streamed body
|
|
79
78
|
// already ended with its own newline, so re-emitting `\n---` here
|
|
80
79
|
// would produce a blank line before the footer and desync from
|
|
81
|
-
// the offline replay
|
|
80
|
+
// the offline replay.
|
|
82
81
|
this.textStream.write(text.slice(idx + 1) + "\n");
|
|
83
82
|
}
|
|
84
83
|
}
|
|
@@ -107,7 +106,8 @@ export class TeeWriter extends Writable {
|
|
|
107
106
|
this.collector.addLine(line);
|
|
108
107
|
|
|
109
108
|
// Orchestrator lifecycle events are suppressed from the text stream
|
|
110
|
-
// entirely
|
|
109
|
+
// entirely — humans only want agent-visible content. They still
|
|
110
|
+
// reached fileStream above.
|
|
111
111
|
if (
|
|
112
112
|
parsed.source === "orchestrator" &&
|
|
113
113
|
isSuppressedOrchestratorEvent(parsed.event)
|
|
@@ -118,7 +118,7 @@ export class TeeWriter extends Writable {
|
|
|
118
118
|
return;
|
|
119
119
|
}
|
|
120
120
|
|
|
121
|
-
// Bare event (run mode
|
|
121
|
+
// Bare event (unwrapped run mode line or direct feed)
|
|
122
122
|
this.collector.addLine(line);
|
|
123
123
|
this.flushTurns();
|
|
124
124
|
}
|
package/src/trace-collector.js
CHANGED
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
*
|
|
7
7
|
* Human text rendering is delegated to the pure modules under `./render/`
|
|
8
8
|
* so the live `TeeWriter` stream and the offline `toText()` replay share
|
|
9
|
-
* one formatting path
|
|
9
|
+
* one formatting path.
|
|
10
10
|
*/
|
|
11
11
|
|
|
12
12
|
import { renderTurnLines } from "./render/turn-renderer.js";
|
|
@@ -293,7 +293,7 @@ export class TraceCollector {
|
|
|
293
293
|
}
|
|
294
294
|
|
|
295
295
|
/**
|
|
296
|
-
* Format the trailing result summary line
|
|
296
|
+
* Format the trailing result summary line. When an orchestrator
|
|
297
297
|
* summary is present (supervised / facilitated mode), the headline word is
|
|
298
298
|
* the supervisor's verdict ("success" / "failure") rather than the SDK's
|
|
299
299
|
* per-runner subtype, so the footer aligns with the CI exit code.
|