@forwardimpact/libeval 0.1.44 → 0.1.45
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +212 -13
- package/package.json +1 -1
- package/src/agent-runner.js +45 -181
- package/src/benchmark/runner.js +2 -2
- package/src/commands/supervise.js +3 -1
- package/src/discuss-tools.js +72 -140
- package/src/discusser.js +18 -35
- package/src/facilitator.js +26 -43
- package/src/index.js +0 -2
- package/src/judge.js +1 -1
- package/src/message-bus.js +27 -81
- package/src/orchestration-loop.js +176 -229
- package/src/orchestration-toolkit.js +272 -303
- package/src/orchestrator-helpers.js +9 -45
- package/src/redaction.js +2 -0
- package/src/render/orchestrator-filter.js +1 -9
- package/src/supervisor.js +79 -465
package/src/discuss-tools.js
CHANGED
|
@@ -1,32 +1,33 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* DiscussTools — tool servers
|
|
3
|
-
*
|
|
4
|
-
* `Conclude` is absent; instead `Adjourn` (terminal verdict) and `Recess`
|
|
5
|
-
* (suspend with a ResumeTrigger) end a run, and `RequestForComment` queues
|
|
6
|
-
* structured replies onto the trace for the bridge to deliver after the
|
|
7
|
-
* workflow run completes.
|
|
2
|
+
* DiscussTools — discuss-mode tool servers. The lead's surface extends the
|
|
3
|
+
* base set with three discuss-only terminal tools:
|
|
8
4
|
*
|
|
9
|
-
*
|
|
10
|
-
*
|
|
5
|
+
* - `RequestForComment` posts a fire-and-forget message to a human channel
|
|
6
|
+
* via the bridge; the reply arrives on a later workflow run.
|
|
7
|
+
* - `Recess` suspends the session with a resumption trigger.
|
|
8
|
+
* - `Adjourn` ends the discussion with a verdict.
|
|
9
|
+
*
|
|
10
|
+
* `Conclude` is absent — discuss mode ends via Adjourn or Recess. The
|
|
11
|
+
* agent surface is identical to the facilitated agent's: Ask / Answer /
|
|
12
|
+
* Announce / RollCall, with Ask defaulting to the lead.
|
|
11
13
|
*/
|
|
12
14
|
|
|
13
|
-
import {
|
|
15
|
+
import { tool } from "@anthropic-ai/claude-agent-sdk";
|
|
14
16
|
import { z } from "zod";
|
|
15
17
|
|
|
16
18
|
import {
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
createRollCallHandler,
|
|
21
|
-
createRedirectHandler,
|
|
19
|
+
baseTools,
|
|
20
|
+
concludeSession,
|
|
21
|
+
orchestrationServer,
|
|
22
22
|
} from "./orchestration-toolkit.js";
|
|
23
23
|
|
|
24
24
|
/** System prompt appended for discuss-mode agent runners. */
|
|
25
25
|
export const DISCUSS_AGENT_SYSTEM_PROMPT =
|
|
26
26
|
"You participate in an asynchronous discussion. " +
|
|
27
|
-
"
|
|
28
|
-
"
|
|
29
|
-
"
|
|
27
|
+
"Each question you receive carries an [ask#N] header — quote that N back as the askId field on Answer so the reply pairs with the right question. " +
|
|
28
|
+
"Answer replies to an ask addressed to you. askId is optional: omit it and the handler auto-picks if exactly one ask is owed to you, otherwise it routes your message as an Announce. " +
|
|
29
|
+
"Ask sends a question to the lead or another participant and returns immediately with {askIds:[N]}; the reply arrives on a later turn as `[answer#N] <participant>: <text>` in your inbox. " +
|
|
30
|
+
"Announce broadcasts a message to every other participant — use this for unsolicited remarks or to reply to an Announce. " +
|
|
30
31
|
"RollCall lists participants.";
|
|
31
32
|
|
|
32
33
|
const RESUME_TRIGGER_SCHEMA = z
|
|
@@ -37,128 +38,51 @@ const RESUME_TRIGGER_SCHEMA = z
|
|
|
37
38
|
})
|
|
38
39
|
.strict();
|
|
39
40
|
|
|
40
|
-
/**
|
|
41
|
-
* Lead tools for the discusser. The discuss-mode surface is Ask / Answer /
|
|
42
|
-
* Announce / Redirect / RollCall plus the discuss-only RequestForComment,
|
|
43
|
-
* Recess, and Adjourn. `Conclude` is intentionally absent — discuss mode
|
|
44
|
-
* ends via Adjourn or Recess, never Conclude. `RequestForComment` writes
|
|
45
|
-
* a structured reply onto `ctx.replies[]`; the discusser flushes those
|
|
46
|
-
* into the terminal summary event at end-of-run.
|
|
47
|
-
*
|
|
48
|
-
* @param {object} ctx - Orchestration context (must carry `replies` array)
|
|
49
|
-
* @returns {object} MCP server config (type: "sdk")
|
|
50
|
-
*/
|
|
41
|
+
/** Discuss-mode lead tool server. */
|
|
51
42
|
export function createDiscussLeadToolServer(ctx) {
|
|
52
|
-
return
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
),
|
|
73
|
-
|
|
74
|
-
"
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
),
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
"Interrupt a participant with replacement instructions.",
|
|
82
|
-
{ message: z.string(), to: z.string().optional() },
|
|
83
|
-
createRedirectHandler(ctx),
|
|
84
|
-
),
|
|
85
|
-
tool(
|
|
86
|
-
"RequestForComment",
|
|
87
|
-
"Post a fire-and-forget message to a channel via the bridge. Returns a correlation id; the reply arrives on a later workflow run.",
|
|
88
|
-
{
|
|
89
|
-
channel: z.string(),
|
|
90
|
-
body: z.string(),
|
|
91
|
-
addressees: z.array(z.string()).optional(),
|
|
92
|
-
},
|
|
93
|
-
createRequestForCommentHandler(ctx),
|
|
94
|
-
),
|
|
95
|
-
tool(
|
|
96
|
-
"Recess",
|
|
97
|
-
"Suspend the run. The bridge re-dispatches the workflow when the trigger fires.",
|
|
98
|
-
{ reason: z.string(), trigger: RESUME_TRIGGER_SCHEMA },
|
|
99
|
-
createRecessHandler(ctx),
|
|
100
|
-
),
|
|
101
|
-
tool(
|
|
102
|
-
"Adjourn",
|
|
103
|
-
"End the discussion with a verdict and a summary.",
|
|
104
|
-
{
|
|
105
|
-
verdict: z.enum(["adjourned", "failed"]),
|
|
106
|
-
summary: z.string(),
|
|
107
|
-
outcome: z.string().optional(),
|
|
108
|
-
},
|
|
109
|
-
createAdjournHandler(ctx),
|
|
110
|
-
),
|
|
111
|
-
],
|
|
112
|
-
});
|
|
43
|
+
return orchestrationServer([
|
|
44
|
+
...baseTools(ctx, { from: "lead", defaultTo: undefined, broadcast: true }),
|
|
45
|
+
tool(
|
|
46
|
+
"RequestForComment",
|
|
47
|
+
"Post a fire-and-forget message to a channel via the bridge. Returns a correlation id; the reply arrives on a later workflow run.",
|
|
48
|
+
{
|
|
49
|
+
channel: z.string(),
|
|
50
|
+
body: z.string(),
|
|
51
|
+
addressees: z.array(z.string()).optional(),
|
|
52
|
+
},
|
|
53
|
+
createRequestForCommentHandler(ctx),
|
|
54
|
+
),
|
|
55
|
+
tool(
|
|
56
|
+
"Recess",
|
|
57
|
+
"Suspend the run. The bridge re-dispatches the workflow when the trigger fires.",
|
|
58
|
+
{ reason: z.string(), trigger: RESUME_TRIGGER_SCHEMA },
|
|
59
|
+
createRecessHandler(ctx),
|
|
60
|
+
),
|
|
61
|
+
tool(
|
|
62
|
+
"Adjourn",
|
|
63
|
+
"End the discussion with a verdict ('adjourned' / 'failed') and a summary.",
|
|
64
|
+
{
|
|
65
|
+
verdict: z.enum(["adjourned", "failed"]),
|
|
66
|
+
summary: z.string(),
|
|
67
|
+
outcome: z.string().optional(),
|
|
68
|
+
},
|
|
69
|
+
createAdjournHandler(ctx),
|
|
70
|
+
),
|
|
71
|
+
]);
|
|
113
72
|
}
|
|
114
73
|
|
|
115
|
-
/**
|
|
116
|
-
* Discuss-mode agent tools: Ask / Answer / Announce / RollCall. Surface is
|
|
117
|
-
* defined here (not borrowed from facilitate mode) so the two modes stay
|
|
118
|
-
* structurally independent.
|
|
119
|
-
*
|
|
120
|
-
* @param {object} ctx - Orchestration context
|
|
121
|
-
* @param {{from: string}} opts - Agent name (canonical)
|
|
122
|
-
* @returns {object} MCP server config (type: "sdk")
|
|
123
|
-
*/
|
|
74
|
+
/** Discuss-mode agent tool server. */
|
|
124
75
|
export function createDiscussAgentToolServer(ctx, { from }) {
|
|
125
|
-
return
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
tool(
|
|
129
|
-
"Ask",
|
|
130
|
-
"Send a question to another participant. Omit 'to' to ask the lead.",
|
|
131
|
-
{ question: z.string(), to: z.string().optional() },
|
|
132
|
-
createAskHandler(ctx, { from, defaultTo: "lead" }),
|
|
133
|
-
),
|
|
134
|
-
tool(
|
|
135
|
-
"Answer",
|
|
136
|
-
"Reply to an ask addressed to you.",
|
|
137
|
-
{ message: z.string() },
|
|
138
|
-
createAnswerHandler(ctx, { from }),
|
|
139
|
-
),
|
|
140
|
-
tool(
|
|
141
|
-
"Announce",
|
|
142
|
-
"Broadcast a message with no reply expected.",
|
|
143
|
-
{ message: z.string() },
|
|
144
|
-
createAnnounceHandler(ctx, { from }),
|
|
145
|
-
),
|
|
146
|
-
tool(
|
|
147
|
-
"RollCall",
|
|
148
|
-
"List all participants in the session.",
|
|
149
|
-
{},
|
|
150
|
-
createRollCallHandler(ctx),
|
|
151
|
-
),
|
|
152
|
-
],
|
|
153
|
-
});
|
|
76
|
+
return orchestrationServer(
|
|
77
|
+
baseTools(ctx, { from, defaultTo: "lead", broadcast: true }),
|
|
78
|
+
);
|
|
154
79
|
}
|
|
155
80
|
|
|
156
|
-
/**
|
|
81
|
+
/** RequestForComment handler — queues structured replies on `ctx.replies[]`. */
|
|
157
82
|
export function createRequestForCommentHandler(ctx) {
|
|
158
83
|
return async ({ channel, body, addressees }) => {
|
|
159
84
|
const correlationId = `rfc_${++ctx.rfcCounter}`;
|
|
160
|
-
const addresseeList =
|
|
161
|
-
Array.isArray(addressees) && addressees.length > 0 ? addressees : [null];
|
|
85
|
+
const addresseeList = addressees?.length ? addressees : [null];
|
|
162
86
|
for (const addressee of addresseeList) {
|
|
163
87
|
ctx.replies.push({
|
|
164
88
|
...(addressee && { addressee }),
|
|
@@ -178,26 +102,34 @@ export function createRequestForCommentHandler(ctx) {
|
|
|
178
102
|
};
|
|
179
103
|
}
|
|
180
104
|
|
|
181
|
-
/**
|
|
105
|
+
/**
|
|
106
|
+
* Recess handler — ends the run with a structured pause + resumption
|
|
107
|
+
* trigger; cancels any open Asks so askers see a synthetic null answer.
|
|
108
|
+
* `concluded` flips true (same as Adjourn); the `recessed` verdict
|
|
109
|
+
* distinguishes them, and `recessTrigger` carries the resume shape for
|
|
110
|
+
* the bridge.
|
|
111
|
+
*/
|
|
182
112
|
export function createRecessHandler(ctx) {
|
|
183
113
|
return async ({ reason, trigger }) => {
|
|
184
|
-
ctx.recessed = true;
|
|
185
114
|
ctx.recessTrigger = trigger;
|
|
186
|
-
ctx
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
115
|
+
concludeSession(ctx, {
|
|
116
|
+
verdict: "recessed",
|
|
117
|
+
summary: reason,
|
|
118
|
+
reason: "session recessed",
|
|
119
|
+
});
|
|
190
120
|
return { content: [{ type: "text", text: "Recess queued." }] };
|
|
191
121
|
};
|
|
192
122
|
}
|
|
193
123
|
|
|
194
|
-
/**
|
|
124
|
+
/** Adjourn handler — ends the discussion with a verdict. */
|
|
195
125
|
export function createAdjournHandler(ctx) {
|
|
196
126
|
return async ({ verdict, summary, outcome }) => {
|
|
197
|
-
ctx.concluded = true;
|
|
198
|
-
ctx.verdict = verdict;
|
|
199
|
-
ctx.summary = summary;
|
|
200
127
|
if (outcome !== undefined) ctx.outcome = outcome;
|
|
128
|
+
concludeSession(ctx, {
|
|
129
|
+
verdict,
|
|
130
|
+
summary,
|
|
131
|
+
reason: "session adjourned",
|
|
132
|
+
});
|
|
201
133
|
return { content: [{ type: "text", text: "Session adjourned." }] };
|
|
202
134
|
};
|
|
203
135
|
}
|
package/src/discusser.js
CHANGED
|
@@ -27,14 +27,15 @@ import { OrchestrationLoop } from "./orchestration-loop.js";
|
|
|
27
27
|
/** System prompt appended for the lead (Chair) runner in discuss mode. */
|
|
28
28
|
export const DISCUSS_SYSTEM_PROMPT =
|
|
29
29
|
"You lead an asynchronous discussion across multiple participants and a human channel. " +
|
|
30
|
-
"Ask
|
|
30
|
+
"Ask sends a question and returns immediately with {askIds:[N,…]}. The reply arrives on a later turn as `[answer#N] <participant>: <text>` in your inbox — between turns you can plan, reflect, or send more Asks while participants work in parallel. End your turn with text after you've asked everything you intend to; the orchestrator wakes you when the next message lands. " +
|
|
31
|
+
"Answer replies to an ask a participant addressed to you (you'll see it tagged `[ask#N] <participant>: …` in your inbox). Quote askId from the [ask#N] tag; omit it and the handler auto-picks the only pending ask or routes your message as an Announce. " +
|
|
31
32
|
"Announce delivers a message with no reply obligation. " +
|
|
32
|
-
"Redirect interrupts an in-progress participant with replacement instructions. " +
|
|
33
33
|
"RollCall returns the participant roster. " +
|
|
34
34
|
"RequestForComment posts a message to the human thread via the bridge. Every reply you want the human to see MUST go through RequestForComment — the bridge delivers only queued replies, not your text output. " +
|
|
35
|
-
"Recess suspends the run with a resumption trigger (responses / elapsed / either). " +
|
|
35
|
+
"Recess suspends the run with a resumption trigger (responses / elapsed / either); any open Asks get a synthetic '[no answer: session concluded]' on the asker's queue so nothing dangles. " +
|
|
36
36
|
"Adjourn ends the discussion with a verdict ('adjourned' / 'failed') and a summary. " +
|
|
37
|
-
"
|
|
37
|
+
"Multiple Ask / Announce calls in one assistant turn dispatch in parallel — issue them as parallel tool_use blocks rather than sending the same question both broadcast and individually. " +
|
|
38
|
+
"You MUST call RequestForComment with your response before calling Adjourn. You MUST end every run by calling Adjourn or Recess — never end a turn with only text *after* every Ask round has resolved.";
|
|
38
39
|
|
|
39
40
|
/**
|
|
40
41
|
* Augment a base orchestration context with discuss-mode fields.
|
|
@@ -44,34 +45,13 @@ export const DISCUSS_SYSTEM_PROMPT =
|
|
|
44
45
|
*/
|
|
45
46
|
export function augmentContextForDiscuss(ctx, discussionId) {
|
|
46
47
|
ctx.discussionId = discussionId;
|
|
47
|
-
ctx.recessed = false;
|
|
48
48
|
ctx.recessTrigger = null;
|
|
49
|
-
ctx.recessReason = null;
|
|
50
49
|
ctx.replies = [];
|
|
51
50
|
ctx.rfcCounter = 0;
|
|
52
51
|
ctx.outcome = null;
|
|
53
52
|
return ctx;
|
|
54
53
|
}
|
|
55
54
|
|
|
56
|
-
/**
|
|
57
|
-
* Round-trip-safe representation of `ctx.pendingAsks` (a `Map`).
|
|
58
|
-
* @param {Map<string, object>} map
|
|
59
|
-
* @returns {object}
|
|
60
|
-
*/
|
|
61
|
-
export function pendingAsksToPlain(map) {
|
|
62
|
-
return Object.fromEntries(map);
|
|
63
|
-
}
|
|
64
|
-
|
|
65
|
-
/**
|
|
66
|
-
* Restore a plain object back into a `Map<string, …>`.
|
|
67
|
-
* @param {object|null|undefined} plain
|
|
68
|
-
* @returns {Map<string, object>}
|
|
69
|
-
*/
|
|
70
|
-
export function pendingAsksFromPlain(plain) {
|
|
71
|
-
if (!plain) return new Map();
|
|
72
|
-
return new Map(Object.entries(plain));
|
|
73
|
-
}
|
|
74
|
-
|
|
75
55
|
const devNull = new Writable({
|
|
76
56
|
write(_chunk, _enc, cb) {
|
|
77
57
|
cb();
|
|
@@ -89,9 +69,9 @@ export class Discusser {
|
|
|
89
69
|
* @param {OrchestrationLoop} deps.loop
|
|
90
70
|
* @param {object} deps.ctx
|
|
91
71
|
* @param {import("stream").Writable} deps.output
|
|
72
|
+
* @param {object} deps.redactor
|
|
92
73
|
* @param {string|null} [deps.discussionId]
|
|
93
74
|
* @param {SequenceCounter} [deps.counter]
|
|
94
|
-
* @param {object} [deps.redactor]
|
|
95
75
|
*/
|
|
96
76
|
constructor({ loop, ctx, output, discussionId, counter, redactor }) {
|
|
97
77
|
if (!loop) throw new Error("loop is required");
|
|
@@ -123,7 +103,7 @@ export class Discusser {
|
|
|
123
103
|
await this.loop.run(task);
|
|
124
104
|
|
|
125
105
|
const verdict = this.ctx.verdict ?? "failed";
|
|
126
|
-
const success = verdict === "adjourned"
|
|
106
|
+
const success = verdict === "adjourned";
|
|
127
107
|
this.#emitDiscussSummary({
|
|
128
108
|
success,
|
|
129
109
|
verdict,
|
|
@@ -163,7 +143,6 @@ export class Discusser {
|
|
|
163
143
|
replies: this.ctx.replies,
|
|
164
144
|
...(this.ctx.recessTrigger && { trigger: this.ctx.recessTrigger }),
|
|
165
145
|
...(this.discussionId && { discussion_id: this.discussionId }),
|
|
166
|
-
pending_asks: pendingAsksToPlain(this.ctx.pendingAsks),
|
|
167
146
|
};
|
|
168
147
|
this.output.write(
|
|
169
148
|
JSON.stringify(
|
|
@@ -182,6 +161,12 @@ export class Discusser {
|
|
|
182
161
|
* the `OrchestrationLoop` (with `leadName: "lead"` and discuss-mode
|
|
183
162
|
* protocol tagging) and the wrapping `Discusser`.
|
|
184
163
|
*
|
|
164
|
+
* Resume semantics: Recess ends the run, cancels any open Asks via
|
|
165
|
+
* `cancelPendingAsks`, and emits a synthetic null answer per cancelled
|
|
166
|
+
* ask so nothing dangles in the trace. The bridge later re-dispatches
|
|
167
|
+
* the workflow against a fresh context; the human reads the trail of
|
|
168
|
+
* events to decide what to re-ask.
|
|
169
|
+
*
|
|
185
170
|
* @param {object} deps
|
|
186
171
|
* @param {string} [deps.leadProfile]
|
|
187
172
|
* @param {string} [deps.leadModel]
|
|
@@ -225,12 +210,11 @@ export function createDiscusser({
|
|
|
225
210
|
discussionId ?? null,
|
|
226
211
|
);
|
|
227
212
|
|
|
228
|
-
// Hydrate resume context —
|
|
229
|
-
//
|
|
230
|
-
//
|
|
213
|
+
// Hydrate resume context — participants, replies, counters. `pendingAsks`
|
|
214
|
+
// is intentionally not restored: Recess cancelled every in-flight Ask
|
|
215
|
+
// with a synthetic null answer, so there's nothing meaningful to carry
|
|
216
|
+
// forward.
|
|
231
217
|
if (resumeContext) {
|
|
232
|
-
if (resumeContext.pendingAsks)
|
|
233
|
-
ctx.pendingAsks = pendingAsksFromPlain(resumeContext.pendingAsks);
|
|
234
218
|
if (Array.isArray(resumeContext.participants))
|
|
235
219
|
ctx.participants = resumeContext.participants;
|
|
236
220
|
if (Array.isArray(resumeContext.replies))
|
|
@@ -297,7 +281,7 @@ export function createDiscusser({
|
|
|
297
281
|
query,
|
|
298
282
|
output: devNull,
|
|
299
283
|
model: leadModel ?? "claude-opus-4-7[1m]",
|
|
300
|
-
maxTurns: maxTurns ??
|
|
284
|
+
maxTurns: maxTurns ?? 80,
|
|
301
285
|
allowedTools: ["Bash", "Read", "Glob", "Grep", "Write", "Edit"],
|
|
302
286
|
disallowedTools: defaultDisallowed,
|
|
303
287
|
onLine: (line) => discusser.loop.emitLine("lead", line),
|
|
@@ -314,7 +298,6 @@ export function createDiscusser({
|
|
|
314
298
|
output,
|
|
315
299
|
leadName: "lead",
|
|
316
300
|
mode: "discussion",
|
|
317
|
-
maxTurns: maxTurns ?? 40,
|
|
318
301
|
ctx,
|
|
319
302
|
taskAmend,
|
|
320
303
|
redactor,
|
package/src/facilitator.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Facilitator — facilitate-mode wrapper around `OrchestrationLoop`. The
|
|
3
|
-
* lead participant is named "facilitator" and
|
|
4
|
-
*
|
|
3
|
+
* lead participant is named "facilitator" and ends the session via the
|
|
4
|
+
* `Conclude` tool. The within-run turn loop lives in
|
|
5
5
|
* `orchestration-loop.js`; this file owns only the facilitate-mode
|
|
6
6
|
* specifics (lead role name, system prompts, tool wiring, factory).
|
|
7
7
|
*/
|
|
@@ -16,34 +16,33 @@ import {
|
|
|
16
16
|
createFacilitatorToolServer,
|
|
17
17
|
createFacilitatedAgentToolServer,
|
|
18
18
|
} from "./orchestration-toolkit.js";
|
|
19
|
-
import { createAsyncQueue } from "./orchestrator-helpers.js";
|
|
20
19
|
import { OrchestrationLoop } from "./orchestration-loop.js";
|
|
21
20
|
|
|
22
21
|
/** System prompt appended for the facilitator runner. */
|
|
23
22
|
export const FACILITATOR_SYSTEM_PROMPT =
|
|
24
23
|
"You coordinate multiple participants via these tools: " +
|
|
25
|
-
"Ask
|
|
24
|
+
"Ask sends a question and returns immediately with {askIds:[N,…]}. The reply arrives on a later turn as `[answer#N] <participant>: <text>` in your inbox — between turns you can plan, reflect, or send more Asks while participants work in parallel. End your turn with text after you've asked everything you intend to; the orchestrator wakes you again as soon as a reply (or any message) lands. " +
|
|
25
|
+
"Answer replies to an ask a participant addressed to you (you'll see it tagged `[ask#N] <participant>: …` in your inbox). Quote askId from the [ask#N] tag; omit it and the handler auto-picks the only pending ask or routes your message as an Announce. " +
|
|
26
26
|
"Announce delivers a message with no reply obligation. " +
|
|
27
|
-
"Redirect interrupts an in-progress participant with replacement instructions. " +
|
|
28
27
|
"RollCall returns the participant roster. " +
|
|
29
28
|
"Conclude ends the session with a verdict ('success' or 'failure') and a summary. " +
|
|
30
|
-
"Ask
|
|
31
|
-
"You MUST
|
|
29
|
+
"Multiple Ask / Announce calls in one assistant turn dispatch in parallel — issue them as parallel tool_use blocks rather than sending the same question both broadcast and individually. " +
|
|
30
|
+
"You MUST end every session with Conclude — never end a turn with only text *after* every Ask round has resolved. " +
|
|
32
31
|
"If you can answer the task yourself, still call Conclude with verdict='success' and the answer as the summary.";
|
|
33
32
|
|
|
34
33
|
/** System prompt appended for facilitated agent runners. */
|
|
35
34
|
export const FACILITATED_AGENT_SYSTEM_PROMPT =
|
|
36
35
|
"You participate in a coordinated session. " +
|
|
37
|
-
"
|
|
38
|
-
"
|
|
39
|
-
"
|
|
36
|
+
"Each question you receive carries an [ask#N] header — quote that N back as the askId field on Answer so the reply pairs with the right question. " +
|
|
37
|
+
"Answer replies to an ask addressed to you. askId is optional: omit it and the handler auto-picks if exactly one ask is owed to you, otherwise it routes your message as an Announce. " +
|
|
38
|
+
"Ask sends a question to another participant and returns immediately with {askIds:[N]}; the reply arrives on a later turn as `[answer#N] <participant>: <text>` in your inbox. " +
|
|
39
|
+
"Announce broadcasts a message to every other participant — use this for unsolicited remarks or to reply to an Announce. " +
|
|
40
40
|
"RollCall lists participants.";
|
|
41
41
|
|
|
42
42
|
/**
|
|
43
|
-
* Facilitate-mode wrapper around `OrchestrationLoop`. The lead
|
|
44
|
-
*
|
|
45
|
-
*
|
|
46
|
-
* existing callers rely on.
|
|
43
|
+
* Facilitate-mode wrapper around `OrchestrationLoop`. The lead is named
|
|
44
|
+
* `"facilitator"`. `facilitatorRunner` getter is a readability shim for
|
|
45
|
+
* tests that read the runner directly.
|
|
47
46
|
*/
|
|
48
47
|
export class Facilitator extends OrchestrationLoop {
|
|
49
48
|
/**
|
|
@@ -52,11 +51,9 @@ export class Facilitator extends OrchestrationLoop {
|
|
|
52
51
|
* @param {Array<{name: string, role: string, runner: import("./agent-runner.js").AgentRunner}>} deps.agents
|
|
53
52
|
* @param {import("./message-bus.js").MessageBus} deps.messageBus
|
|
54
53
|
* @param {import("stream").Writable} deps.output
|
|
55
|
-
* @param {
|
|
56
|
-
* @param {object} [deps.ctx]
|
|
57
|
-
* @param {object} [deps.eventQueue]
|
|
58
|
-
* @param {string} [deps.taskAmend]
|
|
54
|
+
* @param {object} deps.ctx
|
|
59
55
|
* @param {object} deps.redactor
|
|
56
|
+
* @param {string} [deps.taskAmend]
|
|
60
57
|
*/
|
|
61
58
|
constructor(deps) {
|
|
62
59
|
super({
|
|
@@ -67,20 +64,10 @@ export class Facilitator extends OrchestrationLoop {
|
|
|
67
64
|
});
|
|
68
65
|
}
|
|
69
66
|
|
|
70
|
-
/**
|
|
67
|
+
/** Readability shim — exposes the lead runner under its mode-specific name. */
|
|
71
68
|
get facilitatorRunner() {
|
|
72
69
|
return this.leadRunner;
|
|
73
70
|
}
|
|
74
|
-
|
|
75
|
-
/** @returns {number} */
|
|
76
|
-
get facilitatorTurns() {
|
|
77
|
-
return this.leadTurns;
|
|
78
|
-
}
|
|
79
|
-
|
|
80
|
-
/** @param {number} v */
|
|
81
|
-
set facilitatorTurns(v) {
|
|
82
|
-
this.leadTurns = v;
|
|
83
|
-
}
|
|
84
71
|
}
|
|
85
72
|
|
|
86
73
|
const devNull = new Writable({
|
|
@@ -96,15 +83,15 @@ const devNull = new Writable({
|
|
|
96
83
|
* @param {Array<{name: string, role: string, cwd?: string, maxTurns?: number, allowedTools?: string[], agentProfile?: string, systemPromptAmend?: string}>} deps.agentConfigs
|
|
97
84
|
* @param {function} deps.query
|
|
98
85
|
* @param {import("stream").Writable} deps.output
|
|
99
|
-
* @param {string} [deps.model]
|
|
100
|
-
* @param {string} [deps.agentModel]
|
|
101
|
-
* @param {string} [deps.facilitatorModel]
|
|
102
|
-
* @param {number} [deps.maxTurns] -
|
|
103
|
-
* @param {string[]} [deps.facilitatorAllowedTools]
|
|
104
|
-
* @param {string[]} [deps.facilitatorDisallowedTools]
|
|
105
|
-
* @param {string} [deps.facilitatorProfile]
|
|
106
|
-
* @param {string} [deps.profilesDir]
|
|
107
|
-
* @param {string} [deps.taskAmend]
|
|
86
|
+
* @param {string} [deps.model]
|
|
87
|
+
* @param {string} [deps.agentModel]
|
|
88
|
+
* @param {string} [deps.facilitatorModel]
|
|
89
|
+
* @param {number} [deps.maxTurns] - Per-SDK-call turn budget for the facilitator runner (default 80). Each agent's budget is taken from `config.maxTurns` (default 50). The lead is resumed once per inbox-drain round, so this caps the size of one such round, not the whole session — `OrchestrationLoop.maxLeadTurns` bounds session length.
|
|
90
|
+
* @param {string[]} [deps.facilitatorAllowedTools]
|
|
91
|
+
* @param {string[]} [deps.facilitatorDisallowedTools]
|
|
92
|
+
* @param {string} [deps.facilitatorProfile]
|
|
93
|
+
* @param {string} [deps.profilesDir]
|
|
94
|
+
* @param {string} [deps.taskAmend]
|
|
108
95
|
* @returns {Facilitator}
|
|
109
96
|
*/
|
|
110
97
|
export function createFacilitator({
|
|
@@ -147,8 +134,6 @@ export function createFacilitator({
|
|
|
147
134
|
|
|
148
135
|
let facilitator;
|
|
149
136
|
|
|
150
|
-
const eventQueue = createAsyncQueue();
|
|
151
|
-
|
|
152
137
|
const facilitatorServer = createFacilitatorToolServer(ctx);
|
|
153
138
|
|
|
154
139
|
const agents = agentConfigs.map((config) => {
|
|
@@ -190,7 +175,7 @@ export function createFacilitator({
|
|
|
190
175
|
query,
|
|
191
176
|
output: devNull,
|
|
192
177
|
model: facilitatorModel ?? model,
|
|
193
|
-
maxTurns: maxTurns ??
|
|
178
|
+
maxTurns: maxTurns ?? 80,
|
|
194
179
|
allowedTools: facilitatorAllowedTools ?? [
|
|
195
180
|
"Bash",
|
|
196
181
|
"Read",
|
|
@@ -215,9 +200,7 @@ export function createFacilitator({
|
|
|
215
200
|
agents,
|
|
216
201
|
messageBus,
|
|
217
202
|
output,
|
|
218
|
-
maxTurns,
|
|
219
203
|
ctx,
|
|
220
|
-
eventQueue,
|
|
221
204
|
taskAmend,
|
|
222
205
|
redactor,
|
|
223
206
|
});
|
package/src/index.js
CHANGED
package/src/judge.js
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* Judge — one agent session that inspects a completed agent's work and emits
|
|
3
3
|
* a verdict via the orchestration `Conclude` tool. Parallel concept to
|
|
4
4
|
* `Supervisor` and `Facilitator`, but post-hoc and solo: no peer agents,
|
|
5
|
-
* no message bus, no
|
|
5
|
+
* no message bus, no orchestration loop. The judge reads the task, optionally
|
|
6
6
|
* inspects the working directory and trace via read-only tools, and calls
|
|
7
7
|
* Conclude exactly once.
|
|
8
8
|
*
|