@forwardimpact/libeval 0.1.20 → 0.1.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,6 +2,12 @@
2
2
  * OrchestrationToolkit — tool schemas, per-role tool sets, and handler
3
3
  * factories for orchestration between supervisors, facilitators, and agents.
4
4
  *
5
+ * The tool surface is Ask / Answer / Announce + Redirect / Conclude / RollCall,
6
+ * shared across facilitation and supervision. Ask registers a pending-ask in
7
+ * the context; Answer clears it and routes the reply. The orchestrator's
8
+ * turn-complete guard (see checkPendingAsk) enforces the request-response
9
+ * contract at runtime rather than in the prompt layer.
10
+ *
5
11
  * Handlers communicate via a shared context object. The orchestrator reads
6
12
  * context at natural checkpoints (after resume(), after onBatch).
7
13
  */
@@ -20,6 +26,12 @@ export function createOrchestrationContext() {
20
26
  redirect: null,
21
27
  participants: [],
22
28
  messageBus: null,
29
+ // Map<addresseeName, {askId, askerName, question, reminded}>
30
+ // Always keyed by an addressee name. Broadcast asks write one entry
31
+ // per named participant, so every pending entry has a concrete
32
+ // addressee and the match rule is uniform.
33
+ pendingAsks: new Map(),
34
+ askIdCounter: 0,
23
35
  };
24
36
  }
25
37
 
@@ -40,46 +52,150 @@ export function createRedirectHandler(ctx) {
40
52
  };
41
53
  }
42
54
 
43
- export function createAskHandler(ctx, { onAsk }) {
44
- return async ({ question }) => {
45
- try {
46
- const answer = await onAsk(question);
47
- return { content: [{ type: "text", text: answer }] };
48
- } catch (err) {
55
/**
 * Build the RollCall tool handler.
 *
 * @param {object} ctx - Orchestration context; `ctx.participants` is read
 *   each time the handler runs, so later changes to the list are reflected.
 * @returns {() => Promise<object>} Handler resolving to a tool result with a
 *   single text item holding the JSON-serialized participant list.
 */
export function createRollCallHandler(ctx) {
  const handleRollCall = async () => {
    const roster = JSON.stringify(ctx.participants);
    return { content: [{ type: "text", text: roster }] };
  };
  return handleRollCall;
}
62
+
63
/**
 * Create an Ask handler for a given caller. Ask registers a pending-ask
 * in ctx and routes the question to the addressee via the message bus.
 *
 * Addressee resolution: a non-empty string `to` wins, then `defaultTo`;
 * when neither is set the ask is broadcast to every participant whose name
 * differs from `from`, writing one pending entry per addressee.
 *
 * If the message bus rejects a delivery (or is not attached yet), the
 * pending entry written for that addressee is rolled back so nobody is
 * later reminded about — or penalised for — an ask they never received,
 * and the handler returns an `isError` result instead of throwing.
 *
 * NOTE(review): `pendingAsks` holds one entry per addressee, so a second
 * ask to the same addressee replaces the first while it is still pending —
 * confirm this is intended.
 *
 * @param {object} ctx - Orchestration context (`participants`,
 *   `pendingAsks`, `askIdCounter`, `messageBus`).
 * @param {object} opts
 * @param {string} opts.from - Canonical name of the asker.
 * @param {string|undefined} opts.defaultTo - Default addressee when the
 *   caller omits `to`. Use `undefined` to signal "broadcast across all
 *   non-asker participants" (facilitator-only).
 * @returns {(args: {question: string, to?: string}) => Promise<object>}
 *   Tool handler resolving to a text tool result.
 */
export function createAskHandler(ctx, { from, defaultTo }) {
  return async ({ question, to }) => {
    const explicitTo = typeof to === "string" && to.length > 0 ? to : null;
    const effectiveTo = explicitTo ?? defaultTo ?? null;

    const addressees = effectiveTo
      ? [effectiveTo]
      : ctx.participants.map((p) => p.name).filter((name) => name !== from);

    if (addressees.length === 0) {
      return {
        content: [{ type: "text", text: "No addressee for Ask." }],
        isError: true,
      };
    }

    const failures = [];
    for (const addressee of addressees) {
      const previous = ctx.pendingAsks.get(addressee);
      const askId = ++ctx.askIdCounter;
      // Register before delivering so the entry exists by the time the
      // addressee can react to the message.
      ctx.pendingAsks.set(addressee, {
        askId,
        askerName: from,
        question,
        reminded: false,
      });
      try {
        ctx.messageBus.ask(from, addressee, question, askId);
      } catch (err) {
        // Delivery failed: undo the registration so the turn-complete
        // guard does not chase an ask that was never sent.
        if (previous === undefined) {
          ctx.pendingAsks.delete(addressee);
        } else {
          ctx.pendingAsks.set(addressee, previous);
        }
        failures.push(`${addressee}: ${err?.message ?? String(err)}`);
      }
    }

    if (failures.length > 0) {
      return {
        content: [
          {
            type: "text",
            text: `Error: Ask not delivered (${failures.join("; ")})`,
          },
        ],
        isError: true,
      };
    }

    return { content: [{ type: "text", text: "Ask delivered." }] };
  };
}
56
104
 
57
- export function createRollCallHandler(ctx) {
58
- return async () => {
59
- return {
60
- content: [{ type: "text", text: JSON.stringify(ctx.participants) }],
61
- };
105
+ /**
106
+ * Create an Answer handler for a given caller. Answer clears the caller's
107
+ * pending-ask entry (keyed by the caller's canonical name) and routes the
108
+ * reply to the original asker via the message bus.
109
+ *
110
+ * @param {object} ctx
111
+ * @param {object} opts
112
+ * @param {string} opts.from - Canonical name of the answerer.
113
+ */
114
+ export function createAnswerHandler(ctx, { from }) {
115
+ return async ({ message }) => {
116
+ const entry = ctx.pendingAsks.get(from);
117
+ if (!entry) {
118
+ return {
119
+ content: [{ type: "text", text: "No pending ask to answer." }],
120
+ isError: true,
121
+ };
122
+ }
123
+ ctx.pendingAsks.delete(from);
124
+ ctx.messageBus.answer(from, entry.askerName, message, entry.askId);
125
+ return { content: [{ type: "text", text: "Answer delivered." }] };
62
126
  };
63
127
  }
64
128
 
65
- export function createShareHandler(ctx, { from }) {
129
/**
 * Build the Announce tool handler for one caller.
 *
 * Each call forwards the message to `ctx.messageBus.announce` under the
 * sender's name and acknowledges delivery. The handler does not read or
 * write `ctx.pendingAsks`.
 *
 * @param {object} ctx - Orchestration context (`messageBus`).
 * @param {object} opts
 * @param {string} opts.from - Name of the announcing participant.
 * @returns {(args: {message: string}) => Promise<object>} Tool handler.
 */
export function createAnnounceHandler(ctx, { from }) {
  async function handleAnnounce({ message }) {
    ctx.messageBus.announce(from, message);
    const acknowledgement = { type: "text", text: "Announcement delivered." };
    return { content: [acknowledgement] };
  }

  return handleAnnounce;
}
71
143
 
72
- export function createTellHandler(ctx, { from }) {
73
- return async ({ message, to }) => {
74
- ctx.messageBus.tell(from, to, message);
75
- return { content: [{ type: "text", text: "Message sent." }] };
76
- };
144
+ /**
145
+ * Shared turn-complete guard. Consulted by Facilitator#runAgent and
146
+ * Supervisor#runAgentTurn / #endOfTurnReview before finalising an agent's
147
+ * turn. Returns "advance" when no pending-ask is owed by `addresseeName`;
148
+ * "recheck" after queueing a single synthetic reminder; "advance" after
149
+ * emitting a protocol_violation event and injecting a synthetic null
150
+ * answer so the original asker unblocks.
151
+ *
152
+ * @param {object} args
153
+ * @param {object} args.ctx
154
+ * @param {object} args.messageBus
155
+ * @param {string} args.addresseeName
156
+ * @param {"facilitated"|"supervised"} args.mode
157
+ * @param {(event: object) => void} args.emitViolation
158
+ * @returns {"advance"|"recheck"}
159
+ */
160
+ export function checkPendingAsk({
161
+ ctx,
162
+ messageBus,
163
+ addresseeName,
164
+ mode,
165
+ emitViolation,
166
+ }) {
167
+ const entry = ctx.pendingAsks.get(addresseeName);
168
+ if (!entry) return "advance";
169
+
170
+ if (!entry.reminded) {
171
+ entry.reminded = true;
172
+ messageBus.synthetic(
173
+ addresseeName,
174
+ `You have an unanswered ask from ${entry.askerName}. Reply via Answer.`,
175
+ );
176
+ return "recheck";
177
+ }
178
+
179
+ emitViolation({
180
+ type: "protocol_violation",
181
+ agent: addresseeName,
182
+ askId: entry.askId,
183
+ mode,
184
+ });
185
+ messageBus.answer(
186
+ "@orchestrator",
187
+ entry.askerName,
188
+ `[no answer: ${addresseeName} did not reply to ask ${entry.askId}]`,
189
+ entry.askId,
190
+ );
191
+ ctx.pendingAsks.delete(addresseeName);
192
+ return "advance";
77
193
  }
78
194
 
79
195
  // --- Per-role MCP server factories ---
80
196
 
81
197
  /**
82
- * Supervisor tools: Conclude + Redirect.
198
+ * Supervisor tools: Ask + Announce + Conclude + Redirect + RollCall.
83
199
  * @param {object} ctx - Orchestration context
84
200
  * @returns {object} MCP server config (type: "sdk")
85
201
  */
@@ -87,46 +203,79 @@ export function createSupervisorToolServer(ctx) {
87
203
  return createSdkMcpServer({
88
204
  name: "orchestration",
89
205
  tools: [
206
+ tool(
207
+ "Ask",
208
+ "Send a question to the agent. The reply arrives via Answer.",
209
+ { question: z.string() },
210
+ createAskHandler(ctx, { from: "supervisor", defaultTo: "agent" }),
211
+ ),
212
+ tool(
213
+ "Announce",
214
+ "Broadcast a message with no reply expected.",
215
+ { message: z.string() },
216
+ createAnnounceHandler(ctx, { from: "supervisor" }),
217
+ ),
90
218
  tool(
91
219
  "Conclude",
92
- "Signal that the evaluation is done. Provide a summary.",
220
+ "End the session with a summary.",
93
221
  { summary: z.string() },
94
222
  createConcludeHandler(ctx),
95
223
  ),
96
224
  tool(
97
225
  "Redirect",
98
- "Interrupt the agent with a corrective message.",
226
+ "Interrupt the agent with replacement instructions.",
99
227
  { message: z.string(), to: z.string().optional() },
100
228
  createRedirectHandler(ctx),
101
229
  ),
230
+ tool(
231
+ "RollCall",
232
+ "List all participants in the session.",
233
+ {},
234
+ createRollCallHandler(ctx),
235
+ ),
102
236
  ],
103
237
  });
104
238
  }
105
239
 
106
240
  /**
107
- * Supervised agent tools: Ask.
241
+ * Supervised agent tools: Ask + Answer + Announce + RollCall.
108
242
  * @param {object} ctx - Orchestration context
109
- * @param {object} opts
110
- * @param {function} opts.onAsk - Async callback: (question) → answer string
111
243
  * @returns {object} MCP server config (type: "sdk")
112
244
  */
113
- export function createSupervisedAgentToolServer(ctx, { onAsk }) {
245
export function createSupervisedAgentToolServer(ctx) {
  // Tool surface for the supervised agent: Ask + Answer + Announce +
  // RollCall. The agent's handlers are wired under the fixed name "agent";
  // Ask defaults to the fixed name "supervisor".
  const askTool = tool(
    "Ask",
    "Send a question to the supervisor. The reply arrives via Answer.",
    { question: z.string() },
    createAskHandler(ctx, { from: "agent", defaultTo: "supervisor" }),
  );

  const answerTool = tool(
    "Answer",
    "Reply to an ask addressed to you.",
    { message: z.string() },
    createAnswerHandler(ctx, { from: "agent" }),
  );

  const announceTool = tool(
    "Announce",
    "Broadcast a message with no reply expected.",
    { message: z.string() },
    createAnnounceHandler(ctx, { from: "agent" }),
  );

  const rollCallTool = tool(
    "RollCall",
    "List all participants in the session.",
    {},
    createRollCallHandler(ctx),
  );

  // Tools are registered in the same order as before.
  return createSdkMcpServer({
    name: "orchestration",
    tools: [askTool, answerTool, announceTool, rollCallTool],
  });
}
126
276
 
127
277
  /**
128
- * Facilitator tools: Conclude + Redirect + RollCall + Share + Tell.
129
- * No Ask — the facilitator answers Ask calls, not issues them.
278
+ * Facilitator tools: Ask + Announce + Conclude + Redirect + RollCall.
130
279
  * @param {object} ctx - Orchestration context
131
280
  * @returns {object} MCP server config (type: "sdk")
132
281
  */
@@ -134,15 +283,27 @@ export function createFacilitatorToolServer(ctx) {
134
283
  return createSdkMcpServer({
135
284
  name: "orchestration",
136
285
  tools: [
286
+ tool(
287
+ "Ask",
288
+ "Send a question to a participant. Omit 'to' to broadcast. The reply arrives via Answer.",
289
+ { question: z.string(), to: z.string().optional() },
290
+ createAskHandler(ctx, { from: "facilitator", defaultTo: undefined }),
291
+ ),
292
+ tool(
293
+ "Announce",
294
+ "Broadcast a message with no reply expected.",
295
+ { message: z.string() },
296
+ createAnnounceHandler(ctx, { from: "facilitator" }),
297
+ ),
137
298
  tool(
138
299
  "Conclude",
139
- "Signal that the task is done. Provide a summary.",
300
+ "End the session with a summary.",
140
301
  { summary: z.string() },
141
302
  createConcludeHandler(ctx),
142
303
  ),
143
304
  tool(
144
305
  "Redirect",
145
- "Interrupt agents with a corrective message. Use to='all' for all agents or a specific agent name.",
306
+ "Interrupt a participant with replacement instructions. Use to='all' for all participants or a specific name.",
146
307
  { message: z.string(), to: z.string().optional() },
147
308
  createRedirectHandler(ctx),
148
309
  ),
@@ -152,57 +313,44 @@ export function createFacilitatorToolServer(ctx) {
152
313
  {},
153
314
  createRollCallHandler(ctx),
154
315
  ),
155
- tool(
156
- "Share",
157
- "Broadcast a message to all participants. After sending, stop making tool calls to receive responses.",
158
- { message: z.string() },
159
- createShareHandler(ctx, { from: "facilitator" }),
160
- ),
161
- tool(
162
- "Tell",
163
- "Send a direct message to one participant. After sending, stop making tool calls to receive their response.",
164
- { message: z.string(), to: z.string() },
165
- createTellHandler(ctx, { from: "facilitator" }),
166
- ),
167
316
  ],
168
317
  });
169
318
  }
170
319
 
171
320
  /**
172
- * Facilitated agent tools: Ask + RollCall + Share + Tell.
321
+ * Facilitated agent tools: Ask + Answer + Announce + RollCall.
173
322
  * @param {object} ctx - Orchestration context
174
323
  * @param {object} opts
175
- * @param {string} opts.from - Agent name (for Share/Tell)
176
- * @param {function} opts.onAsk - Async callback: (question) → answer string
324
+ * @param {string} opts.from - Agent name (canonical, used for handler wiring)
177
325
  * @returns {object} MCP server config (type: "sdk")
178
326
  */
179
- export function createFacilitatedAgentToolServer(ctx, { from, onAsk }) {
327
+ export function createFacilitatedAgentToolServer(ctx, { from }) {
180
328
  return createSdkMcpServer({
181
329
  name: "orchestration",
182
330
  tools: [
183
331
  tool(
184
332
  "Ask",
185
- "Ask the facilitator a clarifying question. Blocks until answered.",
186
- { question: z.string() },
187
- createAskHandler(ctx, { onAsk }),
333
+ "Send a question to another participant. Omit 'to' to ask the facilitator.",
334
+ { question: z.string(), to: z.string().optional() },
335
+ createAskHandler(ctx, { from, defaultTo: "facilitator" }),
188
336
  ),
189
337
  tool(
190
- "RollCall",
191
- "List all participants in the session.",
192
- {},
193
- createRollCallHandler(ctx),
338
+ "Answer",
339
+ "Reply to an ask addressed to you.",
340
+ { message: z.string() },
341
+ createAnswerHandler(ctx, { from }),
194
342
  ),
195
343
  tool(
196
- "Share",
197
- "Broadcast a message to all participants.",
344
+ "Announce",
345
+ "Broadcast a message with no reply expected.",
198
346
  { message: z.string() },
199
- createShareHandler(ctx, { from }),
347
+ createAnnounceHandler(ctx, { from }),
200
348
  ),
201
349
  tool(
202
- "Tell",
203
- "Send a direct message to one participant.",
204
- { message: z.string(), to: z.string() },
205
- createTellHandler(ctx, { from }),
350
+ "RollCall",
351
+ "List all participants in the session.",
352
+ {},
353
+ createRollCallHandler(ctx),
206
354
  ),
207
355
  ],
208
356
  });
@@ -0,0 +1,58 @@
1
+ /**
2
+ * Shared helpers for Facilitator and Supervisor orchestrators:
3
+ * - `createAsyncQueue` — simple promise-based queue used by the facilitator
4
+ * event loop.
5
+ * - `formatMessages` — render a drained message batch as tagged lines.
6
+ */
7
+
8
/**
 * Create a promise-based FIFO queue.
 *
 * - `enqueue(item)` hands the item straight to the longest-waiting
 *   `dequeue()` call if one is pending, otherwise buffers it.
 * - `dequeue()` resolves with the next buffered item; if the buffer is
 *   empty it resolves with `null` once the queue is closed, or waits for
 *   the next `enqueue`/`close`.
 * - `close()` marks the queue closed and resolves every pending
 *   `dequeue()` with `null`. Items already buffered (or enqueued after
 *   closing) can still be dequeued.
 *
 * Any number of `dequeue()` calls may be pending at once; they are served
 * in call order. Each waiter receives its item directly, so a woken
 * consumer cannot lose its item to a later `dequeue()` call.
 *
 * @returns {{enqueue: (item: *) => void, dequeue: () => Promise<*>, close: () => void}}
 */
export function createAsyncQueue() {
  const items = [];
  // Resolve callbacks of dequeue() calls waiting for an item, oldest first.
  const waiters = [];
  let closed = false;

  return {
    enqueue(item) {
      const wake = waiters.shift();
      if (wake) {
        wake(item);
        return;
      }
      items.push(item);
    },
    async dequeue() {
      if (items.length > 0) return items.shift();
      if (closed) return null;
      return new Promise((resolve) => {
        waiters.push(resolve);
      });
    },
    close() {
      closed = true;
      // Release every blocked consumer; null signals "closed and drained".
      for (const wake of waiters.splice(0)) {
        wake(null);
      }
    },
  };
}
37
+
38
// Tag shown for each recognised message kind.
const MESSAGE_KIND_TAGS = new Map([
  ["ask", "[ask]"],
  ["answer", "[answer]"],
  ["announce", "[shared]"],
  ["synthetic", "[system]"],
  ["direct", "[direct]"],
]);

/**
 * Render a drained batch of bus messages, one `<tag> <from>: <text>` line
 * per message, joined with newlines. An empty batch yields "".
 * @param {Array<{from: string, text: string, kind?: string, direct?: boolean}>} messages
 * @returns {string}
 */
export function formatMessages(messages) {
  const lines = [];
  for (const message of messages) {
    lines.push(formatMessage(message));
  }
  return lines.join("\n");
}

// Render one message as "<tag> <from>: <text>".
function formatMessage(m) {
  const tag = tagFor(m);
  return `${tag} ${m.from}: ${m.text}`;
}

// Pick the tag for a message: a recognised `kind` wins; otherwise fall
// back to the `direct` flag.
function tagFor(m) {
  const known = MESSAGE_KIND_TAGS.get(m.kind);
  if (known !== undefined) return known;
  if (m.direct) return "[direct]";
  return "[shared]";
}
@@ -78,7 +78,7 @@ const HINT_HANDLERS = {
78
78
 
79
79
  /**
80
80
  * Strip the `mcp__<server>__` prefix from MCP-namespaced tool names so logs
81
- * show the bare method (e.g. `mcp__orchestration__Tell` → `Tell`). Non-MCP
81
+ * show the bare method (e.g. `mcp__orchestration__Ask` → `Ask`). Non-MCP
82
82
  * names and malformed inputs pass through unchanged.
83
83
  * @param {string} name
84
84
  * @returns {string}
@@ -92,7 +92,7 @@ export function simplifyToolName(name) {
92
92
  }
93
93
 
94
94
  /**
95
- * MCP-prefixed tool names (e.g. `mcp__orchestration__Tell`) take a different
95
+ * MCP-prefixed tool names (e.g. `mcp__orchestration__Ask`) take a different
96
96
  * handler path. The method name itself is surfaced via `simplifyToolName`,
97
97
  * so this only adds the `to/from` decorators for orchestration calls.
98
98
  * Returns null if the name does not match any MCP prefix.
@@ -121,7 +121,7 @@ function hintForMcp(name, input) {
121
121
  * ends with `sanitize`, so the output is guaranteed free of `{`, `}`, `"`
122
122
  * from the input object (success criterion #2).
123
123
  *
124
- * @param {string} name - Tool name (e.g. "Bash", "Read", "mcp__orchestration__Tell")
124
+ * @param {string} name - Tool name (e.g. "Bash", "Read", "mcp__orchestration__Ask")
125
125
  * @param {object|null|undefined} input - Raw tool input object from the trace
126
126
  * @returns {string} One-line hint, or "" when no rule matches
127
127
  */
@@ -0,0 +1,27 @@
1
/**
 * Strip `thinking.signature` base64 blobs from a JSON-serializable value.
 *
 * Applied at the CLI output boundary — the stored structured trace keeps
 * signatures intact (lossless storage), and the display filter drops them
 * by default because they dominate output without helping analysis.
 *
 * Recursively walks the input. Primitives and `null` are returned as-is,
 * arrays are mapped element by element, and objects are rebuilt from their
 * own enumerable string-keyed entries. For any object whose
 * `type === "thinking"`, the `signature` field is removed from the copy.
 * Signatures on objects of any other type are preserved. The input is
 * never mutated.
 *
 * The copy is built with `Object.fromEntries`, which defines every key as
 * a plain data property. An own `"__proto__"` key (which `JSON.parse` can
 * produce) is therefore copied like any other key instead of replacing the
 * copy's prototype.
 *
 * @param {*} value - Any JSON-serializable value
 * @returns {*} A deep-copy with thinking signatures removed
 */
export function stripSignatures(value) {
  if (value === null || typeof value !== "object") return value;
  if (Array.isArray(value)) return value.map((item) => stripSignatures(item));

  const result = Object.fromEntries(
    Object.entries(value).map(([key, val]) => [key, stripSignatures(val)]),
  );
  if (result.type === "thinking") {
    delete result.signature;
  }
  return result;
}