@forwardimpact/libeval 0.1.6 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/supervisor.js CHANGED
@@ -13,25 +13,49 @@ import { TraceCollector } from "./trace-collector.js";
13
13
 
14
14
  /**
15
15
  * Check if the supervisor's response signals evaluation success.
16
- * Matches EVALUATION_SUCCESSFUL anywhere in the text, tolerating markdown
17
- * formatting (e.g. **EVALUATION_SUCCESSFUL**). Uses word boundaries to
16
+ * Matches EVALUATION_COMPLETE anywhere in the text, tolerating markdown
17
+ * formatting (e.g. **EVALUATION_COMPLETE**). Uses word boundaries to
18
18
  * avoid matching inside longer identifiers.
19
19
  * @param {string} text
20
20
  * @returns {boolean}
21
21
  */
22
- export function isSuccessful(text) {
23
- return /(?:^|[\s*_~`])EVALUATION_SUCCESSFUL(?:[\s*_~`.,!?]|$)/m.test(text);
22
+ export function isComplete(text) {
23
+ return /(?:^|[\s*_~`])EVALUATION_COMPLETE(?:[\s*_~`.,!?]|$)/m.test(text);
24
+ }
25
+
26
+ /**
27
+ * Check if the supervisor's response signals a mid-turn intervention.
28
+ * Same tolerance rules as isComplete (markdown formatting, word boundaries),
29
+ * but matches the EVALUATION_INTERVENTION keyword instead.
30
+ * @param {string} text
31
+ * @returns {boolean}
32
+ */
33
+ export function isIntervention(text) {
34
+ return /(?:^|[\s*_~`])EVALUATION_INTERVENTION(?:[\s*_~`.,!?]|$)/m.test(text);
24
35
  }
25
36
 
26
37
  /** System prompt appended for the supervisor runner in supervise mode. */
27
38
  export const SUPERVISOR_SYSTEM_PROMPT =
28
- "You supervise another AI agent through a relay — your output becomes the agent's next input. " +
29
- "Guide the agent, answer its questions, and write EVALUATION_SUCCESSFUL when their task is complete.";
39
+ "You relay messages to one persistent agent session — your only output " +
40
+ "channel. Spawning sub-agents or restarting the agent is blocked. Do not " +
41
+ "do the work yourself. Reply briefly to let the agent continue, write " +
42
+ "EVALUATION_INTERVENTION + instructions to interrupt mid-turn, or " +
43
+ "EVALUATION_COMPLETE when done. Only your final message each turn is " +
44
+ "relayed.";
30
45
 
31
46
  /** System prompt appended for the agent runner in supervise mode. */
32
47
  export const AGENT_SYSTEM_PROMPT =
33
- "You are being supervised by another AI agent. " +
34
- "When requirements are ambiguous or you are uncertain, stop and ask a clarifying question before proceeding.";
48
+ "A supervisor watches your work and may interrupt with new instructions " +
49
+ "mid-task. Treat any new prompt as authoritative and adjust course. " +
50
+ "When uncertain, stop and ask a clarifying question.";
51
+
52
+ /**
53
+ * Maximum number of mid-turn interventions allowed within a single agent turn.
54
+ * Bounded so a looping supervisor exhausts its quota fast (observability) but
55
+ * leaves headroom for legitimate "intervene, observe, intervene again" patterns.
56
+ * The outer maxTurns budget still bounds overall runtime.
57
+ */
58
+ const MAX_INTERVENTIONS_PER_TURN = 5;
35
59
 
36
60
  export class Supervisor {
37
61
  /**
@@ -56,12 +80,28 @@ export class Supervisor {
56
80
  /**
57
81
  * Set to true when any supervisor message contains the success signal.
58
82
  * The SDK result text only reflects the last assistant message, so when
59
- * the supervisor writes EVALUATION_SUCCESSFUL in an early message and
83
+ * the supervisor writes EVALUATION_COMPLETE in an early message and
60
84
  * then continues with follow-up work, the result text won't contain it.
61
85
  * This flag captures the signal from the full message stream.
62
86
  * @type {boolean}
63
87
  */
64
- this.successSignalSeen = false;
88
+ this.completeSignalSeen = false;
89
+ /**
90
+ * Set to true when any supervisor message contains EVALUATION_INTERVENTION.
91
+ * Mirrors completeSignalSeen — populated by emitLine when a supervisor
92
+ * assistant text block matches isIntervention(...). The mid-turn loop
93
+ * reads this flag after each supervisor invocation to decide whether to
94
+ * abort the agent's in-flight SDK session.
95
+ * @type {boolean}
96
+ */
97
+ this.interventionSignalSeen = false;
98
+ /**
99
+ * The most recent supervisor SDK result captured inside the mid-turn
100
+ * onBatch callback. The outer loop reads this after the agent aborts to
101
+ * build the next relay prompt without re-running the supervisor.
102
+ * @type {{success: boolean, text: string}|null}
103
+ */
104
+ this.lastSupervisorResult = null;
65
105
  }
66
106
 
67
107
  /**
@@ -75,7 +115,9 @@ export class Supervisor {
75
115
  // Turn 0: Supervisor receives the task and introduces it to the agent
76
116
  this.currentSource = "supervisor";
77
117
  this.currentTurn = 0;
78
- this.successSignalSeen = false;
118
+ this.completeSignalSeen = false;
119
+ this.interventionSignalSeen = false;
120
+ this.lastSupervisorResult = null;
79
121
  let supervisorResult = await this.supervisorRunner.run(task);
80
122
 
81
123
  if (supervisorResult.error) {
@@ -85,58 +127,190 @@ export class Supervisor {
85
127
 
86
128
  // Check for the success signal in either the SDK result text or the
87
129
  // streamed message content. The SDK result text only reflects the last
88
- // assistant message, so when the supervisor writes EVALUATION_SUCCESSFUL
130
+ // assistant message, so when the supervisor writes EVALUATION_COMPLETE
89
131
  // early and then continues (e.g. filing issues), we must also check the
90
132
  // flag set by emitLine during streaming.
91
- if (this.successSignalSeen || isSuccessful(supervisorResult.text)) {
133
+ if (this.completeSignalSeen || isComplete(supervisorResult.text)) {
92
134
  this.emitSummary({ success: true, turns: 0 });
93
135
  return { success: true, turns: 0 };
94
136
  }
95
137
 
96
- for (let turn = 1; turn <= this.maxTurns; turn++) {
97
- // Supervisor's output becomes the agent's input
98
- this.currentSource = "agent";
99
- this.currentTurn = turn;
100
- let agentResult;
101
- if (turn === 1) {
102
- agentResult = await this.agentRunner.run(supervisorResult.text);
103
- } else {
104
- agentResult = await this.agentRunner.resume(supervisorResult.text);
105
- }
138
+ const turnLimit = this.maxTurns === 0 ? Infinity : this.maxTurns;
139
+ for (let turn = 1; turn <= turnLimit; turn++) {
140
+ // Only the supervisor's final message is relayed to the agent.
141
+ // Extract the last assistant text block from the buffer to avoid
142
+ // leaking intermediate reasoning (research, tool calls, notes).
143
+ const relay = this.extractLastText(
144
+ this.supervisorRunner,
145
+ supervisorResult.text,
146
+ );
106
147
 
107
- if (agentResult.error) {
108
- this.emitSummary({ success: false, turns: turn });
109
- return { success: false, turns: turn };
110
- }
148
+ // Drive the agent through interventions until its SDK session ends
149
+ // naturally, the supervisor signals completion mid-turn, or the
150
+ // per-turn intervention budget is exhausted.
151
+ const turnOutcome = await this.#runAgentTurn(turn, relay);
152
+ if (turnOutcome.exit) return turnOutcome.exit;
153
+
154
+ // End-of-turn review (existing behaviour). Returns either an exit
155
+ // outcome (error or completion) or the supervisor result for the
156
+ // next turn's relay.
157
+ const reviewOutcome = await this.#endOfTurnReview(turn);
158
+ if (reviewOutcome.exit) return reviewOutcome.exit;
159
+ supervisorResult = reviewOutcome.supervisorResult;
160
+ }
161
+
162
+ this.emitSummary({ success: false, turns: this.maxTurns });
163
+ return { success: false, turns: this.maxTurns };
164
+ }
111
165
 
112
- // Build the full agent transcript from buffered NDJSON events so the
113
- // supervisor sees tool calls and reasoning, not just the SDK result summary.
114
- const agentTranscript = this.extractTranscript(this.agentRunner);
166
+ /**
167
+ * Drive the agent through one turn, allowing the supervisor to interrupt
168
+ * mid-stream via EVALUATION_INTERVENTION. Returns either an `exit` outcome
169
+ * (the loop should return immediately) or `{exit: null}` (proceed to the
170
+ * end-of-turn review).
171
+ * @param {number} turn
172
+ * @param {string} initialRelay
173
+ * @returns {Promise<{exit: {success: boolean, turns: number}|null}>}
174
+ */
175
+ async #runAgentTurn(turn, initialRelay) {
176
+ let relay = initialRelay;
177
+ let interventions = 0;
115
178
 
116
- const supervisorPrompt =
117
- `The agent reported:\n\n${agentTranscript}\n\n` +
118
- `Review the agent's work and decide how to proceed.`;
179
+ // Wire the mid-turn observation hook on the agent runner. The bound
180
+ // callback captures `turn` so the inner loop's multiple resume(...)
181
+ // calls all see the same turn id. The supervisorRunner does NOT get
182
+ // an onBatch callback — it only fires onLine, which is enough for
183
+ // emitLine to detect EVALUATION_COMPLETE / EVALUATION_INTERVENTION.
184
+ this.agentRunner.onBatch = (batchLines, ctx) =>
185
+ this.#midTurnReview(turn, batchLines, ctx);
119
186
 
120
- this.currentSource = "supervisor";
121
- this.currentTurn = turn;
122
- this.successSignalSeen = false;
123
- supervisorResult = await this.supervisorRunner.resume(supervisorPrompt);
187
+ try {
188
+ while (true) {
189
+ this.currentSource = "agent";
190
+ this.currentTurn = turn;
191
+ const isFirstAgentCall = turn === 1 && interventions === 0;
192
+ const agentResult = isFirstAgentCall
193
+ ? await this.agentRunner.run(relay)
194
+ : await this.agentRunner.resume(relay);
124
195
 
125
- if (supervisorResult.error) {
126
- this.emitSummary({ success: false, turns: turn });
127
- return { success: false, turns: turn };
128
- }
196
+ if (agentResult.error && !agentResult.aborted) {
197
+ this.emitSummary({ success: false, turns: turn });
198
+ return { exit: { success: false, turns: turn } };
199
+ }
200
+
201
+ // Mid-turn EVALUATION_COMPLETE: end the session immediately.
202
+ if (this.completeSignalSeen) {
203
+ this.emitSummary({ success: true, turns: turn });
204
+ return { exit: { success: true, turns: turn } };
205
+ }
206
+
207
+ if (agentResult.aborted && this.interventionSignalSeen) {
208
+ interventions++;
209
+ if (interventions >= MAX_INTERVENTIONS_PER_TURN) {
210
+ this.emitOrchestratorEvent({ type: "intervention_limit", turn });
211
+ return { exit: null };
212
+ }
213
+ relay = this.extractLastText(
214
+ this.supervisorRunner,
215
+ this.lastSupervisorResult?.text ?? "",
216
+ );
217
+ this.emitOrchestratorEvent({ type: "intervention_relayed", turn });
218
+ continue;
219
+ }
129
220
 
130
- // The supervisor's turn is fully complete — check for success signal
131
- // in either the SDK result text or streamed messages.
132
- if (this.successSignalSeen || isSuccessful(supervisorResult.text)) {
133
- this.emitSummary({ success: true, turns: turn });
134
- return { success: true, turns: turn };
221
+ // Agent's SDK session finished naturally — proceed to end-of-turn.
222
+ return { exit: null };
135
223
  }
224
+ } finally {
225
+ // Detach onBatch before the end-of-turn review so the supervisor's
226
+ // own SDK session does not trigger nested onBatch fires.
227
+ this.agentRunner.onBatch = null;
136
228
  }
229
+ }
137
230
 
138
- this.emitSummary({ success: false, turns: this.maxTurns });
139
- return { success: false, turns: this.maxTurns };
231
+ /**
232
+ * Mid-turn supervisor review fired from inside the agent's onBatch hook.
233
+ * Emits a `mid_turn_review` orchestrator marker, runs the supervisor's
234
+ * LLM against the batch, and aborts the agent if the supervisor signals
235
+ * EVALUATION_INTERVENTION or EVALUATION_COMPLETE.
236
+ * @param {number} turn
237
+ * @param {string[]} batchLines
238
+ * @param {{abort: () => void}} ctx
239
+ */
240
+ async #midTurnReview(turn, batchLines, { abort }) {
241
+ const batchTranscript = this.renderBatch(batchLines);
242
+
243
+ // Order matters: emit the orchestrator marker BEFORE the supervisor
244
+ // LLM call so the trace reads
245
+ // agent line → orchestrator:mid_turn_review
246
+ // → supervisor lines (tagged turn:N)
247
+ // → orchestrator:intervention_requested|complete_requested
248
+ this.emitOrchestratorEvent({ type: "mid_turn_review", turn });
249
+
250
+ // currentTurn stays = turn so mid-turn supervisor lines share the
251
+ // agent's turn id. They are distinguishable from end-of-turn reviews
252
+ // by the surrounding orchestrator events emitted around this call.
253
+ this.currentSource = "supervisor";
254
+ this.completeSignalSeen = false;
255
+ this.interventionSignalSeen = false;
256
+
257
+ this.lastSupervisorResult = await this.supervisorRunner.resume(
258
+ `The agent is mid-turn. Latest batch:\n\n${batchTranscript}\n\n` +
259
+ `Respond with a brief acknowledgement to let it continue, or write ` +
260
+ `EVALUATION_INTERVENTION followed by a corrective message to stop ` +
261
+ `and relay a new instruction. Write EVALUATION_COMPLETE only when ` +
262
+ `the task is fully done.`,
263
+ );
264
+ this.currentSource = "agent";
265
+
266
+ if (this.interventionSignalSeen) {
267
+ this.emitOrchestratorEvent({ type: "intervention_requested", turn });
268
+ abort();
269
+ return;
270
+ }
271
+ if (this.completeSignalSeen) {
272
+ this.emitOrchestratorEvent({ type: "complete_requested", turn });
273
+ abort();
274
+ }
275
+ // Non-intervention: do nothing; the agent loop pulls the next line.
276
+ }
277
+
278
+ /**
279
+ * End-of-turn supervisor review (existing behaviour). Returns either an
280
+ * exit outcome (error or completion) or the supervisor result so the
281
+ * outer loop can build the next turn's relay.
282
+ * @param {number} turn
283
+ * @returns {Promise<{exit: {success: boolean, turns: number}|null, supervisorResult?: object}>}
284
+ */
285
+ async #endOfTurnReview(turn) {
286
+ // Build the full agent transcript from buffered NDJSON events so the
287
+ // supervisor sees tool calls and reasoning, not just the SDK result.
288
+ const agentTranscript = this.extractTranscript(this.agentRunner);
289
+
290
+ const supervisorPrompt =
291
+ `The agent reported:\n\n${agentTranscript}\n\n` +
292
+ `Review the agent's work and decide how to proceed.`;
293
+
294
+ this.currentSource = "supervisor";
295
+ this.currentTurn = turn;
296
+ this.completeSignalSeen = false;
297
+ this.interventionSignalSeen = false;
298
+ const supervisorResult =
299
+ await this.supervisorRunner.resume(supervisorPrompt);
300
+
301
+ if (supervisorResult.error) {
302
+ this.emitSummary({ success: false, turns: turn });
303
+ return { exit: { success: false, turns: turn } };
304
+ }
305
+
306
+ // The supervisor's turn is fully complete — check for success signal
307
+ // in either the SDK result text or streamed messages.
308
+ if (this.completeSignalSeen || isComplete(supervisorResult.text)) {
309
+ this.emitSummary({ success: true, turns: turn });
310
+ return { exit: { success: true, turns: turn } };
311
+ }
312
+
313
+ return { exit: null, supervisorResult };
140
314
  }
141
315
 
142
316
  /**
@@ -154,12 +328,38 @@ export class Supervisor {
154
328
  return collector.toText() || "[The agent produced no output.]";
155
329
  }
156
330
 
331
+ /**
332
+ * Extract only the last assistant text block from an AgentRunner's buffer.
333
+ * Scans buffered NDJSON events in reverse to find the final assistant message
334
+ * with a text content block. This prevents intermediate reasoning (tool calls,
335
+ * research notes) from leaking to the agent.
336
+ * @param {import("./agent-runner.js").AgentRunner} runner
337
+ * @param {string} fallback - Fallback text if no assistant text block is found
338
+ * @returns {string}
339
+ */
340
+ extractLastText(runner, fallback) {
341
+ const lines = runner.buffer;
342
+ for (let i = lines.length - 1; i >= 0; i--) {
343
+ const event = JSON.parse(lines[i]);
344
+ if (event.type !== "assistant") continue;
345
+ const content = event.message?.content ?? event.content;
346
+ if (!Array.isArray(content)) continue;
347
+ for (let j = content.length - 1; j >= 0; j--) {
348
+ if (content[j].type === "text" && content[j].text) {
349
+ return content[j].text;
350
+ }
351
+ }
352
+ }
353
+ return fallback;
354
+ }
355
+
157
356
  /**
158
357
  * Emit a single NDJSON line tagged with the current source and turn.
159
358
  * Called in real-time via the AgentRunner onLine callback.
160
359
  *
161
360
  * When the current source is the supervisor, also scans assistant text
162
- * content for the EVALUATION_SUCCESSFUL signal and sets successSignalSeen.
361
+ * content for the EVALUATION_COMPLETE and EVALUATION_INTERVENTION signals,
362
+ * setting completeSignalSeen / interventionSignalSeen respectively.
163
363
  * @param {string} line - Raw NDJSON line from the runner
164
364
  */
165
365
  emitLine(line) {
@@ -171,22 +371,57 @@ export class Supervisor {
171
371
  };
172
372
  this.output.write(JSON.stringify(tagged) + "\n");
173
373
 
174
- // Scan supervisor assistant messages for the success signal in real time.
374
+ // Scan supervisor assistant messages for the signals in real time.
175
375
  // The SDK result text only reflects the final assistant message, but the
176
- // supervisor may write EVALUATION_SUCCESSFUL in an earlier message and
177
- // then continue with follow-up tool calls.
376
+ // supervisor may write EVALUATION_COMPLETE / EVALUATION_INTERVENTION in
377
+ // an earlier message and then continue with follow-up tool calls.
178
378
  if (this.currentSource === "supervisor" && event.type === "assistant") {
179
379
  const content = event.message?.content ?? event.content ?? [];
180
380
  if (Array.isArray(content)) {
181
381
  for (const block of content) {
182
- if (block.type === "text" && isSuccessful(block.text)) {
183
- this.successSignalSeen = true;
184
- }
382
+ if (block.type !== "text" || !block.text) continue;
383
+ if (isComplete(block.text)) this.completeSignalSeen = true;
384
+ if (isIntervention(block.text)) this.interventionSignalSeen = true;
185
385
  }
186
386
  }
187
387
  }
188
388
  }
189
389
 
390
+ /**
391
+ * Render a batch of buffered NDJSON lines as human-readable text for the
392
+ * mid-turn supervisor prompt. Reuses the TraceCollector pipeline so the
393
+ * supervisor sees tool calls and reasoning, not just raw events.
394
+ * @param {string[]} batchLines
395
+ * @returns {string}
396
+ */
397
+ renderBatch(batchLines) {
398
+ if (batchLines.length === 0) return "[empty]";
399
+ const collector = new TraceCollector();
400
+ for (const line of batchLines) {
401
+ collector.addLine(line);
402
+ }
403
+ return collector.toText() || "[empty]";
404
+ }
405
+
406
+ /**
407
+ * Emit an orchestrator-source NDJSON line. Used by the mid-turn loop to
408
+ * mark mid_turn_review / intervention_requested / intervention_relayed /
409
+ * intervention_limit / complete_requested boundaries in the trace, so the
410
+ * improvement coach can distinguish mid-turn supervisor activity from
411
+ * end-of-turn reviews. Additive to existing trace shape — the parser
412
+ * already reads `source` and ignores unknown event types.
413
+ * @param {{type: string, turn?: number}} event
414
+ */
415
+ emitOrchestratorEvent(event) {
416
+ this.output.write(
417
+ JSON.stringify({
418
+ source: "orchestrator",
419
+ turn: this.currentTurn,
420
+ event,
421
+ }) + "\n",
422
+ );
423
+ }
424
+
190
425
  /**
191
426
  * Emit a final orchestrator summary line.
192
427
  * @param {{success: boolean, turns: number}} result
@@ -253,10 +488,14 @@ export function createSupervisor({
253
488
  },
254
489
  });
255
490
 
256
- // Block Task/TaskOutput so the supervisor cannot spawn its own sub-agents.
257
- // The relay loop handles agent communication — letting the supervisor use
258
- // Task would bypass the relay and produce an empty agent trace.
259
- const defaultDisallowed = ["Task", "TaskOutput"];
491
+ // Block every sub-agent spawning tool so the supervisor cannot bypass the
492
+ // relay loop. The current Claude Agent SDK exposes the spawn tool to the
493
+ // model as `Agent`; older versions called it `Task`. Both are blocked
494
+ // (along with TaskOutput/TaskStop) so the supervisor sees no spawn tool
495
+ // regardless of which SDK version is installed. Letting the supervisor
496
+ // spawn its own sub-agent would bypass the relay and produce an empty
497
+ // agent trace, which is the failure mode that motivated this default.
498
+ const defaultDisallowed = ["Agent", "Task", "TaskOutput", "TaskStop"];
260
499
  const disallowedTools = supervisorDisallowedTools
261
500
  ? [...new Set([...defaultDisallowed, ...supervisorDisallowedTools])]
262
501
  : defaultDisallowed;
@@ -0,0 +1,101 @@
1
+ /**
2
+ * Test-only mock factory for AgentRunner. Yields pre-scripted responses,
3
+ * and (when an `onBatch` callback is set) fires it at the same boundaries
4
+ * the real AgentRunner would: assistant messages with at least one text
5
+ * block, and the terminal `result` message. If the callback calls
6
+ * `abort()`, the mock stops iterating that response's messages and
7
+ * reports `aborted: true`.
8
+ *
9
+ * Intentionally a regular module (not a test file) so describe/test blocks
10
+ * here would not run. Lives under test/ to make its scope explicit.
11
+ */
12
+
13
+ import { PassThrough } from "node:stream";
14
+ import { AgentRunner } from "@forwardimpact/libeval";
15
+
16
+ /**
17
+ * Whether a scripted message should trigger an onBatch flush. Mirrors the
18
+ * real AgentRunner: assistant-with-text-block or terminal `result` message.
19
+ * Tool-only or string-content messages accumulate without flushing.
20
+ * @param {object} message
21
+ * @returns {boolean}
22
+ */
23
+ export function shouldFlush(message) {
24
+ if (message.type === "result") return true;
25
+ if (message.type !== "assistant") return false;
26
+ const content = message.message?.content ?? message.content;
27
+ if (!Array.isArray(content)) return false;
28
+ for (const block of content) {
29
+ if (block.type === "text" && block.text) return true;
30
+ }
31
+ return false;
32
+ }
33
+
34
+ /**
35
+ * Create a mock AgentRunner that yields pre-scripted responses. Each call
36
+ * to `run()` or `resume()` pops the next response from the array.
37
+ * @param {object[]} responses - Array of {text, success} objects
38
+ * @param {object[]} [messages] - Messages to buffer per response
39
+ * @returns {AgentRunner}
40
+ */
41
+ export function createMockRunner(responses, messages) {
42
+ const output = new PassThrough();
43
+ let callIndex = 0;
44
+
45
+ const runner = new AgentRunner({
46
+ cwd: "/tmp",
47
+ query: async function* () {},
48
+ output,
49
+ });
50
+
51
+ const consume = async (msgs) => {
52
+ let aborted = false;
53
+ for (const m of msgs) {
54
+ const line = JSON.stringify(m);
55
+ runner.buffer.push(line);
56
+ if (runner.onLine) runner.onLine(line);
57
+ if (runner.onBatch && shouldFlush(m)) {
58
+ await runner.onBatch([line], {
59
+ abort: () => {
60
+ aborted = true;
61
+ },
62
+ });
63
+ if (aborted) break;
64
+ }
65
+ }
66
+ return aborted;
67
+ };
68
+
69
+ runner.run = async (_task) => {
70
+ const resp = responses[callIndex++];
71
+ const msgs = messages?.[callIndex - 1] ?? [
72
+ { type: "assistant", content: resp.text },
73
+ ];
74
+ const aborted = await consume(msgs);
75
+ runner.sessionId = "mock-session";
76
+ return {
77
+ success: resp.success ?? true,
78
+ text: resp.text,
79
+ sessionId: "mock-session",
80
+ aborted,
81
+ error: null,
82
+ };
83
+ };
84
+
85
+ runner.resume = async (_prompt) => {
86
+ const resp = responses[callIndex++];
87
+ const msgs = messages?.[callIndex - 1] ?? [
88
+ { type: "assistant", content: resp.text },
89
+ ];
90
+ const aborted = await consume(msgs);
91
+ return {
92
+ success: resp.success ?? true,
93
+ text: resp.text,
94
+ sessionId: runner.sessionId,
95
+ aborted,
96
+ error: null,
97
+ };
98
+ };
99
+
100
+ return runner;
101
+ }