@forwardimpact/libeval 0.1.5 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/supervisor.js CHANGED
@@ -13,25 +13,49 @@ import { TraceCollector } from "./trace-collector.js";
13
13
 
14
14
  /**
15
15
  * Check if the supervisor's response signals evaluation success.
16
- * Matches EVALUATION_SUCCESSFUL anywhere in the text, tolerating markdown
17
- * formatting (e.g. **EVALUATION_SUCCESSFUL**). Uses word boundaries to
16
+ * Matches EVALUATION_COMPLETE anywhere in the text, tolerating markdown
17
+ * formatting (e.g. **EVALUATION_COMPLETE**). Uses word boundaries to
18
18
  * avoid matching inside longer identifiers.
19
19
  * @param {string} text
20
20
  * @returns {boolean}
21
21
  */
22
- export function isSuccessful(text) {
23
- return /(?:^|[\s*_~`])EVALUATION_SUCCESSFUL(?:[\s*_~`.,!?]|$)/m.test(text);
22
export function isComplete(text) {
  // Accepted delimiters are unchanged (line start/end, whitespace, the
  // markdown markers * _ ~ `, and trailing . , ! ?), with one restriction:
  // an underscore only counts as markdown when the underscore run is not
  // glued to a letter or digit. Without that restriction the keyword would
  // match inside longer identifiers such as NOT_EVALUATION_COMPLETE or
  // EVALUATION_COMPLETE_COUNT, which the word-boundary contract forbids.
  // No /g flag, so the literal carries no lastIndex state between calls.
  return /(?:^|[\s*~`]|(?<![A-Za-z0-9]_*)_)EVALUATION_COMPLETE(?:[\s*~`.,!?]|_(?!_*[A-Za-z0-9])|$)/m.test(
    text,
  );
}
25
+
26
/**
 * Check if the supervisor's response signals a mid-turn intervention.
 *
 * The EVALUATION_INTERVENTION keyword matches when it sits at the start of a
 * line or directly after whitespace or a markdown marker (* _ ~ `), and is
 * followed by whitespace, a markdown marker, one of . , ! ? or the end of a
 * line. An underscore only counts as a markdown marker when the underscore
 * run is not attached to a letter or digit, so identifiers that merely
 * contain the keyword (e.g. NO_EVALUATION_INTERVENTION) are rejected.
 * @param {string} text - Supervisor message text to scan
 * @returns {boolean} True when the keyword appears as a standalone token
 */
export function isIntervention(text) {
  // No /g flag, so the literal carries no lastIndex state between calls.
  return /(?:^|[\s*~`]|(?<![A-Za-z0-9]_*)_)EVALUATION_INTERVENTION(?:[\s*~`.,!?]|_(?!_*[A-Za-z0-9])|$)/m.test(
    text,
  );
}
25
36
 
26
37
  /** System prompt appended for the supervisor runner in supervise mode. */
27
38
  export const SUPERVISOR_SYSTEM_PROMPT =
28
- "You supervise another AI agent through a relay — your output becomes the agent's next input. " +
29
- "Guide the agent, answer its questions, and write EVALUATION_SUCCESSFUL when their task is complete.";
39
+ "You relay messages to one persistent agent session — your only output " +
40
+ "channel. Spawning sub-agents or restarting the agent is blocked. Do not " +
41
+ "do the work yourself. Reply briefly to let the agent continue, write " +
42
+ "EVALUATION_INTERVENTION + instructions to interrupt mid-turn, or " +
43
+ "EVALUATION_COMPLETE when done. Only your final message each turn is " +
44
+ "relayed.";
30
45
 
31
46
  /** System prompt appended for the agent runner in supervise mode. */
32
47
  export const AGENT_SYSTEM_PROMPT =
33
- "You are being supervised by another AI agent. " +
34
- "When requirements are ambiguous or you are uncertain, stop and ask a clarifying question before proceeding.";
48
+ "A supervisor watches your work and may interrupt with new instructions " +
49
+ "mid-task. Treat any new prompt as authoritative and adjust course. " +
50
+ "When uncertain, stop and ask a clarifying question.";
51
+
52
+ /**
53
+ * Maximum number of mid-turn interventions allowed within a single agent turn.
54
+ * Bounded so a looping supervisor exhausts its quota fast (observability) but
55
+ * leaves headroom for legitimate "intervene, observe, intervene again" patterns.
56
+ * The outer maxTurns budget still bounds overall runtime.
57
+ */
58
+ const MAX_INTERVENTIONS_PER_TURN = 5;
35
59
 
36
60
  export class Supervisor {
37
61
  /**
@@ -53,6 +77,31 @@ export class Supervisor {
53
77
  this.currentSource = "agent";
54
78
  /** @type {number} */
55
79
  this.currentTurn = 0;
80
+ /**
81
+ * Set to true when any supervisor message contains the success signal.
82
+ * The SDK result text only reflects the last assistant message, so when
83
+ * the supervisor writes EVALUATION_COMPLETE in an early message and
84
+ * then continues with follow-up work, the result text won't contain it.
85
+ * This flag captures the signal from the full message stream.
86
+ * @type {boolean}
87
+ */
88
+ this.completeSignalSeen = false;
89
+ /**
90
+ * Set to true when any supervisor message contains EVALUATION_INTERVENTION.
91
+ * Mirrors completeSignalSeen — populated by emitLine when a supervisor
92
+ * assistant text block matches isIntervention(...). The mid-turn loop
93
+ * reads this flag after each supervisor invocation to decide whether to
94
+ * abort the agent's in-flight SDK session.
95
+ * @type {boolean}
96
+ */
97
+ this.interventionSignalSeen = false;
98
+ /**
99
+ * The most recent supervisor SDK result captured inside the mid-turn
100
+ * onBatch callback. The outer loop reads this after the agent aborts to
101
+ * build the next relay prompt without re-running the supervisor.
102
+ * @type {{success: boolean, text: string}|null}
103
+ */
104
+ this.lastSupervisorResult = null;
56
105
  }
57
106
 
58
107
  /**
@@ -66,6 +115,9 @@ export class Supervisor {
66
115
  // Turn 0: Supervisor receives the task and introduces it to the agent
67
116
  this.currentSource = "supervisor";
68
117
  this.currentTurn = 0;
118
+ this.completeSignalSeen = false;
119
+ this.interventionSignalSeen = false;
120
+ this.lastSupervisorResult = null;
69
121
  let supervisorResult = await this.supervisorRunner.run(task);
70
122
 
71
123
  if (supervisorResult.error) {
@@ -73,55 +125,192 @@ export class Supervisor {
73
125
  return { success: false, turns: 0 };
74
126
  }
75
127
 
76
- // The supervisor's turn is fully complete (all tool calls executed) by the
77
- // time we check the signal no work is interrupted.
78
- if (isSuccessful(supervisorResult.text)) {
128
+ // Check for the success signal in either the SDK result text or the
129
+ // streamed message content. The SDK result text only reflects the last
130
+ // assistant message, so when the supervisor writes EVALUATION_COMPLETE
131
+ // early and then continues (e.g. filing issues), we must also check the
132
+ // flag set by emitLine during streaming.
133
+ if (this.completeSignalSeen || isComplete(supervisorResult.text)) {
79
134
  this.emitSummary({ success: true, turns: 0 });
80
135
  return { success: true, turns: 0 };
81
136
  }
82
137
 
83
- for (let turn = 1; turn <= this.maxTurns; turn++) {
84
- // Supervisor's output becomes the agent's input
85
- this.currentSource = "agent";
86
- this.currentTurn = turn;
87
- let agentResult;
88
- if (turn === 1) {
89
- agentResult = await this.agentRunner.run(supervisorResult.text);
90
- } else {
91
- agentResult = await this.agentRunner.resume(supervisorResult.text);
92
- }
138
+ const turnLimit = this.maxTurns === 0 ? Infinity : this.maxTurns;
139
+ for (let turn = 1; turn <= turnLimit; turn++) {
140
+ // Only the supervisor's final message is relayed to the agent.
141
+ // Extract the last assistant text block from the buffer to avoid
142
+ // leaking intermediate reasoning (research, tool calls, notes).
143
+ const relay = this.extractLastText(
144
+ this.supervisorRunner,
145
+ supervisorResult.text,
146
+ );
93
147
 
94
- if (agentResult.error) {
95
- this.emitSummary({ success: false, turns: turn });
96
- return { success: false, turns: turn };
97
- }
148
+ // Drive the agent through interventions until its SDK session ends
149
+ // naturally, the supervisor signals completion mid-turn, or the
150
+ // per-turn intervention budget is exhausted.
151
+ const turnOutcome = await this.#runAgentTurn(turn, relay);
152
+ if (turnOutcome.exit) return turnOutcome.exit;
98
153
 
99
- // Build the full agent transcript from buffered NDJSON events so the
100
- // supervisor sees tool calls and reasoning, not just the SDK result summary.
101
- const agentTranscript = this.extractTranscript(this.agentRunner);
154
+ // End-of-turn review (existing behaviour). Returns either an exit
155
+ // outcome (error or completion) or the supervisor result for the
156
+ // next turn's relay.
157
+ const reviewOutcome = await this.#endOfTurnReview(turn);
158
+ if (reviewOutcome.exit) return reviewOutcome.exit;
159
+ supervisorResult = reviewOutcome.supervisorResult;
160
+ }
102
161
 
103
- const supervisorPrompt =
104
- `The agent reported:\n\n${agentTranscript}\n\n` +
105
- `Review the agent's work and decide how to proceed.`;
162
+ this.emitSummary({ success: false, turns: this.maxTurns });
163
+ return { success: false, turns: this.maxTurns };
164
+ }
106
165
 
107
- this.currentSource = "supervisor";
108
- this.currentTurn = turn;
109
- supervisorResult = await this.supervisorRunner.resume(supervisorPrompt);
166
+ /**
167
+ * Drive the agent through one turn, allowing the supervisor to interrupt
168
+ * mid-stream via EVALUATION_INTERVENTION. Returns either an `exit` outcome
169
+ * (the loop should return immediately) or `{exit: null}` (proceed to the
170
+ * end-of-turn review).
171
+ * @param {number} turn
172
+ * @param {string} initialRelay
173
+ * @returns {Promise<{exit: {success: boolean, turns: number}|null}>}
174
+ */
175
+ async #runAgentTurn(turn, initialRelay) {
176
+ let relay = initialRelay;
177
+ let interventions = 0;
110
178
 
111
- if (supervisorResult.error) {
112
- this.emitSummary({ success: false, turns: turn });
113
- return { success: false, turns: turn };
114
- }
179
+ // Wire the mid-turn observation hook on the agent runner. The bound
180
+ // callback captures `turn` so the inner loop's multiple resume(...)
181
+ // calls all see the same turn id. The supervisorRunner does NOT get
182
+ // an onBatch callback — it only fires onLine, which is enough for
183
+ // emitLine to detect EVALUATION_COMPLETE / EVALUATION_INTERVENTION.
184
+ this.agentRunner.onBatch = (batchLines, ctx) =>
185
+ this.#midTurnReview(turn, batchLines, ctx);
115
186
 
116
- // The supervisor's turn is fully complete — check for success signal.
117
- if (isSuccessful(supervisorResult.text)) {
118
- this.emitSummary({ success: true, turns: turn });
119
- return { success: true, turns: turn };
187
+ try {
188
+ while (true) {
189
+ this.currentSource = "agent";
190
+ this.currentTurn = turn;
191
+ const isFirstAgentCall = turn === 1 && interventions === 0;
192
+ const agentResult = isFirstAgentCall
193
+ ? await this.agentRunner.run(relay)
194
+ : await this.agentRunner.resume(relay);
195
+
196
+ if (agentResult.error && !agentResult.aborted) {
197
+ this.emitSummary({ success: false, turns: turn });
198
+ return { exit: { success: false, turns: turn } };
199
+ }
200
+
201
+ // Mid-turn EVALUATION_COMPLETE: end the session immediately.
202
+ if (this.completeSignalSeen) {
203
+ this.emitSummary({ success: true, turns: turn });
204
+ return { exit: { success: true, turns: turn } };
205
+ }
206
+
207
+ if (agentResult.aborted && this.interventionSignalSeen) {
208
+ interventions++;
209
+ if (interventions >= MAX_INTERVENTIONS_PER_TURN) {
210
+ this.emitOrchestratorEvent({ type: "intervention_limit", turn });
211
+ return { exit: null };
212
+ }
213
+ relay = this.extractLastText(
214
+ this.supervisorRunner,
215
+ this.lastSupervisorResult?.text ?? "",
216
+ );
217
+ this.emitOrchestratorEvent({ type: "intervention_relayed", turn });
218
+ continue;
219
+ }
220
+
221
+ // Agent's SDK session finished naturally — proceed to end-of-turn.
222
+ return { exit: null };
120
223
  }
224
+ } finally {
225
+ // Detach onBatch before the end-of-turn review so the supervisor's
226
+ // own SDK session does not trigger nested onBatch fires.
227
+ this.agentRunner.onBatch = null;
121
228
  }
229
+ }
122
230
 
123
- this.emitSummary({ success: false, turns: this.maxTurns });
124
- return { success: false, turns: this.maxTurns };
231
+ /**
232
+ * Mid-turn supervisor review fired from inside the agent's onBatch hook.
233
+ * Emits a `mid_turn_review` orchestrator marker, runs the supervisor's
234
+ * LLM against the batch, and aborts the agent if the supervisor signals
235
+ * EVALUATION_INTERVENTION or EVALUATION_COMPLETE.
236
+ * @param {number} turn
237
+ * @param {string[]} batchLines
238
+ * @param {{abort: () => void}} ctx
239
+ */
240
+ async #midTurnReview(turn, batchLines, { abort }) {
241
+ const batchTranscript = this.renderBatch(batchLines);
242
+
243
+ // Order matters: emit the orchestrator marker BEFORE the supervisor
244
+ // LLM call so the trace reads
245
+ // agent line → orchestrator:mid_turn_review
246
+ // → supervisor lines (tagged turn:N)
247
+ // → orchestrator:intervention_requested|complete_requested
248
+ this.emitOrchestratorEvent({ type: "mid_turn_review", turn });
249
+
250
+ // currentTurn stays = turn so mid-turn supervisor lines share the
251
+ // agent's turn id. They are distinguishable from end-of-turn reviews
252
+ // by the surrounding orchestrator events emitted around this call.
253
+ this.currentSource = "supervisor";
254
+ this.completeSignalSeen = false;
255
+ this.interventionSignalSeen = false;
256
+
257
+ this.lastSupervisorResult = await this.supervisorRunner.resume(
258
+ `The agent is mid-turn. Latest batch:\n\n${batchTranscript}\n\n` +
259
+ `Respond with a brief acknowledgement to let it continue, or write ` +
260
+ `EVALUATION_INTERVENTION followed by a corrective message to stop ` +
261
+ `and relay a new instruction. Write EVALUATION_COMPLETE only when ` +
262
+ `the task is fully done.`,
263
+ );
264
+ this.currentSource = "agent";
265
+
266
+ if (this.interventionSignalSeen) {
267
+ this.emitOrchestratorEvent({ type: "intervention_requested", turn });
268
+ abort();
269
+ return;
270
+ }
271
+ if (this.completeSignalSeen) {
272
+ this.emitOrchestratorEvent({ type: "complete_requested", turn });
273
+ abort();
274
+ }
275
+ // Non-intervention: do nothing; the agent loop pulls the next line.
276
+ }
277
+
278
+ /**
279
+ * End-of-turn supervisor review (existing behaviour). Returns either an
280
+ * exit outcome (error or completion) or the supervisor result so the
281
+ * outer loop can build the next turn's relay.
282
+ * @param {number} turn
283
+ * @returns {Promise<{exit: {success: boolean, turns: number}|null, supervisorResult?: object}>}
284
+ */
285
+ async #endOfTurnReview(turn) {
286
+ // Build the full agent transcript from buffered NDJSON events so the
287
+ // supervisor sees tool calls and reasoning, not just the SDK result.
288
+ const agentTranscript = this.extractTranscript(this.agentRunner);
289
+
290
+ const supervisorPrompt =
291
+ `The agent reported:\n\n${agentTranscript}\n\n` +
292
+ `Review the agent's work and decide how to proceed.`;
293
+
294
+ this.currentSource = "supervisor";
295
+ this.currentTurn = turn;
296
+ this.completeSignalSeen = false;
297
+ this.interventionSignalSeen = false;
298
+ const supervisorResult =
299
+ await this.supervisorRunner.resume(supervisorPrompt);
300
+
301
+ if (supervisorResult.error) {
302
+ this.emitSummary({ success: false, turns: turn });
303
+ return { exit: { success: false, turns: turn } };
304
+ }
305
+
306
+ // The supervisor's turn is fully complete — check for success signal
307
+ // in either the SDK result text or streamed messages.
308
+ if (this.completeSignalSeen || isComplete(supervisorResult.text)) {
309
+ this.emitSummary({ success: true, turns: turn });
310
+ return { exit: { success: true, turns: turn } };
311
+ }
312
+
313
+ return { exit: null, supervisorResult };
125
314
  }
126
315
 
127
316
  /**
@@ -139,9 +328,38 @@ export class Supervisor {
139
328
  return collector.toText() || "[The agent produced no output.]";
140
329
  }
141
330
 
331
+ /**
332
+ * Extract only the last assistant text block from an AgentRunner's buffer.
333
+ * Scans buffered NDJSON events in reverse to find the final assistant message
334
+ * with a text content block. This prevents intermediate reasoning (tool calls,
335
+ * research notes) from leaking to the agent.
336
+ * @param {import("./agent-runner.js").AgentRunner} runner
337
+ * @param {string} fallback - Fallback text if no assistant text block is found
338
+ * @returns {string}
339
+ */
340
+ extractLastText(runner, fallback) {
341
+ const lines = runner.buffer;
342
+ for (let i = lines.length - 1; i >= 0; i--) {
343
+ const event = JSON.parse(lines[i]);
344
+ if (event.type !== "assistant") continue;
345
+ const content = event.message?.content ?? event.content;
346
+ if (!Array.isArray(content)) continue;
347
+ for (let j = content.length - 1; j >= 0; j--) {
348
+ if (content[j].type === "text" && content[j].text) {
349
+ return content[j].text;
350
+ }
351
+ }
352
+ }
353
+ return fallback;
354
+ }
355
+
142
356
  /**
143
357
  * Emit a single NDJSON line tagged with the current source and turn.
144
358
  * Called in real-time via the AgentRunner onLine callback.
359
+ *
360
+ * When the current source is the supervisor, also scans assistant text
361
+ * content for the EVALUATION_COMPLETE and EVALUATION_INTERVENTION signals,
362
+ * setting completeSignalSeen / interventionSignalSeen respectively.
145
363
  * @param {string} line - Raw NDJSON line from the runner
146
364
  */
147
365
  emitLine(line) {
@@ -152,6 +370,56 @@ export class Supervisor {
152
370
  event,
153
371
  };
154
372
  this.output.write(JSON.stringify(tagged) + "\n");
373
+
374
+ // Scan supervisor assistant messages for the signals in real time.
375
+ // The SDK result text only reflects the final assistant message, but the
376
+ // supervisor may write EVALUATION_COMPLETE / EVALUATION_INTERVENTION in
377
+ // an earlier message and then continue with follow-up tool calls.
378
+ if (this.currentSource === "supervisor" && event.type === "assistant") {
379
+ const content = event.message?.content ?? event.content ?? [];
380
+ if (Array.isArray(content)) {
381
+ for (const block of content) {
382
+ if (block.type !== "text" || !block.text) continue;
383
+ if (isComplete(block.text)) this.completeSignalSeen = true;
384
+ if (isIntervention(block.text)) this.interventionSignalSeen = true;
385
+ }
386
+ }
387
+ }
388
+ }
389
+
390
+ /**
391
+ * Render a batch of buffered NDJSON lines as human-readable text for the
392
+ * mid-turn supervisor prompt. Reuses the TraceCollector pipeline so the
393
+ * supervisor sees tool calls and reasoning, not just raw events.
394
+ * @param {string[]} batchLines
395
+ * @returns {string}
396
+ */
397
+ renderBatch(batchLines) {
398
+ if (batchLines.length === 0) return "[empty]";
399
+ const collector = new TraceCollector();
400
+ for (const line of batchLines) {
401
+ collector.addLine(line);
402
+ }
403
+ return collector.toText() || "[empty]";
404
+ }
405
+
406
+ /**
407
+ * Emit an orchestrator-source NDJSON line. Used by the mid-turn loop to
408
+ * mark mid_turn_review / intervention_requested / intervention_relayed /
409
+ * intervention_limit / complete_requested boundaries in the trace, so the
410
+ * improvement coach can distinguish mid-turn supervisor activity from
411
+ * end-of-turn reviews. Additive to existing trace shape — the parser
412
+ * already reads `source` and ignores unknown event types.
413
+ * @param {{type: string, turn?: number}} event
414
+ */
415
+ emitOrchestratorEvent(event) {
416
+ this.output.write(
417
+ JSON.stringify({
418
+ source: "orchestrator",
419
+ turn: this.currentTurn,
420
+ event,
421
+ }) + "\n",
422
+ );
155
423
  }
156
424
 
157
425
  /**
@@ -220,10 +488,14 @@ export function createSupervisor({
220
488
  },
221
489
  });
222
490
 
223
- // Block Task/TaskOutput so the supervisor cannot spawn its own sub-agents.
224
- // The relay loop handles agent communication letting the supervisor use
225
- // Task would bypass the relay and produce an empty agent trace.
226
- const defaultDisallowed = ["Task", "TaskOutput"];
491
+ // Block every sub-agent spawning tool so the supervisor cannot bypass the
492
+ // relay loop. The current Claude Agent SDK exposes the spawn tool to the
493
+ // model as `Agent`; older versions called it `Task`. Both are blocked
494
+ // (along with TaskOutput/TaskStop) so the supervisor sees no spawn tool
495
+ // regardless of which SDK version is installed. Letting the supervisor
496
+ // spawn its own sub-agent would bypass the relay and produce an empty
497
+ // agent trace, which is the failure mode that motivated this default.
498
+ const defaultDisallowed = ["Agent", "Task", "TaskOutput", "TaskStop"];
227
499
  const disallowedTools = supervisorDisallowedTools
228
500
  ? [...new Set([...defaultDisallowed, ...supervisorDisallowedTools])]
229
501
  : defaultDisallowed;
@@ -38,6 +38,13 @@ export class TraceCollector {
38
38
  return;
39
39
  }
40
40
 
41
+ // Unwrap combined supervised trace format {source, turn, event}.
42
+ // The Supervisor emits this wrapper; when replayed through addLine the
43
+ // inner event is the one we need.
44
+ if (event.event && !event.type && typeof event.source === "string") {
45
+ event = event.event;
46
+ }
47
+
41
48
  switch (event.type) {
42
49
  case "system":
43
50
  this.handleSystem(event);
@@ -0,0 +1,101 @@
1
+ /**
2
+ * Test-only mock factory for AgentRunner. Yields pre-scripted responses,
3
+ * and (when an `onBatch` callback is set) fires it at the same boundaries
4
+ * the real AgentRunner would: assistant messages with at least one text
5
+ * block, and the terminal `result` message. If the callback calls
6
+ * `abort()`, the mock stops iterating that response's messages and
7
+ * reports `aborted: true`.
8
+ *
9
+ * Intentionally a regular module (not a test file) so describe/test blocks
10
+ * here would not run. Lives under test/ to make its scope explicit.
11
+ */
12
+
13
+ import { PassThrough } from "node:stream";
14
+ import { AgentRunner } from "@forwardimpact/libeval";
15
+
16
/**
 * Decide whether a scripted message triggers an onBatch flush.
 *
 * A `result` message always flushes. An `assistant` message flushes only
 * when its content (read from `message.content`, falling back to `content`)
 * is an array holding at least one text block with non-empty text. Every
 * other message, including assistant messages with string content or only
 * tool blocks, does not flush. Null or non-object content entries are
 * ignored rather than throwing.
 * @param {object} message
 * @returns {boolean}
 */
export function shouldFlush(message) {
  if (message.type === "result") return true;
  if (message.type !== "assistant") return false;
  const content = message.message?.content ?? message.content;
  if (!Array.isArray(content)) return false;
  return content.some((block) => block?.type === "text" && Boolean(block.text));
}
33
+
34
/**
 * Create a mock AgentRunner that yields pre-scripted responses. Each call
 * to `run()` or `resume()` consumes the next entry of `responses`, and the
 * entry of `messages` at the same index when one is given. Without scripted
 * messages, a single assistant message with string content is buffered.
 *
 * Every message is pushed to `runner.buffer` and passed to `runner.onLine`.
 * When `runner.onBatch` is set, lines accumulate until a message satisfies
 * shouldFlush(); the hook then receives all lines accumulated since the
 * previous flush. If the hook calls `abort()`, the remaining messages of
 * that response are skipped and the result reports `aborted: true`.
 * @param {object[]} responses - Array of {text, success} objects
 * @param {object[]} [messages] - Messages to buffer per response
 * @returns {AgentRunner}
 * @throws {Error} (as a rejection of run/resume) when called more often
 *   than there are scripted responses
 */
export function createMockRunner(responses, messages) {
  const output = new PassThrough();
  let callIndex = 0;

  const runner = new AgentRunner({
    cwd: "/tmp",
    query: async function* () {},
    output,
  });

  // Pop the next scripted response together with its messages.
  const nextScripted = (method) => {
    const index = callIndex++;
    const resp = responses[index];
    if (resp == null) {
      // Fail with a readable message instead of a TypeError on `resp.text`.
      throw new Error(
        `createMockRunner: ${method}() call #${index + 1} has no scripted response`,
      );
    }
    const msgs = messages?.[index] ?? [
      { type: "assistant", content: resp.text },
    ];
    return { resp, msgs };
  };

  // Feed the messages through buffer/onLine/onBatch; resolves to whether
  // the onBatch hook aborted the response.
  const consume = async (msgs) => {
    let aborted = false;
    const ctx = {
      abort: () => {
        aborted = true;
      },
    };
    // Lines seen since the last flush. Non-flushing messages stay here so
    // the next flush hands them to onBatch instead of dropping them.
    let pending = [];
    for (const m of msgs) {
      const line = JSON.stringify(m);
      runner.buffer.push(line);
      if (runner.onLine) runner.onLine(line);
      pending.push(line);
      if (runner.onBatch && shouldFlush(m)) {
        const batch = pending;
        pending = [];
        await runner.onBatch(batch, ctx);
        if (aborted) break;
      }
    }
    return aborted;
  };

  // Shape shared by run() and resume() results.
  const toResult = (resp, aborted) => ({
    success: resp.success ?? true,
    text: resp.text,
    sessionId: runner.sessionId,
    aborted,
    error: null,
  });

  runner.run = async (_task) => {
    const { resp, msgs } = nextScripted("run");
    const aborted = await consume(msgs);
    // The session id is assigned after the messages were consumed.
    runner.sessionId = "mock-session";
    return toResult(resp, aborted);
  };

  runner.resume = async (_prompt) => {
    const { resp, msgs } = nextScripted("resume");
    const aborted = await consume(msgs);
    return toResult(resp, aborted);
  };

  return runner;
}